Merge branch 'fixes-2.6.39' into for-2.6.40

commit: 6988f20fe04e9ef3aea488cb8ab57fbeb78e12f0 [log] [tgz]
author: Tejun Heo <tj@kernel.org> Tue May 24 09:59:36 2011 +0200
committer: Tejun Heo <tj@kernel.org> Tue May 24 09:59:36 2011 +0200
tree: c9d7fc50a2e2147a5ca07e3096e7eeb916ad2da9
parent: 0415b00d175e0d8945e6785aad21b5f157976ce0 [diff]
parent: 6ea0c34dac89611126455537552cffe6c7e832ad [diff]
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
index 4f29e5f1..f5bb0a3 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss

@@ -59,3 +59,15 @@
 Contact:	iss_storagedev@hp.com
 Description:	Displays the usage count (number of opens) of logical drive Y
 		of controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/resettable
+Date:		February 2011
+Kernel Version:	2.6.38
+Contact:	iss_storagedev@hp.com
+Description:	Value of 1 indicates the controller can honor the reset_devices
+		kernel parameter.  Value of 0 indicates reset_devices cannot be
+		honored.  This is to allow, for example, kexec tools to be able
+		to warn the user if they designate an unresettable device as
+		a dump device, as kdump requires resetting the device in order
+		to work reliably.
+

diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4
index 5fb7099..f22ac08 100644
--- a/Documentation/ABI/testing/sysfs-fs-ext4
+++ b/Documentation/ABI/testing/sysfs-fs-ext4

@@ -48,7 +48,7 @@
 		 will have its blocks allocated out of its own unique
 		 preallocation pool.
 
-What:		/sys/fs/ext4/<disk>/inode_readahead
+What:		/sys/fs/ext4/<disk>/inode_readahead_blks
 Date:		March 2008
 Contact:	"Theodore Ts'o" <tytso@mit.edu>
 Description:
@@ -85,7 +85,14 @@
 Contact:	"Theodore Ts'o" <tytso@mit.edu>
 Description:
 		Tuning parameter which (if non-zero) controls the goal
-		inode used by the inode allocator in p0reference to
-		all other allocation hueristics.  This is intended for
+		inode used by the inode allocator in preference to
+		all other allocation heuristics.  This is intended for
 		debugging use only, and should be 0 on production
 		systems.
+
+What:		/sys/fs/ext4/<disk>/max_writeback_mb_bump
+Date:		September 2009
+Contact:	"Theodore Ts'o" <tytso@mit.edu>
+Description:
+		The maximum number of megabytes the writeback code will
+		try to write out before move on to another inode.

diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 2deb069..8436b01 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile

@@ -55,7 +55,6 @@
 build_images = mkdir -p $(objtree)/Documentation/DocBook/media/ && \
 	       cp $(srctree)/Documentation/DocBook/dvb/*.png \
 	          $(srctree)/Documentation/DocBook/v4l/*.gif \
-	          $(srctree)/Documentation/DocBook/v4l/*.png \
 		  $(objtree)/Documentation/DocBook/media/
 
 xmldoclinks:

diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl
index 54eb26b..5047936 100644
--- a/Documentation/DocBook/rapidio.tmpl
+++ b/Documentation/DocBook/rapidio.tmpl

@@ -133,7 +133,6 @@
 !Idrivers/rapidio/rio-sysfs.c
      </sect1>
      <sect1 id="PPC32_support"><title>PPC32 support</title>
-!Earch/powerpc/sysdev/fsl_rio.c
 !Iarch/powerpc/sysdev/fsl_rio.c
      </sect1>
   </chapter>

diff --git a/Documentation/development-process/1.Intro b/Documentation/development-process/1.Intro
index 8cc2cba..9b61448 100644
--- a/Documentation/development-process/1.Intro
+++ b/Documentation/development-process/1.Intro

@@ -56,13 +56,13 @@
 
 1.2: WHAT THIS DOCUMENT IS ABOUT
 
-The Linux kernel, at over 6 million lines of code and well over 1000 active
-contributors, is one of the largest and most active free software projects
-in existence.  Since its humble beginning in 1991, this kernel has evolved
-into a best-of-breed operating system component which runs on pocket-sized
-digital music players, desktop PCs, the largest supercomputers in
-existence, and all types of systems in between.  It is a robust, efficient,
-and scalable solution for almost any situation.
+The Linux kernel, at over 8 million lines of code and well over 1000
+contributors to each release, is one of the largest and most active free
+software projects in existence.  Since its humble beginning in 1991, this
+kernel has evolved into a best-of-breed operating system component which
+runs on pocket-sized digital music players, desktop PCs, the largest
+supercomputers in existence, and all types of systems in between.  It is a
+robust, efficient, and scalable solution for almost any situation.
 
 With the growth of Linux has come an increase in the number of developers
 (and companies) wishing to participate in its development.  Hardware
@@ -115,7 +115,7 @@
 improved by comments from Johannes Berg, James Berry, Alex Chiang, Roland
 Dreier, Randy Dunlap, Jake Edge, Jiri Kosina, Matt Mackall, Arthur Marsh,
 Amanda McPherson, Andrew Morton, Andrew Price, Tsugikazu Shibata, and
-Jochen Voß. 
+Jochen Voß.
 
 This work was supported by the Linux Foundation; thanks especially to
 Amanda McPherson, who saw the value of this effort and made it all happen.
@@ -221,7 +221,7 @@
 - Everything that was said above about code review applies doubly to
   closed-source code.  Since this code is not available at all, it cannot
   have been reviewed by the community and will, beyond doubt, have serious
-  problems. 
+  problems.
 
 Makers of embedded systems, in particular, may be tempted to disregard much
 of what has been said in this section in the belief that they are shipping

diff --git a/Documentation/development-process/2.Process b/Documentation/development-process/2.Process
index 911a451..4823577 100644
--- a/Documentation/development-process/2.Process
+++ b/Documentation/development-process/2.Process

@@ -14,16 +14,15 @@
 major kernel release happening every two or three months.  The recent
 release history looks like this:
 
-	2.6.26	July 13, 2008
-	2.6.25	April 16, 2008
-	2.6.24	January 24, 2008
-	2.6.23	October 9, 2007
-	2.6.22	July 8, 2007
-	2.6.21	April 25, 2007
-	2.6.20	February 4, 2007
+	2.6.38	March 14, 2011
+	2.6.37	January 4, 2011
+	2.6.36	October 20, 2010
+	2.6.35	August 1, 2010
+	2.6.34	May 15, 2010
+	2.6.33	February 24, 2010
 
 Every 2.6.x release is a major kernel release with new features, internal
-API changes, and more.  A typical 2.6 release can contain over 10,000
+API changes, and more.  A typical 2.6 release can contain nearly 10,000
 changesets with changes to several hundred thousand lines of code.  2.6 is
 thus the leading edge of Linux kernel development; the kernel uses a
 rolling development model which is continually integrating major changes.
@@ -42,13 +41,13 @@
 and staged ahead of time.  How that process works will be described in
 detail later on).
 
-The merge window lasts for two weeks.  At the end of this time, Linus
-Torvalds will declare that the window is closed and release the first of
-the "rc" kernels.  For the kernel which is destined to be 2.6.26, for
-example, the release which happens at the end of the merge window will be
-called 2.6.26-rc1.  The -rc1 release is the signal that the time to merge
-new features has passed, and that the time to stabilize the next kernel has
-begun.
+The merge window lasts for approximately two weeks.  At the end of this
+time, Linus Torvalds will declare that the window is closed and release the
+first of the "rc" kernels.  For the kernel which is destined to be 2.6.40,
+for example, the release which happens at the end of the merge window will
+be called 2.6.40-rc1.  The -rc1 release is the signal that the time to
+merge new features has passed, and that the time to stabilize the next
+kernel has begun.
 
 Over the next six to ten weeks, only patches which fix problems should be
 submitted to the mainline.  On occasion a more significant change will be
@@ -66,20 +65,19 @@
 considered to be sufficiently stable and the final 2.6.x release is made.
 At that point the whole process starts over again.
 
-As an example, here is how the 2.6.25 development cycle went (all dates in
-2008): 
+As an example, here is how the 2.6.38 development cycle went (all dates in
+2011):
 
-	January 24	2.6.24 stable release
-	February 10	2.6.25-rc1, merge window closes
-	February 15	2.6.25-rc2
-	February 24	2.6.25-rc3
-	March 4	 	2.6.25-rc4
-	March 9		2.6.25-rc5
-	March 16	2.6.25-rc6
-	March 25	2.6.25-rc7
-	April 1		2.6.25-rc8
-	April 11	2.6.25-rc9
-	April 16	2.6.25 stable release
+	January 4	2.6.37 stable release
+	January 18	2.6.38-rc1, merge window closes
+	January 21	2.6.38-rc2
+	February 1	2.6.38-rc3
+	February 7	2.6.38-rc4
+	February 15	2.6.38-rc5
+	February 21	2.6.38-rc6
+	March 1		2.6.38-rc7
+	March 7		2.6.38-rc8
+	March 14	2.6.38 stable release
 
 How do the developers decide when to close the development cycle and create
 the stable release?  The most significant metric used is the list of
@@ -87,7 +85,7 @@
 break systems which worked in the past are considered to be especially
 serious.  For this reason, patches which cause regressions are looked upon
 unfavorably and are quite likely to be reverted during the stabilization
-period. 
+period.
 
 The developers' goal is to fix all known regressions before the stable
 release is made.  In the real world, this kind of perfection is hard to
@@ -99,26 +97,34 @@
 of them are serious.
 
 Once a stable release is made, its ongoing maintenance is passed off to the
-"stable team," currently comprised of Greg Kroah-Hartman and Chris Wright.
-The stable team will release occasional updates to the stable release using
-the 2.6.x.y numbering scheme.  To be considered for an update release, a
-patch must (1) fix a significant bug, and (2) already be merged into the
-mainline for the next development kernel.  Continuing our 2.6.25 example,
-the history (as of this writing) is:
+"stable team," currently consisting of Greg Kroah-Hartman.  The stable team
+will release occasional updates to the stable release using the 2.6.x.y
+numbering scheme.  To be considered for an update release, a patch must (1)
+fix a significant bug, and (2) already be merged into the mainline for the
+next development kernel.  Kernels will typically receive stable updates for
+a little more than one development cycle past their initial release.  So,
+for example, the 2.6.36 kernel's history looked like:
 
-	May 1		2.6.25.1
-	May 6		2.6.25.2 
-	May 9		2.6.25.3 
-	May 15		2.6.25.4
-	June 7		2.6.25.5
-	June 9		2.6.25.6
-	June 16		2.6.25.7
-	June 21		2.6.25.8
-	June 24		2.6.25.9
+	October 10	2.6.36 stable release
+	November 22	2.6.36.1
+	December 9	2.6.36.2
+	January 7	2.6.36.3
+	February 17	2.6.36.4
 
-Stable updates for a given kernel are made for approximately six months;
-after that, the maintenance of stable releases is solely the responsibility
-of the distributors which have shipped that particular kernel.
+2.6.36.4 was the final stable update for the 2.6.36 release.
+
+Some kernels are designated "long term" kernels; they will receive support
+for a longer period.  As of this writing, the current long term kernels
+and their maintainers are:
+
+	2.6.27	Willy Tarreau		(Deep-frozen stable kernel)
+	2.6.32	Greg Kroah-Hartman
+	2.6.35	Andi Kleen		(Embedded flag kernel)
+
+The selection of a kernel for long-term support is purely a matter of a
+maintainer having the need and the time to maintain that release.  There
+are no known plans for long-term support for any specific upcoming
+release.
 
 
 2.2: THE LIFECYCLE OF A PATCH
@@ -130,7 +136,7 @@
 This process can happen quickly for minor fixes, or, in the case of large
 and controversial changes, go on for years.  Much developer frustration
 comes from a lack of understanding of this process or from attempts to
-circumvent it.  
+circumvent it.
 
 In the hopes of reducing that frustration, this document will describe how
 a patch gets into the kernel.  What follows below is an introduction which
@@ -193,8 +199,8 @@
 2.3: HOW PATCHES GET INTO THE KERNEL
 
 There is exactly one person who can merge patches into the mainline kernel
-repository: Linus Torvalds.  But, of the over 12,000 patches which went
-into the 2.6.25 kernel, only 250 (around 2%) were directly chosen by Linus
+repository: Linus Torvalds.  But, of the over 9,500 patches which went
+into the 2.6.38 kernel, only 112 (around 1.3%) were directly chosen by Linus
 himself.  The kernel project has long since grown to a size where no single
 developer could possibly inspect and select every patch unassisted.  The
 way the kernel developers have addressed this growth is through the use of
@@ -229,7 +235,7 @@
 etc.  This chain of repositories can be arbitrarily long, though it rarely
 exceeds two or three links.  Since each maintainer in the chain trusts
 those managing lower-level trees, this process is known as the "chain of
-trust." 
+trust."
 
 Clearly, in a system like this, getting patches into the kernel depends on
 finding the right maintainer.  Sending patches directly to Linus is not
@@ -254,7 +260,7 @@
 collected for testing and review.  The older of these trees, maintained by
 Andrew Morton, is called "-mm" (for memory management, which is how it got
 started).  The -mm tree integrates patches from a long list of subsystem
-trees; it also has some patches aimed at helping with debugging.  
+trees; it also has some patches aimed at helping with debugging.
 
 Beyond that, -mm contains a significant collection of patches which have
 been selected by Andrew directly.  These patches may have been posted on a
@@ -264,8 +270,8 @@
 patch into the mainline, it is likely to end up in -mm.  Miscellaneous
 patches which accumulate in -mm will eventually either be forwarded on to
 an appropriate subsystem tree or be sent directly to Linus.  In a typical
-development cycle, approximately 10% of the patches going into the mainline
-get there via -mm.
+development cycle, approximately 5-10% of the patches going into the
+mainline get there via -mm.
 
 The current -mm patch is available in the "mmotm" (-mm of the moment)
 directory at:
@@ -275,7 +281,7 @@
 Use of the MMOTM tree is likely to be a frustrating experience, though;
 there is a definite chance that it will not even compile.
 
-The other -next tree, started more recently, is linux-next, maintained by
+The primary tree for next-cycle patch merging is linux-next, maintained by
 Stephen Rothwell.  The linux-next tree is, by design, a snapshot of what
 the mainline is expected to look like after the next merge window closes.
 Linux-next trees are announced on the linux-kernel and linux-next mailing
@@ -287,25 +293,14 @@
 
 	http://linux.f-seidel.de/linux-next/pmwiki/
 
-How the linux-next tree will fit into the development process is still
-changing.  As of this writing, the first full development cycle involving
-linux-next (2.6.26) is coming to an end; thus far, it has proved to be a
-valuable resource for finding and fixing integration problems before the
-beginning of the merge window.  See http://lwn.net/Articles/287155/ for
-more information on how linux-next has worked to set up the 2.6.27 merge
-window.
+Linux-next has become an integral part of the kernel development process;
+all patches merged during a given merge window should really have found
+their way into linux-next some time before the merge window opens.
 
-Some developers have begun to suggest that linux-next should be used as the
-target for future development as well.  The linux-next tree does tend to be
-far ahead of the mainline and is more representative of the tree into which
-any new work will be merged.  The downside to this idea is that the
-volatility of linux-next tends to make it a difficult development target.
-See http://lwn.net/Articles/289013/ for more information on this topic, and
-stay tuned; much is still in flux where linux-next is involved.
 
 2.4.1: STAGING TREES
 
-The kernel source tree now contains the drivers/staging/ directory, where
+The kernel source tree contains the drivers/staging/ directory, where
 many sub-directories for drivers or filesystems that are on their way to
 being added to the kernel tree live.  They remain in drivers/staging while
 they still need more work; once complete, they can be moved into the
@@ -313,15 +308,23 @@
 up to Linux kernel coding or quality standards, but people may want to use
 them and track development.
 
-Greg Kroah-Hartman currently (as of 2.6.36) maintains the staging tree.
-Drivers that still need work are sent to him, with each driver having
-its own subdirectory in drivers/staging/.  Along with the driver source
-files, a TODO file should be present in the directory as well.  The TODO
-file lists the pending work that the driver needs for acceptance into
-the kernel proper, as well as a list of people that should be Cc'd for any
-patches to the driver.  Staging drivers that don't currently build should
-have their config entries depend upon CONFIG_BROKEN.  Once they can
-be successfully built without outside patches, CONFIG_BROKEN can be removed.
+Greg Kroah-Hartman currently maintains the staging tree.  Drivers that
+still need work are sent to him, with each driver having its own
+subdirectory in drivers/staging/.  Along with the driver source files, a
+TODO file should be present in the directory as well.  The TODO file lists
+the pending work that the driver needs for acceptance into the kernel
+proper, as well as a list of people that should be Cc'd for any patches to
+the driver.  Current rules require that drivers contributed to staging
+must, at a minimum, compile properly.
+
+Staging can be a relatively easy way to get new drivers into the mainline
+where, with luck, they will come to the attention of other developers and
+improve quickly.  Entry into staging is not the end of the story, though;
+code in staging which is not seeing regular progress will eventually be
+removed.  Distributors also tend to be relatively reluctant to enable
+staging drivers.  So staging is, at best, a stop on the way toward becoming
+a proper mainline driver.
+
 
 2.5: TOOLS
 
@@ -347,11 +350,7 @@
 
 	http://git-scm.com/
 
-That page has pointers to documentation and tutorials.  One should be
-aware, in particular, of the Kernel Hacker's Guide to git, which has
-information specific to kernel development:
-
-	http://linux.yyz.us/git-howto.html
+That page has pointers to documentation and tutorials.
 
 Among the kernel developers who do not use git, the most popular choice is
 almost certainly Mercurial:
@@ -408,7 +407,7 @@
   important to filter on both the topic of interest (though note that
   long-running conversations can drift away from the original subject
   without changing the email subject line) and the people who are
-  participating.  
+  participating.
 
 - Do not feed the trolls.  If somebody is trying to stir up an angry
   response, ignore them.

diff --git a/Documentation/development-process/3.Early-stage b/Documentation/development-process/3.Early-stage
index 307a159..f87ba7b 100644
--- a/Documentation/development-process/3.Early-stage
+++ b/Documentation/development-process/3.Early-stage

@@ -110,8 +110,8 @@
 
  - The AppArmor security module made use of internal virtual filesystem
    data structures in ways which were considered to be unsafe and
-   unreliable.  This code has since been significantly reworked, but
-   remains outside of the mainline.
+   unreliable.  This concern (among others) kept AppArmor out of the
+   mainline for years.
 
 In each of these cases, a great deal of pain and extra work could have been
 avoided with some early discussion with the kernel developers.
@@ -138,6 +138,19 @@
 patches.  Those are the people who will be best placed to help with a new
 development project.
 
+The task of finding the right maintainer is sometimes challenging enough
+that the kernel developers have added a script to ease the process:
+
+	.../scripts/get_maintainer.pl
+
+This script will return the current maintainer(s) for a given file or
+directory when given the "-f" option.  If passed a patch on the
+command line, it will list the maintainers who should probably receive
+copies of the patch.  There are a number of options regulating how hard
+get_maintainer.pl will search for maintainers; please be careful about
+using the more aggressive options as you may end up including developers
+who have no real interest in the code you are modifying.
+
 If all else fails, talking to Andrew Morton can be an effective way to
 track down a maintainer for a specific piece of code.
 
@@ -155,11 +168,15 @@
 matter is (1) kernel developers tend to be busy, (2) there is no shortage
 of people with grand plans and little code (or even prospect of code) to
 back them up, and (3) nobody is obligated to review or comment on ideas
-posted by others.  If a request-for-comments posting yields little in the
-way of comments, do not assume that it means there is no interest in the
-project.  Unfortunately, you also cannot assume that there are no problems
-with your idea.  The best thing to do in this situation is to proceed,
-keeping the community informed as you go.
+posted by others.  Beyond that, high-level designs often hide problems
+which are only reviewed when somebody actually tries to implement those
+designs; for that reason, kernel developers would rather see the code.
+
+If a request-for-comments posting yields little in the way of comments, do
+not assume that it means there is no interest in the project.
+Unfortunately, you also cannot assume that there are no problems with your
+idea.  The best thing to do in this situation is to proceed, keeping the
+community informed as you go.
 
 
 3.5: GETTING OFFICIAL BUY-IN

diff --git a/Documentation/development-process/4.Coding b/Documentation/development-process/4.Coding
index 2278693..f3f1a46 100644
--- a/Documentation/development-process/4.Coding
+++ b/Documentation/development-process/4.Coding

@@ -131,6 +131,11 @@
 often does not apply to contemporary hardware.  Space *is* time, in that a
 larger program will run slower than one which is more compact.
 
+More recent compilers take an increasingly active role in deciding whether
+a given function should actually be inlined or not.  So the liberal
+placement of "inline" keywords may not just be excessive; it could also be
+irrelevant.
+
 
 * Locking
 
@@ -285,6 +290,13 @@
 distributor does not package it); it can then be run on the code by adding
 "C=1" to your make command.
 
+The "Coccinelle" tool (http://coccinelle.lip6.fr/) is able to find a wide
+variety of potential coding problems; it can also propose fixes for those
+problems.  Quite a few "semantic patches" for the kernel have been packaged
+under the scripts/coccinelle directory; running "make coccicheck" will run
+through those semantic patches and report on any problems found.  See
+Documentation/coccinelle.txt for more information.
+
 Other kinds of portability errors are best found by compiling your code for
 other architectures.  If you do not happen to have an S/390 system or a
 Blackfin development board handy, you can still perform the compilation
@@ -308,7 +320,9 @@
 changelog.  Log entries should describe the problem being solved, the form
 of the solution, the people who worked on the patch, any relevant
 effects on performance, and anything else that might be needed to
-understand the patch.
+understand the patch.  Be sure that the changelog says *why* the patch is
+worth applying; a surprising number of developers fail to provide that
+information.
 
 Any code which adds a new user-space interface - including new sysfs or
 /proc files - should include documentation of that interface which enables
@@ -321,7 +335,7 @@
 appropriate entries to this file.
 
 Any new configuration options must be accompanied by help text which
-clearly explains the options and when the user might want to select them. 
+clearly explains the options and when the user might want to select them.
 
 Internal API information for many subsystems is documented by way of
 specially-formatted comments; these comments can be extracted and formatted
@@ -372,7 +386,8 @@
 lead to literally hundreds or thousands of changes - many of which are
 likely to conflict with work being done by other developers.  Needless to
 say, this can be a large job, so it is best to be sure that the
-justification is solid.
+justification is solid.  Note that the Coccinelle tool can help with
+wide-ranging API changes.
 
 When making an incompatible API change, one should, whenever possible,
 ensure that code which has not been updated is caught by the compiler.

diff --git a/Documentation/development-process/5.Posting b/Documentation/development-process/5.Posting
index f622c1e..903a254 100644
--- a/Documentation/development-process/5.Posting
+++ b/Documentation/development-process/5.Posting

@@ -60,12 +60,15 @@
 
 Patches must be prepared against a specific version of the kernel.  As a
 general rule, a patch should be based on the current mainline as found in
-Linus's git tree.  It may become necessary to make versions against -mm,
-linux-next, or a subsystem tree, though, to facilitate wider testing and
-review.  Depending on the area of your patch and what is going on
-elsewhere, basing a patch against these other trees can require a
-significant amount of work resolving conflicts and dealing with API
-changes.
+Linus's git tree.  When basing on mainline, start with a well-known release
+point - a stable or -rc release - rather than branching off the mainline at
+an arbitrary spot.
+
+It may become necessary to make versions against -mm, linux-next, or a
+subsystem tree, though, to facilitate wider testing and review.  Depending
+on the area of your patch and what is going on elsewhere, basing a patch
+against these other trees can require a significant amount of work
+resolving conflicts and dealing with API changes.
 
 Only the most simple changes should be formatted as a single patch;
 everything else should be made as a logical series of changes.  Splitting
@@ -100,11 +103,11 @@
    result is a broken kernel, you will make life harder for developers and
    users who are engaging in the noble work of tracking down problems.
 
- - Do not overdo it, though.  One developer recently posted a set of edits
+ - Do not overdo it, though.  One developer once posted a set of edits
    to a single file as 500 separate patches - an act which did not make him
    the most popular person on the kernel mailing list.  A single patch can
    be reasonably large as long as it still contains a single *logical*
-   change. 
+   change.
 
  - It can be tempting to add a whole new infrastructure with a series of
    patches, but to leave that infrastructure unused until the final patch
@@ -162,7 +165,8 @@
 for the change as well as possible given the one-line constraint.  The
 detailed description can then amplify on those topics and provide any
 needed additional information.  If the patch fixes a bug, cite the commit
-which introduced the bug if possible.  If a problem is associated with
+which introduced the bug if possible (and please provide both the commit ID
+and the title when citing commits).  If a problem is associated with
 specific log or compiler output, include that output to help others
 searching for a solution to the same problem.  If the change is meant to
 support other changes coming in later patch, say so.  If internal APIs are
@@ -230,7 +234,7 @@
    which have had gratuitous white-space changes or line wrapping performed
    by the mail client will not apply at the other end, and often will not
    be examined in any detail.  If there is any doubt at all, mail the patch
-   to yourself and convince yourself that it shows up intact.  
+   to yourself and convince yourself that it shows up intact.
 
    Documentation/email-clients.txt has some helpful hints on making
    specific mail clients work for sending patches.
@@ -287,7 +291,7 @@
 
 where "nn" is the ordinal number of the patch, "mm" is the total number of
 patches in the series, and "subsys" is the name of the affected subsystem.
-Clearly, nn/mm can be omitted for a single, standalone patch.  
+Clearly, nn/mm can be omitted for a single, standalone patch.
 
 If you have a significant series of patches, it is customary to send an
 introductory description as part zero.  This convention is not universally
@@ -299,5 +303,5 @@
 sent as a reply to the first part so that they all thread together at the
 receiving end.  Tools like git and quilt have commands to mail out a set of
 patches with the proper threading.  If you have a long series, though, and
-are using git, please provide the --no-chain-reply-to option to avoid
+are using git, please stay away from the --chain-reply-to option to avoid
 creating exceptionally deep nesting.

diff --git a/Documentation/development-process/6.Followthrough b/Documentation/development-process/6.Followthrough
index a8fba3d8..41d324a 100644
--- a/Documentation/development-process/6.Followthrough
+++ b/Documentation/development-process/6.Followthrough

@@ -66,6 +66,11 @@
 that you don't realize that something is fundamentally wrong or, perhaps,
 you're not even solving the right problem.
 
+Andrew Morton has suggested that every review comment which does not result
+in a code change should result in an additional code comment instead; that
+can help future reviewers avoid the questions which came up the first time
+around.
+
 One fatal mistake is to ignore review comments in the hope that they will
 go away.  They will not go away.  If you repost code without having
 responded to the comments you got the time before, you're likely to find
@@ -100,7 +105,7 @@
 subsystem to the next; each maintainer has his or her own way of doing
 things.  In particular, there may be more than one tree - one, perhaps,
 dedicated to patches planned for the next merge window, and another for
-longer-term work.  
+longer-term work.
 
 For patches applying to areas for which there is no obvious subsystem tree
 (memory management patches, for example), the default tree often ends up
@@ -109,11 +114,10 @@
 
 Inclusion into a subsystem tree can bring a higher level of visibility to a
 patch.  Now other developers working with that tree will get the patch by
-default.  Subsystem trees typically feed into -mm and linux-next as well,
-making their contents visible to the development community as a whole.  At
-this point, there's a good chance that you will get more comments from a
-new set of reviewers; these comments need to be answered as in the previous
-round.
+default.  Subsystem trees typically feed linux-next as well, making their
+contents visible to the development community as a whole.  At this point,
+there's a good chance that you will get more comments from a new set of
+reviewers; these comments need to be answered as in the previous round.
 
 What may also happen at this point, depending on the nature of your patch,
 is that conflicts with work being done by others turn up.  In the worst

diff --git a/Documentation/development-process/7.AdvancedTopics b/Documentation/development-process/7.AdvancedTopics
index 8371794..26dc3fa 100644
--- a/Documentation/development-process/7.AdvancedTopics
+++ b/Documentation/development-process/7.AdvancedTopics

@@ -119,7 +119,7 @@
 	to trust things *without* then having to go and check every
 	individual change by hand.
 
-(http://lwn.net/Articles/224135/).  
+(http://lwn.net/Articles/224135/).
 
 To avoid this kind of situation, ensure that all patches within a given
 branch stick closely to the associated topic; a "driver fixes" branch
@@ -138,7 +138,7 @@
 your tree is, what branch to pull, and what changes will result from the
 pull.  The git request-pull command can be helpful in this regard; it will
 format the request as other developers expect, and will also check to be
-sure that you have remembered to push those changes to the public server. 
+sure that you have remembered to push those changes to the public server.
 
 
 7.2: REVIEWING PATCHES

diff --git a/Documentation/device-mapper/dm-flakey.txt b/Documentation/device-mapper/dm-flakey.txt
new file mode 100644
index 0000000..c8efdfd
--- /dev/null
+++ b/Documentation/device-mapper/dm-flakey.txt

@@ -0,0 +1,17 @@
+dm-flakey
+=========
+
+This target is the same as the linear target except that it returns I/O
+errors periodically.  It's been found useful in simulating failing
+devices for testing purposes.
+
+Starting from the time the table is loaded, the device is available for
+<up interval> seconds, then returns errors for <down interval> seconds,
+and then this cycle repeats.
+
+Parameters: <dev path> <offset> <up interval> <down interval>
+    <dev path>: Full pathname to the underlying block-device, or a
+                "major:minor" device-number.
+    <offset>: Starting sector within the device.
+    <up interval>: Number of seconds device is available.
+    <down interval>: Number of seconds device returns errors.

diff --git a/Documentation/dynamic-debug-howto.txt b/Documentation/dynamic-debug-howto.txt
index e6c4b75..f959909 100644
--- a/Documentation/dynamic-debug-howto.txt
+++ b/Documentation/dynamic-debug-howto.txt

@@ -6,7 +6,7 @@
 
 Dynamic debug is designed to allow you to dynamically enable/disable kernel
 code to obtain additional kernel information. Currently, if
-CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_debug() calls can be
+CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_dbg() calls can be
 dynamically enabled per-callsite.
 
 Dynamic debug has even more useful features:
@@ -26,7 +26,7 @@
 Controlling dynamic debug Behaviour
 ===================================
 
-The behaviour of pr_debug()/dev_debug()s are controlled via writing to a
+The behaviour of pr_debug()/dev_dbg()s are controlled via writing to a
 control file in the 'debugfs' filesystem. Thus, you must first mount the debugfs
 filesystem, in order to make use of this feature. Subsequently, we refer to the
 control file as: <debugfs>/dynamic_debug/control. For example, if you want to

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 2e994ef..61b31ac 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking

@@ -128,7 +128,7 @@
 destroy_inode:
 dirty_inode:				(must not sleep)
 write_inode:
-drop_inode:				!!!inode_lock!!!
+drop_inode:				!!!inode->i_lock!!!
 evict_inode:
 put_super:		write
 write_super:		read

diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 6ab9442..6b05046 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt

@@ -367,12 +367,47 @@
 			minimizes the impact on the systme performance
 			while file system's inode table is being initialized.
 
-discard		Controls whether ext4 should issue discard/TRIM
+discard			Controls whether ext4 should issue discard/TRIM
 nodiscard(*)		commands to the underlying block device when
 			blocks are freed.  This is useful for SSD devices
 			and sparse/thinly-provisioned LUNs, but it is off
 			by default until sufficient testing has been done.
 
+nouid32			Disables 32-bit UIDs and GIDs.  This is for
+			interoperability  with  older kernels which only
+			store and expect 16-bit values.
+
+resize			Allows to resize filesystem to the end of the last
+			existing block group, further resize has to be done
+			with resize2fs either online, or offline. It can be
+			used only with conjunction with remount.
+
+block_validity		This options allows to enables/disables the in-kernel
+noblock_validity	facility for tracking filesystem metadata blocks
+			within internal data structures. This allows multi-
+			block allocator and other routines to quickly locate
+			extents which might overlap with filesystem metadata
+			blocks. This option is intended for debugging
+			purposes and since it negatively affects the
+			performance, it is off by default.
+
+dioread_lock		Controls whether or not ext4 should use the DIO read
+dioread_nolock		locking. If the dioread_nolock option is specified
+			ext4 will allocate uninitialized extent before buffer
+			write and convert the extent to initialized after IO
+			completes. This approach allows ext4 code to avoid
+			using inode mutex, which improves scalability on high
+			speed storages. However this does not work with nobh
+			option and the mount will fail. Nor does it work with
+			data journaling and dioread_nolock option will be
+			ignored with kernel warning. Note that dioread_nolock
+			code path is only used for extent-based files.
+			Because of the restrictions this options comprises
+			it is off by default (e.g. dioread_lock).
+
+i_version		Enable 64-bit inode version support. This option is
+			off by default.
+
 Data Mode
 =========
 There are 3 different data modes:
@@ -400,6 +435,176 @@
 outperforms all others modes.  Currently ext4 does not have delayed
 allocation support if this data journalling mode is selected.
 
+/proc entries
+=============
+
+Information about mounted ext4 file systems can be found in
+/proc/fs/ext4.  Each mounted filesystem will have a directory in
+/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or
+/proc/fs/ext4/dm-0).   The files in each per-device directory are shown
+in table below.
+
+Files in /proc/fs/ext4/<devname>
+..............................................................................
+ File            Content
+ mb_groups       details of multiblock allocator buddy cache of free blocks
+..............................................................................
+
+/sys entries
+============
+
+Information about mounted ext4 file systems can be found in
+/sys/fs/ext4.  Each mounted filesystem will have a directory in
+/sys/fs/ext4 based on its device name (i.e., /sys/fs/ext4/hdc or
+/sys/fs/ext4/dm-0).   The files in each per-device directory are shown
+in table below.
+
+Files in /sys/fs/ext4/<devname>
+(see also Documentation/ABI/testing/sysfs-fs-ext4)
+..............................................................................
+ File                         Content
+
+ delayed_allocation_blocks    This file is read-only and shows the number of
+                              blocks that are dirty in the page cache, but
+                              which do not have their location in the
+                              filesystem allocated yet.
+
+ inode_goal                   Tuning parameter which (if non-zero) controls
+                              the goal inode used by the inode allocator in
+                              preference to all other allocation heuristics.
+                              This is intended for debugging use only, and
+                              should be 0 on production systems.
+
+ inode_readahead_blks         Tuning parameter which controls the maximum
+                              number of inode table blocks that ext4's inode
+                              table readahead algorithm will pre-read into
+                              the buffer cache
+
+ lifetime_write_kbytes        This file is read-only and shows the number of
+                              kilobytes of data that have been written to this
+                              filesystem since it was created.
+
+ max_writeback_mb_bump        The maximum number of megabytes the writeback
+                              code will try to write out before move on to
+                              another inode.
+
+ mb_group_prealloc            The multiblock allocator will round up allocation
+                              requests to a multiple of this tuning parameter if
+                              the stripe size is not set in the ext4 superblock
+
+ mb_max_to_scan               The maximum number of extents the multiblock
+                              allocator will search to find the best extent
+
+ mb_min_to_scan               The minimum number of extents the multiblock
+                              allocator will search to find the best extent
+
+ mb_order2_req                Tuning parameter which controls the minimum size
+                              for requests (as a power of 2) where the buddy
+                              cache is used
+
+ mb_stats                     Controls whether the multiblock allocator should
+                              collect statistics, which are shown during the
+                              unmount. 1 means to collect statistics, 0 means
+                              not to collect statistics
+
+ mb_stream_req                Files which have fewer blocks than this tunable
+                              parameter will have their blocks allocated out
+                              of a block group specific preallocation pool, so
+                              that small files are packed closely together.
+                              Each large file will have its blocks allocated
+                              out of its own unique preallocation pool.
+
+ session_write_kbytes         This file is read-only and shows the number of
+                              kilobytes of data that have been written to this
+                              filesystem since it was mounted.
+..............................................................................
+
+Ioctls
+======
+
+There is some Ext4 specific functionality which can be accessed by applications
+through the system call interfaces. The list of all Ext4 specific ioctls are
+shown in the table below.
+
+Table of Ext4 specific ioctls
+..............................................................................
+ Ioctl			      Description
+ EXT4_IOC_GETFLAGS	      Get additional attributes associated with inode.
+			      The ioctl argument is an integer bitfield, with
+			      bit values described in ext4.h. This ioctl is an
+			      alias for FS_IOC_GETFLAGS.
+
+ EXT4_IOC_SETFLAGS	      Set additional attributes associated with inode.
+			      The ioctl argument is an integer bitfield, with
+			      bit values described in ext4.h. This ioctl is an
+			      alias for FS_IOC_SETFLAGS.
+
+ EXT4_IOC_GETVERSION
+ EXT4_IOC_GETVERSION_OLD
+			      Get the inode i_generation number stored for
+			      each inode. The i_generation number is normally
+			      changed only when new inode is created and it is
+			      particularly useful for network filesystems. The
+			      '_OLD' version of this ioctl is an alias for
+			      FS_IOC_GETVERSION.
+
+ EXT4_IOC_SETVERSION
+ EXT4_IOC_SETVERSION_OLD
+			      Set the inode i_generation number stored for
+			      each inode. The '_OLD' version of this ioctl
+			      is an alias for FS_IOC_SETVERSION.
+
+ EXT4_IOC_GROUP_EXTEND	      This ioctl has the same purpose as the resize
+			      mount option. It allows to resize filesystem
+			      to the end of the last existing block group,
+			      further resize has to be done with resize2fs,
+			      either online, or offline. The argument points
+			      to the unsigned logn number representing the
+			      filesystem new block count.
+
+ EXT4_IOC_MOVE_EXT	      Move the block extents from orig_fd (the one
+			      this ioctl is pointing to) to the donor_fd (the
+			      one specified in move_extent structure passed
+			      as an argument to this ioctl). Then, exchange
+			      inode metadata between orig_fd and donor_fd.
+			      This is especially useful for online
+			      defragmentation, because the allocator has the
+			      opportunity to allocate moved blocks better,
+			      ideally into one contiguous extent.
+
+ EXT4_IOC_GROUP_ADD	      Add a new group descriptor to an existing or
+			      new group descriptor block. The new group
+			      descriptor is described by ext4_new_group_input
+			      structure, which is passed as an argument to
+			      this ioctl. This is especially useful in
+			      conjunction with EXT4_IOC_GROUP_EXTEND,
+			      which allows online resize of the filesystem
+			      to the end of the last existing block group.
+			      Those two ioctls combined is used in userspace
+			      online resize tool (e.g. resize2fs).
+
+ EXT4_IOC_MIGRATE	      This ioctl operates on the filesystem itself.
+			      It converts (migrates) ext3 indirect block mapped
+			      inode to ext4 extent mapped inode by walking
+			      through indirect block mapping of the original
+			      inode and converting contiguous block ranges
+			      into ext4 extents of the temporary inode. Then,
+			      inodes are swapped. This ioctl might help, when
+			      migrating from ext3 to ext4 filesystem, however
+			      suggestion is to create fresh ext4 filesystem
+			      and copy data from the backup. Note, that
+			      filesystem has to support extents for this ioctl
+			      to work.
+
+ EXT4_IOC_ALLOC_DA_BLKS	      Force all of the delay allocated blocks to be
+			      allocated to preserve application-expected ext3
+			      behaviour. Note that this will also start
+			      triggering a write of the data blocks, but this
+			      behaviour may change in the future as it is
+			      not necessary and has been done this way only
+			      for sake of simplicity.
+..............................................................................
+
 References
 ==========
 

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 0c986c9..6e29954 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting

@@ -298,11 +298,14 @@
 remaining links or not.  Caller does *not* evict the pagecache or inode-associated
 metadata buffers; getting rid of those is responsibility of method, as it had
 been for ->delete_inode().
-	->drop_inode() returns int now; it's called on final iput() with inode_lock
-held and it returns true if filesystems wants the inode to be dropped.  As before,
-generic_drop_inode() is still the default and it's been updated appropriately.
-generic_delete_inode() is also alive and it consists simply of return 1.  Note that
-all actual eviction work is done by caller after ->drop_inode() returns.
+
+	->drop_inode() returns int now; it's called on final iput() with
+inode->i_lock held and it returns true if filesystems wants the inode to be
+dropped.  As before, generic_drop_inode() is still the default and it's been
+updated appropriately.  generic_delete_inode() is also alive and it consists
+simply of return 1.  Note that all actual eviction work is done by caller after
+->drop_inode() returns.
+
 	clear_inode() is gone; use end_writeback() instead.  As before, it must
 be called exactly once on each call of ->evict_inode() (as it used to be for
 each call of ->delete_inode()).  Unlike before, if you are using inode-associated
@@ -397,6 +400,9 @@
 
 --
 [mandatory]
+
+--
+[mandatory]
 	->get_sb() is gone.  Switch to use of ->mount().  Typically it's just
 a matter of switching from calling get_sb_... to mount_... and changing the
 function type.  If you were doing it manually, just switch from setting ->mnt_root

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 306f0ae..80815ed 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt

@@ -254,7 +254,7 @@
 	should be synchronous or not, not all filesystems check this flag.
 
   drop_inode: called when the last access to the inode is dropped,
-	with the inode_lock spinlock held.
+	with the inode->i_lock spinlock held.
 
 	This method should be either NULL (normal UNIX filesystem
 	semantics) or "generic_delete_inode" (for filesystems that do not

diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg
index 4d0bc70..df02245 100644
--- a/Documentation/hwmon/f71882fg
+++ b/Documentation/hwmon/f71882fg

@@ -2,6 +2,10 @@
 ======================
 
 Supported chips:
+  * Fintek F71808E
+    Prefix: 'f71808e'
+    Addresses scanned: none, address read from Super I/O config space
+    Datasheet: Not public
   * Fintek F71858FG
     Prefix: 'f71858fg'
     Addresses scanned: none, address read from Super I/O config space
@@ -26,10 +30,25 @@
     Prefix: 'f71889ed'
     Addresses scanned: none, address read from Super I/O config space
     Datasheet: Should become available on the Fintek website soon
+  * Fintek F71889A
+    Prefix: 'f71889a'
+    Addresses scanned: none, address read from Super I/O config space
+    Datasheet: Should become available on the Fintek website soon
   * Fintek F8000
     Prefix: 'f8000'
     Addresses scanned: none, address read from Super I/O config space
     Datasheet: Not public
+  * Fintek F81801U
+    Prefix: 'f71889fg'
+    Addresses scanned: none, address read from Super I/O config space
+    Datasheet: Not public
+    Note: This is the 64-pin variant of the F71889FG, they have the
+	  same device ID and are fully compatible as far as hardware
+	  monitoring is concerned.
+  * Fintek F81865F
+    Prefix: 'f81865f'
+    Addresses scanned: none, address read from Super I/O config space
+    Datasheet: Available from the Fintek website
 
 Author: Hans de Goede <hdegoede@redhat.com>
 

diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt
index 8239ebb..9996199 100644
--- a/Documentation/scheduler/sched-design-CFS.txt
+++ b/Documentation/scheduler/sched-design-CFS.txt

@@ -164,7 +164,7 @@
    It puts the scheduling entity (task) into the red-black tree and
    increments the nr_running variable.
 
- - dequeue_tree(...)
+ - dequeue_task(...)
 
    When a task is no longer runnable, this function is called to keep the
    corresponding scheduling entity out of the red-black tree.  It decrements
@@ -195,11 +195,6 @@
    This function is mostly called from time tick functions; it might lead to
    process switch.  This drives the running preemption.
 
- - task_new(...)
-
-   The core scheduler gives the scheduling module an opportunity to manage new
-   task startup.  The CFS scheduling module uses it for group scheduling, while
-   the scheduling module for a real-time task does not use it.
 
 
 

diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py
index dbeb8a0..7ef9b84 100755
--- a/Documentation/target/tcm_mod_builder.py
+++ b/Documentation/target/tcm_mod_builder.py

@@ -239,8 +239,8 @@
 	buf += "#include <target/target_core_configfs.h>\n"
 	buf += "#include <target/target_core_base.h>\n"
 	buf += "#include <target/configfs_macros.h>\n\n"
-	buf += "#include <" + fabric_mod_name + "_base.h>\n"
-	buf += "#include <" + fabric_mod_name + "_fabric.h>\n\n"
+	buf += "#include \"" + fabric_mod_name + "_base.h\"\n"
+	buf += "#include \"" + fabric_mod_name + "_fabric.h\"\n\n"
 
 	buf += "/* Local pointer to allocated TCM configfs fabric module */\n"
 	buf += "struct target_fabric_configfs *" + fabric_mod_name + "_fabric_configfs;\n\n"
@@ -289,6 +289,7 @@
 	buf += "{\n"
 	buf += "	struct " + fabric_mod_name + "_nacl *nacl = container_of(se_acl,\n"
 	buf += "				struct " + fabric_mod_name + "_nacl, se_node_acl);\n"
+	buf += "	core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1);\n"
 	buf += "	kfree(nacl);\n"
 	buf += "}\n\n"
 
@@ -583,9 +584,9 @@
 	buf += "#include <target/target_core_fabric_lib.h>\n"
 	buf += "#include <target/target_core_device.h>\n"
 	buf += "#include <target/target_core_tpg.h>\n"
-	buf += "#include <target/target_core_configfs.h>\n"
-	buf += "#include <" + fabric_mod_name + "_base.h>\n"
-	buf += "#include <" + fabric_mod_name + "_fabric.h>\n\n"
+	buf += "#include <target/target_core_configfs.h>\n\n"
+	buf += "#include \"" + fabric_mod_name + "_base.h\"\n"
+	buf += "#include \"" + fabric_mod_name + "_fabric.h\"\n\n"
 
 	buf += "int " + fabric_mod_name + "_check_true(struct se_portal_group *se_tpg)\n"
 	buf += "{\n"
@@ -973,14 +974,13 @@
 def tcm_mod_build_kbuild(fabric_mod_dir_var, fabric_mod_name):
 
 	buf = ""
-	f = fabric_mod_dir_var + "/Kbuild"
+	f = fabric_mod_dir_var + "/Makefile"
 	print "Writing file: " + f
 
 	p = open(f, 'w')
 	if not p:
 		tcm_mod_err("Unable to open file: " + f)
 
-	buf = "EXTRA_CFLAGS += -I$(srctree)/drivers/target/ -I$(srctree)/include/ -I$(srctree)/drivers/scsi/ -I$(srctree)/include/scsi/ -I$(srctree)/drivers/target/" + fabric_mod_name + "\n\n"
 	buf += fabric_mod_name + "-objs			:= " + fabric_mod_name + "_fabric.o \\\n"
 	buf += "					   " + fabric_mod_name + "_configfs.o\n"
 	buf += "obj-$(CONFIG_" + fabric_mod_name.upper() + ")		+= " + fabric_mod_name + ".o\n"
@@ -1018,7 +1018,7 @@
 
 def tcm_mod_add_kbuild(tcm_dir, fabric_mod_name):
 	buf = "obj-$(CONFIG_" + fabric_mod_name.upper() + ")	+= " + fabric_mod_name.lower() + "/\n"
-	kbuild = tcm_dir + "/drivers/target/Kbuild"
+	kbuild = tcm_dir + "/drivers/target/Makefile"
 
 	f = open(kbuild, 'a')
 	f.write(buf)
@@ -1064,7 +1064,7 @@
 	tcm_mod_build_kbuild(fabric_mod_dir, fabric_mod_name)
 	tcm_mod_build_kconfig(fabric_mod_dir, fabric_mod_name)
 
-	input = raw_input("Would you like to add " + fabric_mod_name + "to drivers/target/Kbuild..? [yes,no]: ")
+	input = raw_input("Would you like to add " + fabric_mod_name + "to drivers/target/Makefile..? [yes,no]: ")
 	if input == "yes" or input == "y":
 		tcm_mod_add_kbuild(tcm_dir, fabric_mod_name)
 

diff --git a/MAINTAINERS b/MAINTAINERS
index 749f9cd..8aa1cac 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS

@@ -548,10 +548,8 @@
 F:	sound/aoa/
 
 APM DRIVER
-M:	Stephen Rothwell <sfr@canb.auug.org.au>
 L:	linux-laptop@vger.kernel.org
-W:	http://www.canb.auug.org.au/~sfr/
-S:	Supported
+S:	Orphan
 F:	arch/x86/kernel/apm_32.c
 F:	include/linux/apm_bios.h
 
@@ -5291,6 +5289,11 @@
 F:	drivers/mtd/nand/r852.c
 F:	drivers/mtd/nand/r852.h
 
+RICOH R5C592 MEMORYSTICK DRIVER
+M:	Maxim Levitsky <maximlevitsky@gmail.com>
+S:	Maintained
+F:	drivers/memstick/host/r592.*
+
 RISCOM8 DRIVER
 S:	Orphan
 F:	Documentation/serial/riscom8.txt
@@ -6628,6 +6631,7 @@
 
 USER-MODE LINUX (UML)
 M:	Jeff Dike <jdike@addtoit.com>
+M:	Richard Weinberger <richard@nod.at>
 L:	user-mode-linux-devel@lists.sourceforge.net
 L:	user-mode-linux-user@lists.sourceforge.net
 W:	http://user-mode-linux.sourceforge.net

diff --git a/arch/arm/mach-ep93xx/gpio.c b/arch/arm/mach-ep93xx/gpio.c
index a889fa7..34e071d 100644
--- a/arch/arm/mach-ep93xx/gpio.c
+++ b/arch/arm/mach-ep93xx/gpio.c

@@ -360,52 +360,14 @@
 	gpio = ep93xx_chip->chip.base;
 	for (i = 0; i < chip->ngpio; i++, gpio++) {
 		int is_out = data_dir_reg & (1 << i);
+		int irq = gpio_to_irq(gpio);
 
-		seq_printf(s, " %s%d gpio-%-3d (%-12s) %s %s",
+		seq_printf(s, " %s%d gpio-%-3d (%-12s) %s %s %s\n",
 				chip->label, i, gpio,
 				gpiochip_is_requested(chip, i) ? : "",
 				is_out ? "out" : "in ",
-				(data_reg & (1 << i)) ? "hi" : "lo");
-
-		if (!is_out) {
-			int irq = gpio_to_irq(gpio);
-			struct irq_desc *desc = irq_desc + irq;
-
-			if (irq >= 0 && desc->action) {
-				char *trigger;
-
-				switch (desc->status & IRQ_TYPE_SENSE_MASK) {
-				case IRQ_TYPE_NONE:
-					trigger = "(default)";
-					break;
-				case IRQ_TYPE_EDGE_FALLING:
-					trigger = "edge-falling";
-					break;
-				case IRQ_TYPE_EDGE_RISING:
-					trigger = "edge-rising";
-					break;
-				case IRQ_TYPE_EDGE_BOTH:
-					trigger = "edge-both";
-					break;
-				case IRQ_TYPE_LEVEL_HIGH:
-					trigger = "level-high";
-					break;
-				case IRQ_TYPE_LEVEL_LOW:
-					trigger = "level-low";
-					break;
-				default:
-					trigger = "?trigger?";
-					break;
-				}
-
-				seq_printf(s, " irq-%d %s%s",
-						irq, trigger,
-						(desc->status & IRQ_WAKEUP)
-							? " wakeup" : "");
-			}
-		}
-
-		seq_printf(s, "\n");
+				(data_reg & (1<<  i)) ? "hi" : "lo",
+				(!is_out && irq>= 0) ? "(interrupt)" : "");
 	}
 }
 

diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index c936c6d..f3a7b10 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c

@@ -285,19 +285,6 @@
 	return 0;
 }
 
-static struct regulator_init_data omap4_panda_vaux1 = {
-	.constraints = {
-		.min_uV			= 1000000,
-		.max_uV			= 3000000,
-		.apply_uV		= true,
-		.valid_modes_mask	= REGULATOR_MODE_NORMAL
-					| REGULATOR_MODE_STANDBY,
-		.valid_ops_mask	 = REGULATOR_CHANGE_VOLTAGE
-					| REGULATOR_CHANGE_MODE
-					| REGULATOR_CHANGE_STATUS,
-	},
-};
-
 static struct regulator_init_data omap4_panda_vaux2 = {
 	.constraints = {
 		.min_uV			= 1200000,
@@ -353,19 +340,6 @@
 	},
 };
 
-static struct regulator_init_data omap4_panda_vusim = {
-	.constraints = {
-		.min_uV			= 1200000,
-		.max_uV			= 2900000,
-		.apply_uV		= true,
-		.valid_modes_mask	= REGULATOR_MODE_NORMAL
-					| REGULATOR_MODE_STANDBY,
-		.valid_ops_mask	 = REGULATOR_CHANGE_VOLTAGE
-					| REGULATOR_CHANGE_MODE
-					| REGULATOR_CHANGE_STATUS,
-	},
-};
-
 static struct regulator_init_data omap4_panda_vana = {
 	.constraints = {
 		.min_uV			= 2100000,
@@ -424,12 +398,10 @@
 	/* Regulators */
 	.vmmc		= &omap4_panda_vmmc,
 	.vpp		= &omap4_panda_vpp,
-	.vusim		= &omap4_panda_vusim,
 	.vana		= &omap4_panda_vana,
 	.vcxio		= &omap4_panda_vcxio,
 	.vdac		= &omap4_panda_vdac,
 	.vusb		= &omap4_panda_vusb,
-	.vaux1		= &omap4_panda_vaux1,
 	.vaux2		= &omap4_panda_vaux2,
 	.vaux3		= &omap4_panda_vaux3,
 	.clk32kg	= &omap4_panda_clk32kg,

diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index e978514..84d1b73 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c

@@ -66,7 +66,7 @@
 
 	WARN(IS_ERR(od), "could not build omap_device for %s\n", oh_name);
 
-	return PTR_ERR(od);
+	return IS_ERR(od) ? PTR_ERR(od) : 0;
 }
 postcore_initcall(omap3_l3_init);
 

diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index 6741743..493505c 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c

@@ -693,6 +693,7 @@
 {
 	u32 l, irq;
 	int cs, ret = -EINVAL;
+	int gpmc_irq;
 	char *ck = NULL;
 
 	if (cpu_is_omap24xx()) {
@@ -701,12 +702,15 @@
 			l = OMAP2420_GPMC_BASE;
 		else
 			l = OMAP34XX_GPMC_BASE;
+		gpmc_irq = INT_34XX_GPMC_IRQ;
 	} else if (cpu_is_omap34xx()) {
 		ck = "gpmc_fck";
 		l = OMAP34XX_GPMC_BASE;
+		gpmc_irq = INT_34XX_GPMC_IRQ;
 	} else if (cpu_is_omap44xx()) {
 		ck = "gpmc_ck";
 		l = OMAP44XX_GPMC_BASE;
+		gpmc_irq = OMAP44XX_IRQ_GPMC;
 	}
 
 	if (WARN_ON(!ck))
@@ -739,16 +743,17 @@
 	/* initalize the irq_chained */
 	irq = OMAP_GPMC_IRQ_BASE;
 	for (cs = 0; cs < GPMC_CS_NUM; cs++) {
-		set_irq_handler(irq, handle_simple_irq);
+		set_irq_chip_and_handler(irq, &dummy_irq_chip,
+						handle_simple_irq);
 		set_irq_flags(irq, IRQF_VALID);
 		irq++;
 	}
 
-	ret = request_irq(INT_34XX_GPMC_IRQ,
+	ret = request_irq(gpmc_irq,
 			gpmc_handle_irq, IRQF_SHARED, "gpmc", gpmc_base);
 	if (ret)
 		pr_err("gpmc: irq-%d could not claim: err %d\n",
-						INT_34XX_GPMC_IRQ, ret);
+						gpmc_irq, ret);
 	return ret;
 }
 postcore_initcall(gpmc_init);
@@ -757,8 +762,6 @@
 {
 	u8 cs;
 
-	if (irq != INT_34XX_GPMC_IRQ)
-		return IRQ_HANDLED;
 	/* check cs to invoke the irq */
 	cs = ((gpmc_read_reg(GPMC_PREFETCH_CONFIG1)) >> CS_NUM_SHIFT) & 0x7;
 	if (OMAP_GPMC_IRQ_BASE+cs <= OMAP_GPMC_IRQ_END)

diff --git a/arch/arm/mach-omap2/omap_l3_smx.c b/arch/arm/mach-omap2/omap_l3_smx.c
index 265bff3..5f2da756 100644
--- a/arch/arm/mach-omap2/omap_l3_smx.c
+++ b/arch/arm/mach-omap2/omap_l3_smx.c

@@ -226,7 +226,6 @@
 	struct omap3_l3         *l3;
 	struct resource         *res;
 	int                     ret;
-	int                     irq;
 
 	l3 = kzalloc(sizeof(*l3), GFP_KERNEL);
 	if (!l3) {
@@ -249,18 +248,17 @@
 		goto err2;
 	}
 
-	irq = platform_get_irq(pdev, 0);
-	ret = request_irq(irq, omap3_l3_app_irq,
+	l3->debug_irq = platform_get_irq(pdev, 0);
+	ret = request_irq(l3->debug_irq, omap3_l3_app_irq,
 		IRQF_DISABLED | IRQF_TRIGGER_RISING,
 		"l3-debug-irq", l3);
 	if (ret) {
 		dev_err(&pdev->dev, "couldn't request debug irq\n");
 		goto err3;
 	}
-	l3->debug_irq = irq;
 
-	irq = platform_get_irq(pdev, 1);
-	ret = request_irq(irq, omap3_l3_app_irq,
+	l3->app_irq = platform_get_irq(pdev, 1);
+	ret = request_irq(l3->app_irq, omap3_l3_app_irq,
 		IRQF_DISABLED | IRQF_TRIGGER_RISING,
 		"l3-app-irq", l3);
 
@@ -269,7 +267,6 @@
 		goto err4;
 	}
 
-	l3->app_irq = irq;
 	goto err0;
 
 err4:

diff --git a/arch/arm/mach-ux500/board-mop500-regulators.h b/arch/arm/mach-ux500/board-mop500-regulators.h
index 2675fae..f979b89 100644
--- a/arch/arm/mach-ux500/board-mop500-regulators.h
+++ b/arch/arm/mach-ux500/board-mop500-regulators.h

@@ -14,6 +14,8 @@
 #include <linux/regulator/machine.h>
 #include <linux/regulator/ab8500.h>
 
+extern struct ab8500_regulator_reg_init
+ab8500_regulator_reg_init[AB8500_NUM_REGULATOR_REGISTERS];
 extern struct regulator_init_data ab8500_regulators[AB8500_NUM_REGULATORS];
 
 #endif

diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index 8790d984..d007645 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c

@@ -20,6 +20,7 @@
 #include <linux/amba/serial.h>
 #include <linux/spi/spi.h>
 #include <linux/mfd/ab8500.h>
+#include <linux/regulator/ab8500.h>
 #include <linux/mfd/tc3589x.h>
 #include <linux/leds-lp5521.h>
 #include <linux/input.h>

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index b3b0f0f..e5f6fc4 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c

@@ -78,7 +78,7 @@
  */
 struct meminfo meminfo;
 
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	int free = 0, total = 0, reserved = 0;
 	int shared = 0, cached = 0, slab = 0, i;

diff --git a/arch/arm/plat-nomadik/gpio.c b/arch/arm/plat-nomadik/gpio.c
index 7062042..80643bc 100644
--- a/arch/arm/plat-nomadik/gpio.c
+++ b/arch/arm/plat-nomadik/gpio.c

@@ -832,51 +832,6 @@
 				: "?  ",
 			(mode < 0) ? "unknown" : modes[mode],
 			pull ? "pull" : "none");
-
-		if (!is_out) {
-			int		irq = gpio_to_irq(gpio);
-			struct irq_desc	*desc = irq_to_desc(irq);
-
-			/* This races with request_irq(), set_irq_type(),
-			 * and set_irq_wake() ... but those are "rare".
-			 *
-			 * More significantly, trigger type flags aren't
-			 * currently maintained by genirq.
-			 */
-			if (irq >= 0 && desc->action) {
-				char *trigger;
-
-				switch (desc->status & IRQ_TYPE_SENSE_MASK) {
-				case IRQ_TYPE_NONE:
-					trigger = "(default)";
-					break;
-				case IRQ_TYPE_EDGE_FALLING:
-					trigger = "edge-falling";
-					break;
-				case IRQ_TYPE_EDGE_RISING:
-					trigger = "edge-rising";
-					break;
-				case IRQ_TYPE_EDGE_BOTH:
-					trigger = "edge-both";
-					break;
-				case IRQ_TYPE_LEVEL_HIGH:
-					trigger = "level-high";
-					break;
-				case IRQ_TYPE_LEVEL_LOW:
-					trigger = "level-low";
-					break;
-				default:
-					trigger = "?trigger?";
-					break;
-				}
-
-				seq_printf(s, " irq-%d %s%s",
-					irq, trigger,
-					(desc->status & IRQ_WAKEUP)
-						? " wakeup" : "");
-			}
-		}
-
 		seq_printf(s, "\n");
 	}
 }

diff --git a/arch/arm/plat-omap/include/plat/irqs.h b/arch/arm/plat-omap/include/plat/irqs.h
index d779283..5a25098 100644
--- a/arch/arm/plat-omap/include/plat/irqs.h
+++ b/arch/arm/plat-omap/include/plat/irqs.h

@@ -416,7 +416,7 @@
 
 /* GPMC related */
 #define OMAP_GPMC_IRQ_BASE	(TWL_IRQ_END)
-#define OMAP_GPMC_NR_IRQS	7
+#define OMAP_GPMC_NR_IRQS	8
 #define OMAP_GPMC_IRQ_END	(OMAP_GPMC_IRQ_BASE + OMAP_GPMC_NR_IRQS)
 
 

diff --git a/arch/arm/plat-omap/include/plat/onenand.h b/arch/arm/plat-omap/include/plat/onenand.h
index cbe897c..2858667 100644
--- a/arch/arm/plat-omap/include/plat/onenand.h
+++ b/arch/arm/plat-omap/include/plat/onenand.h

@@ -32,6 +32,7 @@
 	int			dma_channel;
 	u8			flags;
 	u8			regulator_can_sleep;
+	u8			skip_initial_unlocking;
 };
 
 #define ONENAND_MAX_PARTITIONS 8

diff --git a/arch/arm/plat-pxa/include/plat/pxa3xx_nand.h b/arch/arm/plat-pxa/include/plat/pxa3xx_nand.h
index 01a8448..442301f 100644
--- a/arch/arm/plat-pxa/include/plat/pxa3xx_nand.h
+++ b/arch/arm/plat-pxa/include/plat/pxa3xx_nand.h

@@ -30,6 +30,7 @@
 };
 
 struct pxa3xx_nand_flash {
+	char		*name;
 	uint32_t	chip_id;
 	unsigned int	page_per_block; /* Pages per block (PG_PER_BLK) */
 	unsigned int	page_size;	/* Page size in bytes (PAGE_SZ) */
@@ -37,7 +38,6 @@
 	unsigned int	dfc_width;	/* Width of flash controller(DWIDTH_C) */
 	unsigned int	num_blocks;	/* Number of physical blocks in Flash */
 
-	struct pxa3xx_nand_cmdset *cmdset;	/* NAND command set */
 	struct pxa3xx_nand_timing *timing;	/* NAND Flash timing */
 };
 

diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index cd2062f..49642b5 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig

@@ -6,6 +6,11 @@
 	select HAVE_CLK
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
+	select HAVE_GENERIC_HARDIRQS
+	select GENERIC_IRQ_PROBE
+	select HARDIRQS_SW_RESEND
+	select GENERIC_IRQ_SHOW
+	select GENERIC_HARDIRQS_NO_DEPRECATED
 	help
 	  AVR32 is a high-performance 32-bit RISC microprocessor core,
 	  designed for cost-sensitive embedded applications, with particular
@@ -17,9 +22,6 @@
 config GENERIC_GPIO
 	def_bool y
 
-config GENERIC_HARDIRQS
-	def_bool y
-
 config STACKTRACE_SUPPORT
 	def_bool y
 
@@ -29,12 +31,6 @@
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
-config HARDIRQS_SW_RESEND
-	def_bool y
-
-config GENERIC_IRQ_PROBE
-	def_bool y
-
 config RWSEM_GENERIC_SPINLOCK
 	def_bool y
 

diff --git a/arch/avr32/boards/atngw100/mrmt.c b/arch/avr32/boards/atngw100/mrmt.c
index 7919be3..f914319 100644
--- a/arch/avr32/boards/atngw100/mrmt.c
+++ b/arch/avr32/boards/atngw100/mrmt.c

@@ -301,7 +301,7 @@
 	/* Select the Touchscreen interrupt pin mode */
 	at32_select_periph( GPIO_PIOB_BASE, 1 << (PB_EXTINT_BASE+TS_IRQ),
 			GPIO_PERIPH_A, AT32_GPIOF_DEGLITCH);
-	set_irq_type( AT32_EXTINT(TS_IRQ), IRQ_TYPE_EDGE_FALLING );
+	irq_set_irq_type(AT32_EXTINT(TS_IRQ), IRQ_TYPE_EDGE_FALLING);
 	at32_spi_setup_slaves(0,spi01_board_info,ARRAY_SIZE(spi01_board_info));
 	spi_register_board_info(spi01_board_info,ARRAY_SIZE(spi01_board_info));
 #endif

diff --git a/arch/avr32/boards/atngw100/setup.c b/arch/avr32/boards/atngw100/setup.c
index 659d119..fafed4c 100644
--- a/arch/avr32/boards/atngw100/setup.c
+++ b/arch/avr32/boards/atngw100/setup.c

@@ -322,6 +322,6 @@
 	/* set_irq_type() after the arch_initcall for EIC has run, and
 	 * before the I2C subsystem could try using this IRQ.
 	 */
-	return set_irq_type(AT32_EXTINT(3), IRQ_TYPE_EDGE_FALLING);
+	return irq_set_irq_type(AT32_EXTINT(3), IRQ_TYPE_EDGE_FALLING);
 }
 arch_initcall(atngw100_arch_init);

diff --git a/arch/avr32/kernel/irq.c b/arch/avr32/kernel/irq.c
index 9604f77..bc3aa18 100644
--- a/arch/avr32/kernel/irq.c
+++ b/arch/avr32/kernel/irq.c

@@ -26,40 +26,3 @@
 {
 
 }
-
-#ifdef CONFIG_PROC_FS
-int show_interrupts(struct seq_file *p, void *v)
-{
-	int i = *(loff_t *)v, cpu;
-	struct irqaction *action;
-	unsigned long flags;
-
-	if (i == 0) {
-		seq_puts(p, "           ");
-		for_each_online_cpu(cpu)
-			seq_printf(p, "CPU%d       ", cpu);
-		seq_putc(p, '\n');
-	}
-
-	if (i < NR_IRQS) {
-		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
-		action = irq_desc[i].action;
-		if (!action)
-			goto unlock;
-
-		seq_printf(p, "%3d: ", i);
-		for_each_online_cpu(cpu)
-			seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu));
-		seq_printf(p, " %8s", irq_desc[i].chip->name ? : "-");
-		seq_printf(p, "  %s", action->name);
-		for (action = action->next; action; action = action->next)
-			seq_printf(p, ", %s", action->name);
-
-		seq_putc(p, '\n');
-	unlock:
-		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	}
-
-	return 0;
-}
-#endif

diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c
index e9d1205..47ba4b9 100644
--- a/arch/avr32/mach-at32ap/extint.c
+++ b/arch/avr32/mach-at32ap/extint.c

@@ -61,45 +61,42 @@
 static struct eic *nmi_eic;
 static bool nmi_enabled;
 
-static void eic_ack_irq(unsigned int irq)
+static void eic_ack_irq(struct irq_chip *d)
 {
-	struct eic *eic = get_irq_chip_data(irq);
-	eic_writel(eic, ICR, 1 << (irq - eic->first_irq));
+	struct eic *eic = irq_data_get_irq_chip_data(data);
+	eic_writel(eic, ICR, 1 << (d->irq - eic->first_irq));
 }
 
-static void eic_mask_irq(unsigned int irq)
+static void eic_mask_irq(struct irq_chip *d)
 {
-	struct eic *eic = get_irq_chip_data(irq);
-	eic_writel(eic, IDR, 1 << (irq - eic->first_irq));
+	struct eic *eic = irq_data_get_irq_chip_data(data);
+	eic_writel(eic, IDR, 1 << (d->irq - eic->first_irq));
 }
 
-static void eic_mask_ack_irq(unsigned int irq)
+static void eic_mask_ack_irq(struct irq_chip *d)
 {
-	struct eic *eic = get_irq_chip_data(irq);
-	eic_writel(eic, ICR, 1 << (irq - eic->first_irq));
-	eic_writel(eic, IDR, 1 << (irq - eic->first_irq));
+	struct eic *eic = irq_data_get_irq_chip_data(data);
+	eic_writel(eic, ICR, 1 << (d->irq - eic->first_irq));
+	eic_writel(eic, IDR, 1 << (d->irq - eic->first_irq));
 }
 
-static void eic_unmask_irq(unsigned int irq)
+static void eic_unmask_irq(struct irq_chip *d)
 {
-	struct eic *eic = get_irq_chip_data(irq);
-	eic_writel(eic, IER, 1 << (irq - eic->first_irq));
+	struct eic *eic = irq_data_get_irq_chip_data(data);
+	eic_writel(eic, IER, 1 << (d->irq - eic->first_irq));
 }
 
-static int eic_set_irq_type(unsigned int irq, unsigned int flow_type)
+static int eic_set_irq_type(struct irq_chip *d, unsigned int flow_type)
 {
-	struct eic *eic = get_irq_chip_data(irq);
-	struct irq_desc *desc;
+	struct eic *eic = irq_data_get_irq_chip_data(data);
+	unsigned int irq = d->irq;
 	unsigned int i = irq - eic->first_irq;
 	u32 mode, edge, level;
-	int ret = 0;
 
 	flow_type &= IRQ_TYPE_SENSE_MASK;
 	if (flow_type == IRQ_TYPE_NONE)
 		flow_type = IRQ_TYPE_LEVEL_LOW;
 
-	desc = &irq_desc[irq];
-
 	mode = eic_readl(eic, MODE);
 	edge = eic_readl(eic, EDGE);
 	level = eic_readl(eic, LEVEL);
@@ -122,39 +119,34 @@
 		edge &= ~(1 << i);
 		break;
 	default:
-		ret = -EINVAL;
-		break;
+		return -EINVAL;
 	}
 
-	if (ret == 0) {
-		eic_writel(eic, MODE, mode);
-		eic_writel(eic, EDGE, edge);
-		eic_writel(eic, LEVEL, level);
+	eic_writel(eic, MODE, mode);
+	eic_writel(eic, EDGE, edge);
+	eic_writel(eic, LEVEL, level);
 
-		if (flow_type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) {
-			flow_type |= IRQ_LEVEL;
-			__set_irq_handler_unlocked(irq, handle_level_irq);
-		} else
-			__set_irq_handler_unlocked(irq, handle_edge_irq);
-		desc->status &= ~(IRQ_TYPE_SENSE_MASK | IRQ_LEVEL);
-		desc->status |= flow_type;
-	}
+	irqd_set_trigger_type(d, flow_type);
+	if (flow_type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
+		__irq_set_handler_locked(irq, handle_level_irq);
+	else
+		__irq_set_handler_locked(irq, handle_edge_irq);
 
-	return ret;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
 static struct irq_chip eic_chip = {
 	.name		= "eic",
-	.ack		= eic_ack_irq,
-	.mask		= eic_mask_irq,
-	.mask_ack	= eic_mask_ack_irq,
-	.unmask		= eic_unmask_irq,
-	.set_type	= eic_set_irq_type,
+	.irq_ack	= eic_ack_irq,
+	.irq_mask	= eic_mask_irq,
+	.irq_mask_ack	= eic_mask_ack_irq,
+	.irq_unmask	= eic_unmask_irq,
+	.irq_set_type	= eic_set_irq_type,
 };
 
 static void demux_eic_irq(unsigned int irq, struct irq_desc *desc)
 {
-	struct eic *eic = desc->handler_data;
+	struct eic *eic = irq_desc_get_handler_data(desc);
 	unsigned long status, pending;
 	unsigned int i;
 
@@ -234,13 +226,13 @@
 	eic->chip = &eic_chip;
 
 	for (i = 0; i < nr_of_irqs; i++) {
-		set_irq_chip_and_handler(eic->first_irq + i, &eic_chip,
+		irq_set_chip_and_handler(eic->first_irq + i, &eic_chip,
 					 handle_level_irq);
-		set_irq_chip_data(eic->first_irq + i, eic);
+		irq_set_chip_data(eic->first_irq + i, eic);
 	}
 
-	set_irq_chained_handler(int_irq, demux_eic_irq);
-	set_irq_data(int_irq, eic);
+	irq_set_chained_handler(int_irq, demux_eic_irq);
+	irq_set_handler_data(int_irq, eic);
 
 	if (pdev->id == 0) {
 		nmi_eic = eic;

diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c
index 994c4545..21ce35f 100644
--- a/arch/avr32/mach-at32ap/intc.c
+++ b/arch/avr32/mach-at32ap/intc.c

@@ -34,12 +34,12 @@
  * TODO: We may be able to implement mask/unmask by setting IxM flags
  * in the status register.
  */
-static void intc_mask_irq(unsigned int irq)
+static void intc_mask_irq(struct irq_data *d)
 {
 
 }
 
-static void intc_unmask_irq(unsigned int irq)
+static void intc_unmask_irq(struct irq_data *d)
 {
 
 }
@@ -47,8 +47,8 @@
 static struct intc intc0 = {
 	.chip = {
 		.name		= "intc",
-		.mask		= intc_mask_irq,
-		.unmask		= intc_unmask_irq,
+		.irq_mask	= intc_mask_irq,
+		.irq_unmask	= intc_unmask_irq,
 	},
 };
 
@@ -57,7 +57,6 @@
  */
 asmlinkage void do_IRQ(int level, struct pt_regs *regs)
 {
-	struct irq_desc *desc;
 	struct pt_regs *old_regs;
 	unsigned int irq;
 	unsigned long status_reg;
@@ -69,8 +68,7 @@
 	irq_enter();
 
 	irq = intc_readl(&intc0, INTCAUSE0 - 4 * level);
-	desc = irq_desc + irq;
-	desc->handle_irq(irq, desc);
+	generic_handle_irq(irq);
 
 	/*
 	 * Clear all interrupt level masks so that we may handle
@@ -128,7 +126,7 @@
 		intc_writel(&intc0, INTPR0 + 4 * i, offset);
 		readback = intc_readl(&intc0, INTPR0 + 4 * i);
 		if (readback == offset)
-			set_irq_chip_and_handler(i, &intc0.chip,
+			irq_set_chip_and_handler(i, &intc0.chip,
 						 handle_simple_irq);
 	}
 

diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c
index 09a274c..3753410 100644
--- a/arch/avr32/mach-at32ap/pio.c
+++ b/arch/avr32/mach-at32ap/pio.c

@@ -249,23 +249,23 @@
 
 /* GPIO IRQ support */
 
-static void gpio_irq_mask(unsigned irq)
+static void gpio_irq_mask(struct irq_data *d)
 {
-	unsigned		gpio = irq_to_gpio(irq);
+	unsigned		gpio = irq_to_gpio(d->irq);
 	struct pio_device	*pio = &pio_dev[gpio >> 5];
 
 	pio_writel(pio, IDR, 1 << (gpio & 0x1f));
 }
 
-static void gpio_irq_unmask(unsigned irq)
+static void gpio_irq_unmask(struct irq_data *d))
 {
-	unsigned		gpio = irq_to_gpio(irq);
+	unsigned		gpio = irq_to_gpio(d->irq);
 	struct pio_device	*pio = &pio_dev[gpio >> 5];
 
 	pio_writel(pio, IER, 1 << (gpio & 0x1f));
 }
 
-static int gpio_irq_type(unsigned irq, unsigned type)
+static int gpio_irq_type(struct irq_data *d, unsigned type)
 {
 	if (type != IRQ_TYPE_EDGE_BOTH && type != IRQ_TYPE_NONE)
 		return -EINVAL;
@@ -275,20 +275,19 @@
 
 static struct irq_chip gpio_irqchip = {
 	.name		= "gpio",
-	.mask		= gpio_irq_mask,
-	.unmask		= gpio_irq_unmask,
-	.set_type	= gpio_irq_type,
+	.irq_mask	= gpio_irq_mask,
+	.irq_unmask	= gpio_irq_unmask,
+	.irq_set_type	= gpio_irq_type,
 };
 
 static void gpio_irq_handler(unsigned irq, struct irq_desc *desc)
 {
-	struct pio_device	*pio = get_irq_chip_data(irq);
+	struct pio_device	*pio = get_irq_desc_chip_data(desc);
 	unsigned		gpio_irq;
 
-	gpio_irq = (unsigned) get_irq_data(irq);
+	gpio_irq = (unsigned) irq_get_handler_data(irq);
 	for (;;) {
 		u32		isr;
-		struct irq_desc	*d;
 
 		/* ack pending GPIO interrupts */
 		isr = pio_readl(pio, ISR) & pio_readl(pio, IMR);
@@ -301,9 +300,7 @@
 			isr &= ~(1 << i);
 
 			i += gpio_irq;
-			d = &irq_desc[i];
-
-			d->handle_irq(i, d);
+			generic_handle_irq(i);
 		} while (isr);
 	}
 }
@@ -313,16 +310,16 @@
 {
 	unsigned	i;
 
-	set_irq_chip_data(irq, pio);
-	set_irq_data(irq, (void *) gpio_irq);
+	irq_set_chip_data(irq, pio);
+	irq_set_handler_data(irq, (void *)gpio_irq);
 
 	for (i = 0; i < 32; i++, gpio_irq++) {
-		set_irq_chip_data(gpio_irq, pio);
-		set_irq_chip_and_handler(gpio_irq, &gpio_irqchip,
-				handle_simple_irq);
+		irq_set_chip_data(gpio_irq, pio);
+		irq_set_chip_and_handler(gpio_irq, &gpio_irqchip,
+					 handle_simple_irq);
 	}
 
-	set_irq_chained_handler(irq, gpio_irq_handler);
+	irq_set_chained_handler(irq, gpio_irq_handler);
 }
 
 /*--------------------------------------------------------------------------*/

diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 4db5b46..04a7fc5 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig

@@ -276,7 +276,6 @@
 	select MTD_CHAR
 	select MTD_BLOCK
 	select MTD_PARTITIONS
-	select MTD_CONCAT
 	select MTD_COMPLEX_MAPPINGS
 	help
 	  This option enables MTD mapping of flash devices.  Needed to use

diff --git a/arch/cris/arch-v10/drivers/axisflashmap.c b/arch/cris/arch-v10/drivers/axisflashmap.c
index b207970..ed708e1 100644
--- a/arch/cris/arch-v10/drivers/axisflashmap.c
+++ b/arch/cris/arch-v10/drivers/axisflashmap.c

@@ -234,7 +234,6 @@
 	}
 
 	if (mtd_cse0 && mtd_cse1) {
-#ifdef CONFIG_MTD_CONCAT
 		struct mtd_info *mtds[] = { mtd_cse0, mtd_cse1 };
 
 		/* Since the concatenation layer adds a small overhead we
@@ -246,11 +245,6 @@
 		 */
 		mtd_cse = mtd_concat_create(mtds, ARRAY_SIZE(mtds),
 					    "cse0+cse1");
-#else
-		printk(KERN_ERR "%s and %s: Cannot concatenate due to kernel "
-		       "(mis)configuration!\n", map_cse0.name, map_cse1.name);
-		mtd_cse = NULL;
-#endif
 		if (!mtd_cse) {
 			printk(KERN_ERR "%s and %s: Concatenation failed!\n",
 			       map_cse0.name, map_cse1.name);

diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index a2dd740..1633b12 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig

@@ -406,7 +406,6 @@
 	select MTD_CHAR
 	select MTD_BLOCK
 	select MTD_PARTITIONS
-	select MTD_CONCAT
 	select MTD_COMPLEX_MAPPINGS
 	help
 	  This option enables MTD mapping of flash devices.  Needed to use

diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c
index 51e1e85..3d75125 100644
--- a/arch/cris/arch-v32/drivers/axisflashmap.c
+++ b/arch/cris/arch-v32/drivers/axisflashmap.c

@@ -275,7 +275,6 @@
 	}
 
 	if (count > 1) {
-#ifdef CONFIG_MTD_CONCAT
 		/* Since the concatenation layer adds a small overhead we
 		 * could try to figure out if the chips in cse0 and cse1 are
 		 * identical and reprobe the whole cse0+cse1 window. But since
@@ -284,11 +283,6 @@
 		 * complicating the probing procedure.
 		 */
 		mtd_total = mtd_concat_create(mtds, count, "cse0+cse1");
-#else
-		printk(KERN_ERR "%s and %s: Cannot concatenate due to kernel "
-		       "(mis)configuration!\n", map_cse0.name, map_cse1.name);
-		mtd_toal = NULL;
-#endif
 		if (!mtd_total) {
 			printk(KERN_ERR "%s and %s: Concatenation failed!\n",
 				map_cse0.name, map_cse1.name);

diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 9624db1..931a1ac 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig

@@ -4,6 +4,7 @@
 	select HAVE_IDE
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_HARDIRQS_NO_DEPRECATED
+	select GENERIC_IRQ_SHOW
 
 config SYMBOL_PREFIX
 	string

diff --git a/arch/h8300/kernel/irq.c b/arch/h8300/kernel/irq.c
index 7643d39..1f67fed 100644
--- a/arch/h8300/kernel/irq.c
+++ b/arch/h8300/kernel/irq.c

@@ -155,7 +155,7 @@
 	setup_vector();
 
 	for (c = 0; c < NR_IRQS; c++)
-		set_irq_chip_and_handler(c, &h8300irq_chip, handle_simple_irq);
+		irq_set_chip_and_handler(c, &h8300irq_chip, handle_simple_irq);
 }
 
 asmlinkage void do_IRQ(int irq)
@@ -164,34 +164,3 @@
 	generic_handle_irq(irq);
 	irq_exit();
 }
-
-#if defined(CONFIG_PROC_FS)
-int show_interrupts(struct seq_file *p, void *v)
-{
-	int i = *(loff_t *) v;
-	struct irqaction * action;
-	unsigned long flags;
-
-	if (i == 0)
-		seq_puts(p, "           CPU0");
-
-	if (i < NR_IRQS) {
-		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
-		action = irq_desc[i].action;
-		if (!action)
-			goto unlock;
-		seq_printf(p, "%3d: ",i);
-		seq_printf(p, "%10u ", kstat_irqs(i));
-		seq_printf(p, " %14s", irq_desc[i].irq_data.chip->name);
-		seq_printf(p, "-%-8s", irq_desc[i].name);
-		seq_printf(p, "  %s", action->name);
-
-		for (action=action->next; action; action = action->next)
-			seq_printf(p, ", %s", action->name);
-		seq_putc(p, '\n');
-unlock:
-		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	}
-	return 0;
-}
-#endif

diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 54bf540..9a018cd 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c

@@ -36,7 +36,7 @@
  * Shows a simple page count of reserved and used pages in the system.
  * For discontig machines, it does this on a per-pgdat basis.
  */
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	int i, total_reserved = 0;
 	int total_shared = 0, total_cached = 0;

diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 6162032..82ab1bc 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c

@@ -614,7 +614,7 @@
  * Shows a simple page count of reserved and used pages in the system.
  * For discontig machines, it does this on a per-pgdat basis.
  */
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	int i, total_reserved = 0;
 	int total_shared = 0, total_cached = 0;

diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 62afe23..b28d090 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig

@@ -10,6 +10,7 @@
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_HARDIRQS_NO_DEPRECATED
 	select GENERIC_IRQ_PROBE
+	select GENERIC_IRQ_SHOW
 
 config SBUS
 	bool

diff --git a/arch/m32r/kernel/irq.c b/arch/m32r/kernel/irq.c
index 76eaf38..c7272b8 100644
--- a/arch/m32r/kernel/irq.c
+++ b/arch/m32r/kernel/irq.c

@@ -18,55 +18,10 @@
 
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
-#include <linux/seq_file.h>
 #include <linux/module.h>
 #include <asm/uaccess.h>
 
 /*
- * Generic, controller-independent functions:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-	int i = *(loff_t *) v, j;
-	struct irqaction * action;
-	unsigned long flags;
-
-	if (i == 0) {
-		seq_printf(p, "           ");
-		for_each_online_cpu(j)
-			seq_printf(p, "CPU%d       ",j);
-		seq_putc(p, '\n');
-	}
-
-	if (i < NR_IRQS) {
-		struct irq_desc *desc = irq_to_desc(i);
-
-		raw_spin_lock_irqsave(&desc->lock, flags);
-		action = desc->action;
-		if (!action)
-			goto skip;
-		seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-#endif
-		seq_printf(p, " %14s", desc->irq_data.chip->name);
-		seq_printf(p, "  %s", action->name);
-
-		for (action=action->next; action; action = action->next)
-			seq_printf(p, ", %s", action->name);
-
-		seq_putc(p, '\n');
-skip:
-		raw_spin_unlock_irqrestore(&desc->lock, flags);
-	}
-	return 0;
-}
-
-/*
  * do_IRQ handles all normal device IRQs (the special
  * SMP cross-CPU interrupts have their own specific
  * handlers).

diff --git a/arch/m32r/platforms/m32104ut/setup.c b/arch/m32r/platforms/m32104ut/setup.c
index 4a693d0..34671d3 100644
--- a/arch/m32r/platforms/m32104ut/setup.c
+++ b/arch/m32r/platforms/m32104ut/setup.c

@@ -76,7 +76,7 @@
 
 #if defined(CONFIG_SMC91X)
 	/* INT#0: LAN controller on M32104UT-LAN (SMC91C111)*/
-	set_irq_chip_and_handler(M32R_IRQ_INT0, &m32104ut_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_INT0, &m32104ut_irq_type,
 				 handle_level_irq);
 	/* "H" level sense */
 	cu_data[M32R_IRQ_INT0].icucr = M32R_ICUCR_IEN | M32R_ICUCR_ISMOD11;
@@ -84,20 +84,20 @@
 #endif  /* CONFIG_SMC91X */
 
 	/* MFT2 : system timer */
-	set_irq_chip_and_handler(M32R_IRQ_MFT2, &m32104ut_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_MFT2, &m32104ut_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_MFT2].icucr = M32R_ICUCR_IEN;
 	disable_m32104ut_irq(M32R_IRQ_MFT2);
 
 #ifdef CONFIG_SERIAL_M32R_SIO
 	/* SIO0_R : uart receive data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_R, &m32104ut_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_R, &m32104ut_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_R].icucr = M32R_ICUCR_IEN;
 	disable_m32104ut_irq(M32R_IRQ_SIO0_R);
 
 	/* SIO0_S : uart send data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_S, &m32104ut_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_S, &m32104ut_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_S].icucr = M32R_ICUCR_IEN;
 	disable_m32104ut_irq(M32R_IRQ_SIO0_S);

diff --git a/arch/m32r/platforms/m32700ut/setup.c b/arch/m32r/platforms/m32700ut/setup.c
index 2074bcc..1053e1c 100644
--- a/arch/m32r/platforms/m32700ut/setup.c
+++ b/arch/m32r/platforms/m32700ut/setup.c

@@ -259,76 +259,76 @@
 {
 #if defined(CONFIG_SMC91X)
 	/* INT#0: LAN controller on M32700UT-LAN (SMC91C111)*/
-	set_irq_chip_and_handler(M32700UT_LAN_IRQ_LAN,
+	irq_set_chip_and_handler(M32700UT_LAN_IRQ_LAN,
 				 &m32700ut_lanpld_irq_type, handle_level_irq);
 	lanpld_icu_data[irq2lanpldirq(M32700UT_LAN_IRQ_LAN)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD02;	/* "H" edge sense */
 	disable_m32700ut_lanpld_irq(M32700UT_LAN_IRQ_LAN);
 #endif  /* CONFIG_SMC91X */
 
 	/* MFT2 : system timer */
-	set_irq_chip_and_handler(M32R_IRQ_MFT2, &m32700ut_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_MFT2, &m32700ut_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_MFT2].icucr = M32R_ICUCR_IEN;
 	disable_m32700ut_irq(M32R_IRQ_MFT2);
 
 	/* SIO0 : receive */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_R, &m32700ut_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_R, &m32700ut_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_R].icucr = 0;
 	disable_m32700ut_irq(M32R_IRQ_SIO0_R);
 
 	/* SIO0 : send */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_S, &m32700ut_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_S, &m32700ut_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_S].icucr = 0;
 	disable_m32700ut_irq(M32R_IRQ_SIO0_S);
 
 	/* SIO1 : receive */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_R, &m32700ut_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_R, &m32700ut_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_R].icucr = 0;
 	disable_m32700ut_irq(M32R_IRQ_SIO1_R);
 
 	/* SIO1 : send */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_S, &m32700ut_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_S, &m32700ut_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_S].icucr = 0;
 	disable_m32700ut_irq(M32R_IRQ_SIO1_S);
 
 	/* DMA1 : */
-	set_irq_chip_and_handler(M32R_IRQ_DMA1, &m32700ut_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_DMA1, &m32700ut_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_DMA1].icucr = 0;
 	disable_m32700ut_irq(M32R_IRQ_DMA1);
 
 #ifdef CONFIG_SERIAL_M32R_PLDSIO
 	/* INT#1: SIO0 Receive on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_SIO0_RCV, &m32700ut_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_SIO0_RCV, &m32700ut_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_SIO0_RCV)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD03;
 	disable_m32700ut_pld_irq(PLD_IRQ_SIO0_RCV);
 
 	/* INT#1: SIO0 Send on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_SIO0_SND, &m32700ut_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_SIO0_SND, &m32700ut_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_SIO0_SND)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD03;
 	disable_m32700ut_pld_irq(PLD_IRQ_SIO0_SND);
 #endif  /* CONFIG_SERIAL_M32R_PLDSIO */
 
 	/* INT#1: CFC IREQ on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_CFIREQ, &m32700ut_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_CFIREQ, &m32700ut_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_CFIREQ)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD01;	/* 'L' level sense */
 	disable_m32700ut_pld_irq(PLD_IRQ_CFIREQ);
 
 	/* INT#1: CFC Insert on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_CFC_INSERT, &m32700ut_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_CFC_INSERT, &m32700ut_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_CFC_INSERT)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD00;	/* 'L' edge sense */
 	disable_m32700ut_pld_irq(PLD_IRQ_CFC_INSERT);
 
 	/* INT#1: CFC Eject on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_CFC_EJECT, &m32700ut_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_CFC_EJECT, &m32700ut_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_CFC_EJECT)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD02;	/* 'H' edge sense */
 	disable_m32700ut_pld_irq(PLD_IRQ_CFC_EJECT);
@@ -349,7 +349,7 @@
 
 #if defined(CONFIG_USB)
 	outw(USBCR_OTGS, USBCR); 	/* USBCR: non-OTG */
-	set_irq_chip_and_handler(M32700UT_LCD_IRQ_USB_INT1,
+	irq_set_chip_and_handler(M32700UT_LCD_IRQ_USB_INT1,
 				 &m32700ut_lcdpld_irq_type, handle_level_irq);
 
 	lcdpld_icu_data[irq2lcdpldirq(M32700UT_LCD_IRQ_USB_INT1)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD01;	/* "L" level sense */
@@ -366,7 +366,7 @@
 	/*
 	 * INT3# is used for AR
 	 */
-	set_irq_chip_and_handler(M32R_IRQ_INT3, &m32700ut_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_INT3, &m32700ut_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_INT3].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10;
 	disable_m32700ut_irq(M32R_IRQ_INT3);

diff --git a/arch/m32r/platforms/mappi/setup.c b/arch/m32r/platforms/mappi/setup.c
index cdd8c45..35130ac 100644
--- a/arch/m32r/platforms/mappi/setup.c
+++ b/arch/m32r/platforms/mappi/setup.c

@@ -75,39 +75,39 @@
 
 #ifdef CONFIG_NE2000
 	/* INT0 : LAN controller (RTL8019AS) */
-	set_irq_chip_and_handler(M32R_IRQ_INT0, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_INT0, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_INT0].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD11;
 	disable_mappi_irq(M32R_IRQ_INT0);
 #endif /* CONFIG_M32R_NE2000 */
 
 	/* MFT2 : system timer */
-	set_irq_chip_and_handler(M32R_IRQ_MFT2, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_MFT2, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_MFT2].icucr = M32R_ICUCR_IEN;
 	disable_mappi_irq(M32R_IRQ_MFT2);
 
 #ifdef CONFIG_SERIAL_M32R_SIO
 	/* SIO0_R : uart receive data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_R, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_R, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_R].icucr = 0;
 	disable_mappi_irq(M32R_IRQ_SIO0_R);
 
 	/* SIO0_S : uart send data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_S, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_S, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_S].icucr = 0;
 	disable_mappi_irq(M32R_IRQ_SIO0_S);
 
 	/* SIO1_R : uart receive data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_R, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_R, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_R].icucr = 0;
 	disable_mappi_irq(M32R_IRQ_SIO1_R);
 
 	/* SIO1_S : uart send data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_S, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_S, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_S].icucr = 0;
 	disable_mappi_irq(M32R_IRQ_SIO1_S);
@@ -115,13 +115,13 @@
 
 #if defined(CONFIG_M32R_PCC)
 	/* INT1 : pccard0 interrupt */
-	set_irq_chip_and_handler(M32R_IRQ_INT1, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_INT1, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_INT1].icucr = M32R_ICUCR_IEN | M32R_ICUCR_ISMOD00;
 	disable_mappi_irq(M32R_IRQ_INT1);
 
 	/* INT2 : pccard1 interrupt */
-	set_irq_chip_and_handler(M32R_IRQ_INT2, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_INT2, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_INT2].icucr = M32R_ICUCR_IEN | M32R_ICUCR_ISMOD00;
 	disable_mappi_irq(M32R_IRQ_INT2);

diff --git a/arch/m32r/platforms/mappi2/setup.c b/arch/m32r/platforms/mappi2/setup.c
index 9117c30..f3ed6b6 100644
--- a/arch/m32r/platforms/mappi2/setup.c
+++ b/arch/m32r/platforms/mappi2/setup.c

@@ -76,38 +76,38 @@
 {
 #if defined(CONFIG_SMC91X)
 	/* INT0 : LAN controller (SMC91111) */
-	set_irq_chip_and_handler(M32R_IRQ_INT0, &mappi2_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_INT0, &mappi2_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_INT0].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10;
 	disable_mappi2_irq(M32R_IRQ_INT0);
 #endif  /* CONFIG_SMC91X */
 
 	/* MFT2 : system timer */
-	set_irq_chip_and_handler(M32R_IRQ_MFT2, &mappi2_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_MFT2, &mappi2_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_MFT2].icucr = M32R_ICUCR_IEN;
 	disable_mappi2_irq(M32R_IRQ_MFT2);
 
 #ifdef CONFIG_SERIAL_M32R_SIO
 	/* SIO0_R : uart receive data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_R, &mappi2_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_R, &mappi2_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_R].icucr = 0;
 	disable_mappi2_irq(M32R_IRQ_SIO0_R);
 
 	/* SIO0_S : uart send data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_S, &mappi2_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_S, &mappi2_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_S].icucr = 0;
 	disable_mappi2_irq(M32R_IRQ_SIO0_S);
 	/* SIO1_R : uart receive data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_R, &mappi2_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_R, &mappi2_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_R].icucr = 0;
 	disable_mappi2_irq(M32R_IRQ_SIO1_R);
 
 	/* SIO1_S : uart send data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_S, &mappi2_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_S, &mappi2_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_S].icucr = 0;
 	disable_mappi2_irq(M32R_IRQ_SIO1_S);
@@ -115,27 +115,27 @@
 
 #if defined(CONFIG_USB)
 	/* INT1 : USB Host controller interrupt */
-	set_irq_chip_and_handler(M32R_IRQ_INT1, &mappi2_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_INT1, &mappi2_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_INT1].icucr = M32R_ICUCR_ISMOD01;
 	disable_mappi2_irq(M32R_IRQ_INT1);
 #endif /* CONFIG_USB */
 
 	/* ICUCR40: CFC IREQ */
-	set_irq_chip_and_handler(PLD_IRQ_CFIREQ, &mappi2_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_CFIREQ, &mappi2_irq_type,
 				 handle_level_irq);
 	icu_data[PLD_IRQ_CFIREQ].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD01;
 	disable_mappi2_irq(PLD_IRQ_CFIREQ);
 
 #if defined(CONFIG_M32R_CFC)
 	/* ICUCR41: CFC Insert */
-	set_irq_chip_and_handler(PLD_IRQ_CFC_INSERT, &mappi2_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_CFC_INSERT, &mappi2_irq_type,
 				 handle_level_irq);
 	icu_data[PLD_IRQ_CFC_INSERT].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD00;
 	disable_mappi2_irq(PLD_IRQ_CFC_INSERT);
 
 	/* ICUCR42: CFC Eject */
-	set_irq_chip_and_handler(PLD_IRQ_CFC_EJECT, &mappi2_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_CFC_EJECT, &mappi2_irq_type,
 				 handle_level_irq);
 	icu_data[PLD_IRQ_CFC_EJECT].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10;
 	disable_mappi2_irq(PLD_IRQ_CFC_EJECT);

diff --git a/arch/m32r/platforms/mappi3/setup.c b/arch/m32r/platforms/mappi3/setup.c
index b44f5de..2408e35 100644
--- a/arch/m32r/platforms/mappi3/setup.c
+++ b/arch/m32r/platforms/mappi3/setup.c

@@ -75,38 +75,38 @@
 {
 #if defined(CONFIG_SMC91X)
 	/* INT0 : LAN controller (SMC91111) */
-	set_irq_chip_and_handler(M32R_IRQ_INT0, &mappi3_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_INT0, &mappi3_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_INT0].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10;
 	disable_mappi3_irq(M32R_IRQ_INT0);
 #endif  /* CONFIG_SMC91X */
 
 	/* MFT2 : system timer */
-	set_irq_chip_and_handler(M32R_IRQ_MFT2, &mappi3_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_MFT2, &mappi3_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_MFT2].icucr = M32R_ICUCR_IEN;
 	disable_mappi3_irq(M32R_IRQ_MFT2);
 
 #ifdef CONFIG_SERIAL_M32R_SIO
 	/* SIO0_R : uart receive data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_R, &mappi3_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_R, &mappi3_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_R].icucr = 0;
 	disable_mappi3_irq(M32R_IRQ_SIO0_R);
 
 	/* SIO0_S : uart send data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_S, &mappi3_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_S, &mappi3_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_S].icucr = 0;
 	disable_mappi3_irq(M32R_IRQ_SIO0_S);
 	/* SIO1_R : uart receive data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_R, &mappi3_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_R, &mappi3_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_R].icucr = 0;
 	disable_mappi3_irq(M32R_IRQ_SIO1_R);
 
 	/* SIO1_S : uart send data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_S, &mappi3_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_S, &mappi3_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_S].icucr = 0;
 	disable_mappi3_irq(M32R_IRQ_SIO1_S);
@@ -114,21 +114,21 @@
 
 #if defined(CONFIG_USB)
 	/* INT1 : USB Host controller interrupt */
-	set_irq_chip_and_handler(M32R_IRQ_INT1, &mappi3_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_INT1, &mappi3_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_INT1].icucr = M32R_ICUCR_ISMOD01;
 	disable_mappi3_irq(M32R_IRQ_INT1);
 #endif /* CONFIG_USB */
 
 	/* CFC IREQ */
-	set_irq_chip_and_handler(PLD_IRQ_CFIREQ, &mappi3_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_CFIREQ, &mappi3_irq_type,
 				 handle_level_irq);
 	icu_data[PLD_IRQ_CFIREQ].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD01;
 	disable_mappi3_irq(PLD_IRQ_CFIREQ);
 
 #if defined(CONFIG_M32R_CFC)
 	/* ICUCR41: CFC Insert & eject */
-	set_irq_chip_and_handler(PLD_IRQ_CFC_INSERT, &mappi3_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_CFC_INSERT, &mappi3_irq_type,
 				 handle_level_irq);
 	icu_data[PLD_IRQ_CFC_INSERT].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD00;
 	disable_mappi3_irq(PLD_IRQ_CFC_INSERT);
@@ -136,7 +136,7 @@
 #endif /* CONFIG_M32R_CFC */
 
 	/* IDE IREQ */
-	set_irq_chip_and_handler(PLD_IRQ_IDEIREQ, &mappi3_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_IDEIREQ, &mappi3_irq_type,
 				 handle_level_irq);
 	icu_data[PLD_IRQ_IDEIREQ].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10;
 	disable_mappi3_irq(PLD_IRQ_IDEIREQ);

diff --git a/arch/m32r/platforms/oaks32r/setup.c b/arch/m32r/platforms/oaks32r/setup.c
index 19a02db..83b46b0 100644
--- a/arch/m32r/platforms/oaks32r/setup.c
+++ b/arch/m32r/platforms/oaks32r/setup.c

@@ -74,39 +74,39 @@
 
 #ifdef CONFIG_NE2000
 	/* INT3 : LAN controller (RTL8019AS) */
-	set_irq_chip_and_handler(M32R_IRQ_INT3, &oaks32r_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_INT3, &oaks32r_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_INT3].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10;
 	disable_oaks32r_irq(M32R_IRQ_INT3);
 #endif /* CONFIG_M32R_NE2000 */
 
 	/* MFT2 : system timer */
-	set_irq_chip_and_handler(M32R_IRQ_MFT2, &oaks32r_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_MFT2, &oaks32r_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_MFT2].icucr = M32R_ICUCR_IEN;
 	disable_oaks32r_irq(M32R_IRQ_MFT2);
 
 #ifdef CONFIG_SERIAL_M32R_SIO
 	/* SIO0_R : uart receive data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_R, &oaks32r_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_R, &oaks32r_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_R].icucr = 0;
 	disable_oaks32r_irq(M32R_IRQ_SIO0_R);
 
 	/* SIO0_S : uart send data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_S, &oaks32r_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_S, &oaks32r_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_S].icucr = 0;
 	disable_oaks32r_irq(M32R_IRQ_SIO0_S);
 
 	/* SIO1_R : uart receive data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_R, &oaks32r_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_R, &oaks32r_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_R].icucr = 0;
 	disable_oaks32r_irq(M32R_IRQ_SIO1_R);
 
 	/* SIO1_S : uart send data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_S, &oaks32r_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_S, &oaks32r_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_S].icucr = 0;
 	disable_oaks32r_irq(M32R_IRQ_SIO1_S);

diff --git a/arch/m32r/platforms/opsput/setup.c b/arch/m32r/platforms/opsput/setup.c
index 1273154..3266070 100644
--- a/arch/m32r/platforms/opsput/setup.c
+++ b/arch/m32r/platforms/opsput/setup.c

@@ -259,76 +259,76 @@
 {
 #if defined(CONFIG_SMC91X)
 	/* INT#0: LAN controller on OPSPUT-LAN (SMC91C111)*/
-	set_irq_chip_and_handler(OPSPUT_LAN_IRQ_LAN, &opsput_lanpld_irq_type,
+	irq_set_chip_and_handler(OPSPUT_LAN_IRQ_LAN, &opsput_lanpld_irq_type,
 				 handle_level_irq);
 	lanpld_icu_data[irq2lanpldirq(OPSPUT_LAN_IRQ_LAN)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD02;	/* "H" edge sense */
 	disable_opsput_lanpld_irq(OPSPUT_LAN_IRQ_LAN);
 #endif  /* CONFIG_SMC91X */
 
 	/* MFT2 : system timer */
-	set_irq_chip_and_handler(M32R_IRQ_MFT2, &opsput_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_MFT2, &opsput_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_MFT2].icucr = M32R_ICUCR_IEN;
 	disable_opsput_irq(M32R_IRQ_MFT2);
 
 	/* SIO0 : receive */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_R, &opsput_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_R, &opsput_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_R].icucr = 0;
 	disable_opsput_irq(M32R_IRQ_SIO0_R);
 
 	/* SIO0 : send */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_S, &opsput_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_S, &opsput_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_S].icucr = 0;
 	disable_opsput_irq(M32R_IRQ_SIO0_S);
 
 	/* SIO1 : receive */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_R, &opsput_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_R, &opsput_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_R].icucr = 0;
 	disable_opsput_irq(M32R_IRQ_SIO1_R);
 
 	/* SIO1 : send */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_S, &opsput_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_S, &opsput_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_S].icucr = 0;
 	disable_opsput_irq(M32R_IRQ_SIO1_S);
 
 	/* DMA1 : */
-	set_irq_chip_and_handler(M32R_IRQ_DMA1, &opsput_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_DMA1, &opsput_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_DMA1].icucr = 0;
 	disable_opsput_irq(M32R_IRQ_DMA1);
 
 #ifdef CONFIG_SERIAL_M32R_PLDSIO
 	/* INT#1: SIO0 Receive on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_SIO0_RCV, &opsput_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_SIO0_RCV, &opsput_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_SIO0_RCV)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD03;
 	disable_opsput_pld_irq(PLD_IRQ_SIO0_RCV);
 
 	/* INT#1: SIO0 Send on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_SIO0_SND, &opsput_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_SIO0_SND, &opsput_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_SIO0_SND)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD03;
 	disable_opsput_pld_irq(PLD_IRQ_SIO0_SND);
 #endif  /* CONFIG_SERIAL_M32R_PLDSIO */
 
 	/* INT#1: CFC IREQ on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_CFIREQ, &opsput_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_CFIREQ, &opsput_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_CFIREQ)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD01;	/* 'L' level sense */
 	disable_opsput_pld_irq(PLD_IRQ_CFIREQ);
 
 	/* INT#1: CFC Insert on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_CFC_INSERT, &opsput_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_CFC_INSERT, &opsput_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_CFC_INSERT)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD00;	/* 'L' edge sense */
 	disable_opsput_pld_irq(PLD_IRQ_CFC_INSERT);
 
 	/* INT#1: CFC Eject on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_CFC_EJECT, &opsput_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_CFC_EJECT, &opsput_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_CFC_EJECT)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD02;	/* 'H' edge sense */
 	disable_opsput_pld_irq(PLD_IRQ_CFC_EJECT);
@@ -349,7 +349,7 @@
 
 #if defined(CONFIG_USB)
 	outw(USBCR_OTGS, USBCR);	/* USBCR: non-OTG */
-	set_irq_chip_and_handler(OPSPUT_LCD_IRQ_USB_INT1,
+	irq_set_chip_and_handler(OPSPUT_LCD_IRQ_USB_INT1,
 				 &opsput_lcdpld_irq_type, handle_level_irq);
 	lcdpld_icu_data[irq2lcdpldirq(OPSPUT_LCD_IRQ_USB_INT1)].icucr = PLD_ICUCR_IEN|PLD_ICUCR_ISMOD01;	/* "L" level sense */
 	disable_opsput_lcdpld_irq(OPSPUT_LCD_IRQ_USB_INT1);
@@ -365,7 +365,7 @@
 	/*
 	 * INT3# is used for AR
 	 */
-	set_irq_chip_and_handler(M32R_IRQ_INT3, &opsput_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_INT3, &opsput_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_INT3].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10;
 	disable_opsput_irq(M32R_IRQ_INT3);

diff --git a/arch/m32r/platforms/usrv/setup.c b/arch/m32r/platforms/usrv/setup.c
index f3cff26..0c7a1e8 100644
--- a/arch/m32r/platforms/usrv/setup.c
+++ b/arch/m32r/platforms/usrv/setup.c

@@ -138,32 +138,32 @@
 		once++;
 
 	/* MFT2 : system timer */
-	set_irq_chip_and_handler(M32R_IRQ_MFT2, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_MFT2, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_MFT2].icucr = M32R_ICUCR_IEN;
 	disable_mappi_irq(M32R_IRQ_MFT2);
 
 #if defined(CONFIG_SERIAL_M32R_SIO)
 	/* SIO0_R : uart receive data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_R, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_R, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_R].icucr = 0;
 	disable_mappi_irq(M32R_IRQ_SIO0_R);
 
 	/* SIO0_S : uart send data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO0_S, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO0_S, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO0_S].icucr = 0;
 	disable_mappi_irq(M32R_IRQ_SIO0_S);
 
 	/* SIO1_R : uart receive data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_R, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_R, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_R].icucr = 0;
 	disable_mappi_irq(M32R_IRQ_SIO1_R);
 
 	/* SIO1_S : uart send data */
-	set_irq_chip_and_handler(M32R_IRQ_SIO1_S, &mappi_irq_type,
+	irq_set_chip_and_handler(M32R_IRQ_SIO1_S, &mappi_irq_type,
 				 handle_level_irq);
 	icu_data[M32R_IRQ_SIO1_S].icucr = 0;
 	disable_mappi_irq(M32R_IRQ_SIO1_S);
@@ -171,7 +171,7 @@
 
 	/* INT#67-#71: CFC#0 IREQ on PLD */
 	for (i = 0 ; i < CONFIG_M32R_CFC_NUM ; i++ ) {
-		set_irq_chip_and_handler(PLD_IRQ_CF0 + i,
+		irq_set_chip_and_handler(PLD_IRQ_CF0 + i,
 					 &m32700ut_pld_irq_type,
 					 handle_level_irq);
 		pld_icu_data[irq2pldirq(PLD_IRQ_CF0 + i)].icucr
@@ -181,14 +181,14 @@
 
 #if defined(CONFIG_SERIAL_8250) || defined(CONFIG_SERIAL_8250_MODULE)
 	/* INT#76: 16552D#0 IREQ on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_UART0, &m32700ut_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_UART0, &m32700ut_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_UART0)].icucr
 		= PLD_ICUCR_ISMOD03;	/* 'H' level sense */
 	disable_m32700ut_pld_irq(PLD_IRQ_UART0);
 
 	/* INT#77: 16552D#1 IREQ on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_UART1, &m32700ut_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_UART1, &m32700ut_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_UART1)].icucr
 		= PLD_ICUCR_ISMOD03;	/* 'H' level sense */
@@ -197,7 +197,7 @@
 
 #if defined(CONFIG_IDC_AK4524) || defined(CONFIG_IDC_AK4524_MODULE)
 	/* INT#80: AK4524 IREQ on PLD */
-	set_irq_chip_and_handler(PLD_IRQ_SNDINT, &m32700ut_pld_irq_type,
+	irq_set_chip_and_handler(PLD_IRQ_SNDINT, &m32700ut_pld_irq_type,
 				 handle_level_irq);
 	pld_icu_data[irq2pldirq(PLD_IRQ_SNDINT)].icucr
 		= PLD_ICUCR_ISMOD01;	/* 'L' level sense */

diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 525174d..6e056d3 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig

@@ -1,13 +1,11 @@
 config M68K
 	bool
 	default y
-	select HAVE_AOUT
 	select HAVE_IDE
-	select GENERIC_ATOMIC64
-
-config MMU
-	bool
-	default y
+	select HAVE_AOUT if MMU
+	select GENERIC_ATOMIC64 if MMU
+	select HAVE_GENERIC_HARDIRQS if !MMU
+	select GENERIC_HARDIRQS_NO_DEPRECATED if !MMU
 
 config RWSEM_GENERIC_SPINLOCK
 	bool
@@ -34,457 +32,67 @@
 	bool
 	default y
 
-config GENERIC_IOMAP
-	bool
-	default y
-
-config ARCH_MAY_HAVE_PC_FDC
-	bool
-	depends on BROKEN && (Q40 || SUN3X)
-	default y
-
 config NO_IOPORT
 	def_bool y
 
 config NO_DMA
-	def_bool SUN3
+	def_bool (MMU && SUN3) || (!MMU && !COLDFIRE)
 
+config ZONE_DMA
+	bool
+	default y
 config HZ
 	int
+	default 1000 if CLEOPATRA
 	default 100
 
-config ARCH_USES_GETTIMEOFFSET
-	def_bool y
-
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
 
+config MMU
+	bool "MMU-based Paged Memory Management Support"
+	default y
+	help
+	  Select if you want MMU-based virtualised addressing space
+	  support by paged memory management. If unsure, say 'Y'.
+
 menu "Platform dependent setup"
 
-config EISA
-	bool
-	---help---
-	  The Extended Industry Standard Architecture (EISA) bus was
-	  developed as an open alternative to the IBM MicroChannel bus.
-
-	  The EISA bus provided some of the features of the IBM MicroChannel
-	  bus while maintaining backward compatibility with cards made for
-	  the older ISA bus.  The EISA bus saw limited use between 1988 and
-	  1995 when it was made obsolete by the PCI bus.
-
-	  Say Y here if you are building a kernel for an EISA-based machine.
-
-	  Otherwise, say N.
-
-config MCA
-	bool
-	help
-	  MicroChannel Architecture is found in some IBM PS/2 machines and
-	  laptops.  It is a bus system similar to PCI or ISA. See
-	  <file:Documentation/mca.txt> (and especially the web page given
-	  there) before attempting to build an MCA bus kernel.
-
-config PCMCIA
-	tristate
-	---help---
-	  Say Y here if you want to attach PCMCIA- or PC-cards to your Linux
-	  computer.  These are credit-card size devices such as network cards,
-	  modems or hard drives often used with laptops computers.  There are
-	  actually two varieties of these cards: the older 16 bit PCMCIA cards
-	  and the newer 32 bit CardBus cards.  If you want to use CardBus
-	  cards, you need to say Y here and also to "CardBus support" below.
-
-	  To use your PC-cards, you will need supporting software from David
-	  Hinds' pcmcia-cs package (see the file <file:Documentation/Changes>
-	  for location).  Please also read the PCMCIA-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>.
-
-	  To compile this driver as modules, choose M here: the
-	  modules will be called pcmcia_core and ds.
-
-config AMIGA
-	bool "Amiga support"
-	select MMU_MOTOROLA if MMU
-	help
-	  This option enables support for the Amiga series of computers. If
-	  you plan to use this kernel on an Amiga, say Y here and browse the
-	  material available in <file:Documentation/m68k>; otherwise say N.
-
-config ATARI
-	bool "Atari support"
-	select MMU_MOTOROLA if MMU
-	help
-	  This option enables support for the 68000-based Atari series of
-	  computers (including the TT, Falcon and Medusa). If you plan to use
-	  this kernel on an Atari, say Y here and browse the material
-	  available in <file:Documentation/m68k>; otherwise say N.
-
-config MAC
-	bool "Macintosh support"
-	select MMU_MOTOROLA if MMU
-	help
-	  This option enables support for the Apple Macintosh series of
-	  computers (yes, there is experimental support now, at least for part
-	  of the series).
-
-	  Say N unless you're willing to code the remaining necessary support.
-	  ;)
-
-config NUBUS
-	bool
-	depends on MAC
-	default y
-
-config M68K_L2_CACHE
-	bool
-	depends on MAC
-	default y
-
-config APOLLO
-	bool "Apollo support"
-	select MMU_MOTOROLA if MMU
-	help
-	  Say Y here if you want to run Linux on an MC680x0-based Apollo
-	  Domain workstation such as the DN3500.
-
-config VME
-	bool "VME (Motorola and BVM) support"
-	select MMU_MOTOROLA if MMU
-	help
-	  Say Y here if you want to build a kernel for a 680x0 based VME
-	  board.  Boards currently supported include Motorola boards MVME147,
-	  MVME162, MVME166, MVME167, MVME172, and MVME177.  BVME4000 and
-	  BVME6000 boards from BVM Ltd are also supported.
-
-config MVME147
-	bool "MVME147 support"
-	depends on VME
-	help
-	  Say Y to include support for early Motorola VME boards.  This will
-	  build a kernel which can run on MVME147 single-board computers.  If
-	  you select this option you will have to select the appropriate
-	  drivers for SCSI, Ethernet and serial ports later on.
-
-config MVME16x
-	bool "MVME162, 166 and 167 support"
-	depends on VME
-	help
-	  Say Y to include support for Motorola VME boards.  This will build a
-	  kernel which can run on MVME162, MVME166, MVME167, MVME172, and
-	  MVME177 boards.  If you select this option you will have to select
-	  the appropriate drivers for SCSI, Ethernet and serial ports later
-	  on.
-
-config BVME6000
-	bool "BVME4000 and BVME6000 support"
-	depends on VME
-	help
-	  Say Y to include support for VME boards from BVM Ltd.  This will
-	  build a kernel which can run on BVME4000 and BVME6000 boards.  If
-	  you select this option you will have to select the appropriate
-	  drivers for SCSI, Ethernet and serial ports later on.
-
-config HP300
-	bool "HP9000/300 and HP9000/400 support"
-	select MMU_MOTOROLA if MMU
-	help
-	  This option enables support for the HP9000/300 and HP9000/400 series
-	  of workstations. Support for these machines is still somewhat
-	  experimental. If you plan to try to use the kernel on such a machine
-	  say Y here.
-	  Everybody else says N.
-
-config DIO
-	bool "DIO bus support"
-	depends on HP300
-	default y
-	help
-	  Say Y here to enable support for the "DIO" expansion bus used in
-	  HP300 machines. If you are using such a system you almost certainly
-	  want this.
-
-config SUN3X
-	bool "Sun3x support"
-	select MMU_MOTOROLA if MMU
-	select M68030
-	help
-	  This option enables support for the Sun 3x series of workstations.
-	  Be warned that this support is very experimental.
-	  Note that Sun 3x kernels are not compatible with Sun 3 hardware.
-	  General Linux information on the Sun 3x series (now discontinued)
-	  is at <http://www.angelfire.com/ca2/tech68k/sun3.html>.
-
-	  If you don't want to compile a kernel for a Sun 3x, say N.
-
-config Q40
-	bool "Q40/Q60 support"
-	select MMU_MOTOROLA if MMU
-	help
-	  The Q40 is a Motorola 68040-based successor to the Sinclair QL
-	  manufactured in Germany.  There is an official Q40 home page at
-	  <http://www.q40.de/>.  This option enables support for the Q40 and
-	  Q60. Select your CPU below.  For 68LC060 don't forget to enable FPU
-	  emulation.
-
-config SUN3
-	bool "Sun3 support"
-	depends on !MMU_MOTOROLA
-	select MMU_SUN3 if MMU
-	select M68020
-	help
-	  This option enables support for the Sun 3 series of workstations
-	  (3/50, 3/60, 3/1xx, 3/2xx systems). Enabling this option requires
-	  that all other hardware types must be disabled, as Sun 3 kernels
-	  are incompatible with all other m68k targets (including Sun 3x!).
-
-	  If you don't want to compile a kernel exclusively for a Sun 3, say N.
-
-config NATFEAT
-	bool "ARAnyM emulator support"
-	depends on ATARI
-	help
-	  This option enables support for ARAnyM native features, such as
-	  access to a disk image as /dev/hda.
-
-config NFBLOCK
-	tristate "NatFeat block device support"
-	depends on BLOCK && NATFEAT
-	help
-	  Say Y to include support for the ARAnyM NatFeat block device
-	  which allows direct access to the hard drives without using
-	  the hardware emulation.
-
-config NFCON
-	tristate "NatFeat console driver"
-	depends on NATFEAT
-	help
-	  Say Y to include support for the ARAnyM NatFeat console driver
-	  which allows the console output to be redirected to the stderr
-	  output of ARAnyM.
-
-config NFETH
-	tristate "NatFeat Ethernet support"
-	depends on NET_ETHERNET && NATFEAT
-	help
-	  Say Y to include support for the ARAnyM NatFeat network device
-	  which will emulate a regular ethernet device while presenting an
-	  ethertap device to the host system.
-
-comment "Processor type"
-
-config M68020
-	bool "68020 support"
-	help
-	  If you anticipate running this kernel on a computer with a MC68020
-	  processor, say Y. Otherwise, say N. Note that the 68020 requires a
-	  68851 MMU (Memory Management Unit) to run Linux/m68k, except on the
-	  Sun 3, which provides its own version.
-
-config M68030
-	bool "68030 support"
-	depends on !MMU_SUN3
-	help
-	  If you anticipate running this kernel on a computer with a MC68030
-	  processor, say Y. Otherwise, say N. Note that a MC68EC030 will not
-	  work, as it does not include an MMU (Memory Management Unit).
-
-config M68040
-	bool "68040 support"
-	depends on !MMU_SUN3
-	help
-	  If you anticipate running this kernel on a computer with a MC68LC040
-	  or MC68040 processor, say Y. Otherwise, say N. Note that an
-	  MC68EC040 will not work, as it does not include an MMU (Memory
-	  Management Unit).
-
-config M68060
-	bool "68060 support"
-	depends on !MMU_SUN3
-	help
-	  If you anticipate running this kernel on a computer with a MC68060
-	  processor, say Y. Otherwise, say N.
-
-config MMU_MOTOROLA
-	bool
-
-config MMU_SUN3
-	bool
-	depends on MMU && !MMU_MOTOROLA
-
-config M68KFPU_EMU
-	bool "Math emulation support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
-	help
-	  At some point in the future, this will cause floating-point math
-	  instructions to be emulated by the kernel on machines that lack a
-	  floating-point math coprocessor.  Thrill-seekers and chronically
-	  sleep-deprived psychotic hacker types can say Y now, everyone else
-	  should probably wait a while.
-
-config M68KFPU_EMU_EXTRAPREC
-	bool "Math emulation extra precision"
-	depends on M68KFPU_EMU
-	help
-	  The fpu uses normally a few bit more during calculations for
-	  correct rounding, the emulator can (often) do the same but this
-	  extra calculation can cost quite some time, so you can disable
-	  it here. The emulator will then "only" calculate with a 64 bit
-	  mantissa and round slightly incorrect, what is more than enough
-	  for normal usage.
-
-config M68KFPU_EMU_ONLY
-	bool "Math emulation only kernel"
-	depends on M68KFPU_EMU
-	help
-	  This option prevents any floating-point instructions from being
-	  compiled into the kernel, thereby the kernel doesn't save any
-	  floating point context anymore during task switches, so this
-	  kernel will only be usable on machines without a floating-point
-	  math coprocessor. This makes the kernel a bit faster as no tests
-	  needs to be executed whether a floating-point instruction in the
-	  kernel should be executed or not.
-
-config ADVANCED
-	bool "Advanced configuration options"
-	---help---
-	  This gives you access to some advanced options for the CPU. The
-	  defaults should be fine for most users, but these options may make
-	  it possible for you to improve performance somewhat if you know what
-	  you are doing.
-
-	  Note that the answer to this question won't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about these options.
-
-	  Most users should say N to this question.
-
-config RMW_INSNS
-	bool "Use read-modify-write instructions"
-	depends on ADVANCED
-	---help---
-	  This allows to use certain instructions that work with indivisible
-	  read-modify-write bus cycles. While this is faster than the
-	  workaround of disabling interrupts, it can conflict with DMA
-	  ( = direct memory access) on many Amiga systems, and it is also said
-	  to destabilize other machines. It is very likely that this will
-	  cause serious problems on any Amiga or Atari Medusa if set. The only
-	  configuration where it should work are 68030-based Ataris, where it
-	  apparently improves performance. But you've been warned! Unless you
-	  really know what you are doing, say N. Try Y only if you're quite
-	  adventurous.
-
-config SINGLE_MEMORY_CHUNK
-	bool "Use one physical chunk of memory only" if ADVANCED && !SUN3
-	default y if SUN3
-	select NEED_MULTIPLE_NODES
-	help
-	  Ignore all but the first contiguous chunk of physical memory for VM
-	  purposes.  This will save a few bytes kernel size and may speed up
-	  some operations.  Say N if not sure.
-
-config 060_WRITETHROUGH
-	bool "Use write-through caching for 68060 supervisor accesses"
-	depends on ADVANCED && M68060
-	---help---
-	  The 68060 generally uses copyback caching of recently accessed data.
-	  Copyback caching means that memory writes will be held in an on-chip
-	  cache and only written back to memory some time later.  Saying Y
-	  here will force supervisor (kernel) accesses to use writethrough
-	  caching.  Writethrough caching means that data is written to memory
-	  straight away, so that cache and memory data always agree.
-	  Writethrough caching is less efficient, but is needed for some
-	  drivers on 68060 based systems where the 68060 bus snooping signal
-	  is hardwired on.  The 53c710 SCSI driver is known to suffer from
-	  this problem.
-
-config ARCH_DISCONTIGMEM_ENABLE
-	def_bool !SINGLE_MEMORY_CHUNK
-
-config NODES_SHIFT
-	int
-	default "3"
-	depends on !SINGLE_MEMORY_CHUNK
+if MMU
+source arch/m68k/Kconfig.mmu
+endif
+if !MMU
+source arch/m68k/Kconfig.nommu
+endif
 
 source "mm/Kconfig"
 
 endmenu
 
-menu "General setup"
+menu "Executable file formats"
 
 source "fs/Kconfig.binfmt"
 
-config ZORRO
-	bool "Amiga Zorro (AutoConfig) bus support"
-	depends on AMIGA
-	help
-	  This enables support for the Zorro bus in the Amiga. If you have
-	  expansion cards in your Amiga that conform to the Amiga
-	  AutoConfig(tm) specification, say Y, otherwise N. Note that even
-	  expansion cards that do not fit in the Zorro slots but fit in e.g.
-	  the CPU slot may fall in this category, so you have to say Y to let
-	  Linux use these.
+endmenu
 
-config AMIGA_PCMCIA
-	bool "Amiga 1200/600 PCMCIA support (EXPERIMENTAL)"
-	depends on AMIGA && EXPERIMENTAL
-	help
-	  Include support in the kernel for pcmcia on Amiga 1200 and Amiga
-	  600. If you intend to use pcmcia cards say Y; otherwise say N.
+if !MMU
+menu "Power management options"
 
-config STRAM_PROC
-	bool "ST-RAM statistics in /proc"
-	depends on ATARI
-	help
-	  Say Y here to report ST-RAM usage statistics in /proc/stram.
-
-config HEARTBEAT
-	bool "Use power LED as a heartbeat" if AMIGA || APOLLO || ATARI || MAC ||Q40
-	default y if !AMIGA && !APOLLO && !ATARI && !MAC && !Q40 && HP300
-	help
-	  Use the power-on LED on your machine as a load meter.  The exact
-	  behavior is platform-dependent, but normally the flash frequency is
-	  a hyperbolic function of the 5-minute load average.
-
-# We have a dedicated heartbeat LED. :-)
-config PROC_HARDWARE
-	bool "/proc/hardware support"
-	help
-	  Say Y here to support the /proc/hardware file, which gives you
-	  access to information about the machine you're running on,
-	  including the model, CPU, MMU, clock speed, BogoMIPS rating,
-	  and memory size.
-
-config ISA
-	bool
-	depends on Q40 || AMIGA_PCMCIA
-	default y
-	help
-	  Find out whether you have ISA slots on your motherboard.  ISA is the
-	  name of a bus system, i.e. the way the CPU talks to the other stuff
-	  inside your box.  Other bus systems are PCI, EISA, MicroChannel
-	  (MCA) or VESA.  ISA is an older system, now being displaced by PCI;
-	  newer boards don't support it.  If you have ISA, say Y, otherwise N.
-
-config GENERIC_ISA_DMA
-	bool
-	depends on Q40 || AMIGA_PCMCIA
-	default y
-
-config ZONE_DMA
-	bool
-	default y
-
-source "drivers/pci/Kconfig"
-
-source "drivers/zorro/Kconfig"
+config PM
+        bool "Power Management support"
+        help
+          Support processor power management modes
 
 endmenu
+endif
 
 source "net/Kconfig"
 
 source "drivers/Kconfig"
 
+if MMU
+
 menu "Character devices"
 
 config ATARI_MFPSER
@@ -627,6 +235,8 @@
 
 endmenu
 
+endif
+
 source "fs/Kconfig"
 
 source "arch/m68k/Kconfig.debug"

diff --git a/arch/m68k/Kconfig.debug b/arch/m68k/Kconfig.debug
index f53b6d5..2bdb1b0 100644
--- a/arch/m68k/Kconfig.debug
+++ b/arch/m68k/Kconfig.debug

@@ -2,4 +2,38 @@
 
 source "lib/Kconfig.debug"
 
+if !MMU
+
+config FULLDEBUG
+	bool "Full Symbolic/Source Debugging support"
+	help
+	  Enable debugging symbols on kernel build.
+
+config HIGHPROFILE
+	bool "Use fast second timer for profiling"
+	depends on COLDFIRE
+	help
+	  Use a fast secondary clock to produce profiling information.
+
+config BOOTPARAM
+	bool 'Compiled-in Kernel Boot Parameter'
+
+config BOOTPARAM_STRING
+	string 'Kernel Boot Parameter'
+	default 'console=ttyS0,19200'
+	depends on BOOTPARAM
+
+config NO_KERNEL_MSG
+	bool "Suppress Kernel BUG Messages"
+	help
+	  Do not output any debug BUG messages within the kernel.
+
+config BDM_DISABLE
+	bool "Disable BDM signals"
+	depends on (EXPERIMENTAL && COLDFIRE)
+	help
+	  Disable the ColdFire CPU's BDM signals.
+
+endif
+
 endmenu

diff --git a/arch/m68k/Kconfig.mmu b/arch/m68k/Kconfig.mmu
new file mode 100644
index 0000000..16539b1
--- /dev/null
+++ b/arch/m68k/Kconfig.mmu

@@ -0,0 +1,417 @@
+config GENERIC_IOMAP
+	bool
+	default y
+
+config ARCH_MAY_HAVE_PC_FDC
+	bool
+	depends on BROKEN && (Q40 || SUN3X)
+	default y
+
+config ARCH_USES_GETTIMEOFFSET
+	def_bool y
+
+config EISA
+	bool
+	---help---
+	  The Extended Industry Standard Architecture (EISA) bus was
+	  developed as an open alternative to the IBM MicroChannel bus.
+
+	  The EISA bus provided some of the features of the IBM MicroChannel
+	  bus while maintaining backward compatibility with cards made for
+	  the older ISA bus.  The EISA bus saw limited use between 1988 and
+	  1995 when it was made obsolete by the PCI bus.
+
+	  Say Y here if you are building a kernel for an EISA-based machine.
+
+	  Otherwise, say N.
+
+config MCA
+	bool
+	help
+	  MicroChannel Architecture is found in some IBM PS/2 machines and
+	  laptops.  It is a bus system similar to PCI or ISA. See
+	  <file:Documentation/mca.txt> (and especially the web page given
+	  there) before attempting to build an MCA bus kernel.
+
+config PCMCIA
+	tristate
+	---help---
+	  Say Y here if you want to attach PCMCIA- or PC-cards to your Linux
+	  computer.  These are credit-card size devices such as network cards,
+	  modems or hard drives often used with laptops computers.  There are
+	  actually two varieties of these cards: the older 16 bit PCMCIA cards
+	  and the newer 32 bit CardBus cards.  If you want to use CardBus
+	  cards, you need to say Y here and also to "CardBus support" below.
+
+	  To use your PC-cards, you will need supporting software from David
+	  Hinds' pcmcia-cs package (see the file <file:Documentation/Changes>
+	  for location).  Please also read the PCMCIA-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>.
+
+	  To compile this driver as modules, choose M here: the
+	  modules will be called pcmcia_core and ds.
+
+config AMIGA
+	bool "Amiga support"
+	select MMU_MOTOROLA if MMU
+	help
+	  This option enables support for the Amiga series of computers. If
+	  you plan to use this kernel on an Amiga, say Y here and browse the
+	  material available in <file:Documentation/m68k>; otherwise say N.
+
+config ATARI
+	bool "Atari support"
+	select MMU_MOTOROLA if MMU
+	help
+	  This option enables support for the 68000-based Atari series of
+	  computers (including the TT, Falcon and Medusa). If you plan to use
+	  this kernel on an Atari, say Y here and browse the material
+	  available in <file:Documentation/m68k>; otherwise say N.
+
+config MAC
+	bool "Macintosh support"
+	select MMU_MOTOROLA if MMU
+	help
+	  This option enables support for the Apple Macintosh series of
+	  computers (yes, there is experimental support now, at least for part
+	  of the series).
+
+	  Say N unless you're willing to code the remaining necessary support.
+	  ;)
+
+config NUBUS
+	bool
+	depends on MAC
+	default y
+
+config M68K_L2_CACHE
+	bool
+	depends on MAC
+	default y
+
+config APOLLO
+	bool "Apollo support"
+	select MMU_MOTOROLA if MMU
+	help
+	  Say Y here if you want to run Linux on an MC680x0-based Apollo
+	  Domain workstation such as the DN3500.
+
+config VME
+	bool "VME (Motorola and BVM) support"
+	select MMU_MOTOROLA if MMU
+	help
+	  Say Y here if you want to build a kernel for a 680x0 based VME
+	  board.  Boards currently supported include Motorola boards MVME147,
+	  MVME162, MVME166, MVME167, MVME172, and MVME177.  BVME4000 and
+	  BVME6000 boards from BVM Ltd are also supported.
+
+config MVME147
+	bool "MVME147 support"
+	depends on VME
+	help
+	  Say Y to include support for early Motorola VME boards.  This will
+	  build a kernel which can run on MVME147 single-board computers.  If
+	  you select this option you will have to select the appropriate
+	  drivers for SCSI, Ethernet and serial ports later on.
+
+config MVME16x
+	bool "MVME162, 166 and 167 support"
+	depends on VME
+	help
+	  Say Y to include support for Motorola VME boards.  This will build a
+	  kernel which can run on MVME162, MVME166, MVME167, MVME172, and
+	  MVME177 boards.  If you select this option you will have to select
+	  the appropriate drivers for SCSI, Ethernet and serial ports later
+	  on.
+
+config BVME6000
+	bool "BVME4000 and BVME6000 support"
+	depends on VME
+	help
+	  Say Y to include support for VME boards from BVM Ltd.  This will
+	  build a kernel which can run on BVME4000 and BVME6000 boards.  If
+	  you select this option you will have to select the appropriate
+	  drivers for SCSI, Ethernet and serial ports later on.
+
+config HP300
+	bool "HP9000/300 and HP9000/400 support"
+	select MMU_MOTOROLA if MMU
+	help
+	  This option enables support for the HP9000/300 and HP9000/400 series
+	  of workstations. Support for these machines is still somewhat
+	  experimental. If you plan to try to use the kernel on such a machine
+	  say Y here.
+	  Everybody else says N.
+
+config DIO
+	bool "DIO bus support"
+	depends on HP300
+	default y
+	help
+	  Say Y here to enable support for the "DIO" expansion bus used in
+	  HP300 machines. If you are using such a system you almost certainly
+	  want this.
+
+config SUN3X
+	bool "Sun3x support"
+	select MMU_MOTOROLA if MMU
+	select M68030
+	help
+	  This option enables support for the Sun 3x series of workstations.
+	  Be warned that this support is very experimental.
+	  Note that Sun 3x kernels are not compatible with Sun 3 hardware.
+	  General Linux information on the Sun 3x series (now discontinued)
+	  is at <http://www.angelfire.com/ca2/tech68k/sun3.html>.
+
+	  If you don't want to compile a kernel for a Sun 3x, say N.
+
+config Q40
+	bool "Q40/Q60 support"
+	select MMU_MOTOROLA if MMU
+	help
+	  The Q40 is a Motorola 68040-based successor to the Sinclair QL
+	  manufactured in Germany.  There is an official Q40 home page at
+	  <http://www.q40.de/>.  This option enables support for the Q40 and
+	  Q60. Select your CPU below.  For 68LC060 don't forget to enable FPU
+	  emulation.
+
+config SUN3
+	bool "Sun3 support"
+	depends on !MMU_MOTOROLA
+	select MMU_SUN3 if MMU
+	select M68020
+	help
+	  This option enables support for the Sun 3 series of workstations
+	  (3/50, 3/60, 3/1xx, 3/2xx systems). Enabling this option requires
+	  that all other hardware types must be disabled, as Sun 3 kernels
+	  are incompatible with all other m68k targets (including Sun 3x!).
+
+	  If you don't want to compile a kernel exclusively for a Sun 3, say N.
+
+config NATFEAT
+	bool "ARAnyM emulator support"
+	depends on ATARI
+	help
+	  This option enables support for ARAnyM native features, such as
+	  access to a disk image as /dev/hda.
+
+config NFBLOCK
+	tristate "NatFeat block device support"
+	depends on BLOCK && NATFEAT
+	help
+	  Say Y to include support for the ARAnyM NatFeat block device
+	  which allows direct access to the hard drives without using
+	  the hardware emulation.
+
+config NFCON
+	tristate "NatFeat console driver"
+	depends on NATFEAT
+	help
+	  Say Y to include support for the ARAnyM NatFeat console driver
+	  which allows the console output to be redirected to the stderr
+	  output of ARAnyM.
+
+config NFETH
+	tristate "NatFeat Ethernet support"
+	depends on NET_ETHERNET && NATFEAT
+	help
+	  Say Y to include support for the ARAnyM NatFeat network device
+	  which will emulate a regular ethernet device while presenting an
+	  ethertap device to the host system.
+
+comment "Processor type"
+
+config M68020
+	bool "68020 support"
+	help
+	  If you anticipate running this kernel on a computer with a MC68020
+	  processor, say Y. Otherwise, say N. Note that the 68020 requires a
+	  68851 MMU (Memory Management Unit) to run Linux/m68k, except on the
+	  Sun 3, which provides its own version.
+
+config M68030
+	bool "68030 support"
+	depends on !MMU_SUN3
+	help
+	  If you anticipate running this kernel on a computer with a MC68030
+	  processor, say Y. Otherwise, say N. Note that a MC68EC030 will not
+	  work, as it does not include an MMU (Memory Management Unit).
+
+config M68040
+	bool "68040 support"
+	depends on !MMU_SUN3
+	help
+	  If you anticipate running this kernel on a computer with a MC68LC040
+	  or MC68040 processor, say Y. Otherwise, say N. Note that an
+	  MC68EC040 will not work, as it does not include an MMU (Memory
+	  Management Unit).
+
+config M68060
+	bool "68060 support"
+	depends on !MMU_SUN3
+	help
+	  If you anticipate running this kernel on a computer with a MC68060
+	  processor, say Y. Otherwise, say N.
+
+config MMU_MOTOROLA
+	bool
+
+config MMU_SUN3
+	bool
+	depends on MMU && !MMU_MOTOROLA
+
+config M68KFPU_EMU
+	bool "Math emulation support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  At some point in the future, this will cause floating-point math
+	  instructions to be emulated by the kernel on machines that lack a
+	  floating-point math coprocessor.  Thrill-seekers and chronically
+	  sleep-deprived psychotic hacker types can say Y now, everyone else
+	  should probably wait a while.
+
+config M68KFPU_EMU_EXTRAPREC
+	bool "Math emulation extra precision"
+	depends on M68KFPU_EMU
+	help
+	  The fpu uses normally a few bit more during calculations for
+	  correct rounding, the emulator can (often) do the same but this
+	  extra calculation can cost quite some time, so you can disable
+	  it here. The emulator will then "only" calculate with a 64 bit
+	  mantissa and round slightly incorrect, what is more than enough
+	  for normal usage.
+
+config M68KFPU_EMU_ONLY
+	bool "Math emulation only kernel"
+	depends on M68KFPU_EMU
+	help
+	  This option prevents any floating-point instructions from being
+	  compiled into the kernel, thereby the kernel doesn't save any
+	  floating point context anymore during task switches, so this
+	  kernel will only be usable on machines without a floating-point
+	  math coprocessor. This makes the kernel a bit faster as no tests
+	  needs to be executed whether a floating-point instruction in the
+	  kernel should be executed or not.
+
+config ADVANCED
+	bool "Advanced configuration options"
+	---help---
+	  This gives you access to some advanced options for the CPU. The
+	  defaults should be fine for most users, but these options may make
+	  it possible for you to improve performance somewhat if you know what
+	  you are doing.
+
+	  Note that the answer to this question won't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about these options.
+
+	  Most users should say N to this question.
+
+config RMW_INSNS
+	bool "Use read-modify-write instructions"
+	depends on ADVANCED
+	---help---
+	  This allows to use certain instructions that work with indivisible
+	  read-modify-write bus cycles. While this is faster than the
+	  workaround of disabling interrupts, it can conflict with DMA
+	  ( = direct memory access) on many Amiga systems, and it is also said
+	  to destabilize other machines. It is very likely that this will
+	  cause serious problems on any Amiga or Atari Medusa if set. The only
+	  configuration where it should work are 68030-based Ataris, where it
+	  apparently improves performance. But you've been warned! Unless you
+	  really know what you are doing, say N. Try Y only if you're quite
+	  adventurous.
+
+config SINGLE_MEMORY_CHUNK
+	bool "Use one physical chunk of memory only" if ADVANCED && !SUN3
+	default y if SUN3
+	select NEED_MULTIPLE_NODES
+	help
+	  Ignore all but the first contiguous chunk of physical memory for VM
+	  purposes.  This will save a few bytes kernel size and may speed up
+	  some operations.  Say N if not sure.
+
+config 060_WRITETHROUGH
+	bool "Use write-through caching for 68060 supervisor accesses"
+	depends on ADVANCED && M68060
+	---help---
+	  The 68060 generally uses copyback caching of recently accessed data.
+	  Copyback caching means that memory writes will be held in an on-chip
+	  cache and only written back to memory some time later.  Saying Y
+	  here will force supervisor (kernel) accesses to use writethrough
+	  caching.  Writethrough caching means that data is written to memory
+	  straight away, so that cache and memory data always agree.
+	  Writethrough caching is less efficient, but is needed for some
+	  drivers on 68060 based systems where the 68060 bus snooping signal
+	  is hardwired on.  The 53c710 SCSI driver is known to suffer from
+	  this problem.
+
+config ARCH_DISCONTIGMEM_ENABLE
+	def_bool !SINGLE_MEMORY_CHUNK
+
+config NODES_SHIFT
+	int
+	default "3"
+	depends on !SINGLE_MEMORY_CHUNK
+
+config ZORRO
+	bool "Amiga Zorro (AutoConfig) bus support"
+	depends on AMIGA
+	help
+	  This enables support for the Zorro bus in the Amiga. If you have
+	  expansion cards in your Amiga that conform to the Amiga
+	  AutoConfig(tm) specification, say Y, otherwise N. Note that even
+	  expansion cards that do not fit in the Zorro slots but fit in e.g.
+	  the CPU slot may fall in this category, so you have to say Y to let
+	  Linux use these.
+
+config AMIGA_PCMCIA
+	bool "Amiga 1200/600 PCMCIA support (EXPERIMENTAL)"
+	depends on AMIGA && EXPERIMENTAL
+	help
+	  Include support in the kernel for pcmcia on Amiga 1200 and Amiga
+	  600. If you intend to use pcmcia cards say Y; otherwise say N.
+
+config STRAM_PROC
+	bool "ST-RAM statistics in /proc"
+	depends on ATARI
+	help
+	  Say Y here to report ST-RAM usage statistics in /proc/stram.
+
+config HEARTBEAT
+	bool "Use power LED as a heartbeat" if AMIGA || APOLLO || ATARI || MAC ||Q40
+	default y if !AMIGA && !APOLLO && !ATARI && !MAC && !Q40 && HP300
+	help
+	  Use the power-on LED on your machine as a load meter.  The exact
+	  behavior is platform-dependent, but normally the flash frequency is
+	  a hyperbolic function of the 5-minute load average.
+
+# We have a dedicated heartbeat LED. :-)
+config PROC_HARDWARE
+	bool "/proc/hardware support"
+	help
+	  Say Y here to support the /proc/hardware file, which gives you
+	  access to information about the machine you're running on,
+	  including the model, CPU, MMU, clock speed, BogoMIPS rating,
+	  and memory size.
+
+config ISA
+	bool
+	depends on Q40 || AMIGA_PCMCIA
+	default y
+	help
+	  Find out whether you have ISA slots on your motherboard.  ISA is the
+	  name of a bus system, i.e. the way the CPU talks to the other stuff
+	  inside your box.  Other bus systems are PCI, EISA, MicroChannel
+	  (MCA) or VESA.  ISA is an older system, now being displaced by PCI;
+	  newer boards don't support it.  If you have ISA, say Y, otherwise N.
+
+config GENERIC_ISA_DMA
+	bool
+	depends on Q40 || AMIGA_PCMCIA
+	default y
+
+source "drivers/pci/Kconfig"
+
+source "drivers/zorro/Kconfig"
+

diff --git a/arch/m68knommu/Kconfig b/arch/m68k/Kconfig.nommu
similarity index 92%
rename from arch/m68knommu/Kconfig
rename to arch/m68k/Kconfig.nommu
index b5424cf..273bcca 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68k/Kconfig.nommu

@@ -1,43 +1,7 @@
-config M68K
-	bool
-	default y
-	select HAVE_IDE
-	select HAVE_GENERIC_HARDIRQS
-	select GENERIC_HARDIRQS_NO_DEPRECATED
-
-config MMU
-	bool
-	default n
-
-config NO_DMA
-	bool
-	depends on !COLDFIRE
-	default y
-
 config FPU
 	bool
 	default n
 
-config ZONE_DMA
-	bool
-	default y
-
-config RWSEM_GENERIC_SPINLOCK
-	bool
-	default y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default n
-
-config ARCH_HAS_ILOG2_U32
-	bool
-	default n
-
-config ARCH_HAS_ILOG2_U64
-	bool
-	default n
-
 config GENERIC_FIND_NEXT_BIT
 	bool
 	default y
@@ -46,29 +10,14 @@
 	bool
 	default n
 
-config GENERIC_HWEIGHT
-	bool
-	default y
-
-config GENERIC_CALIBRATE_DELAY
-	bool
-	default y
-
 config GENERIC_CMOS_UPDATE
 	bool
 	default y
 
-config TIME_LOW_RES
-	bool
-	default y
-
 config GENERIC_CLOCKEVENTS
 	bool
 	default n
 
-config NO_IOPORT
-	def_bool y
-
 config COLDFIRE_SW_A7
 	bool
 	default n
@@ -85,12 +34,6 @@
 config HAVE_IPSBAR
 	bool
 
-source "init/Kconfig"
-
-source "kernel/Kconfig.freezer"
-
-menu "Processor type and features"
-
 choice
 	prompt "CPU"
 	default M68EZ328
@@ -630,11 +573,6 @@
 	  running more threads on a system and also reduces the pressure
 	  on the VM subsystem for higher order allocations.
 
-config HZ
-	int
-	default 1000 if CLEOPATRA
-	default 100
-
 comment "RAM configuration"
 
 config RAMBASE
@@ -803,10 +741,6 @@
 
 source "kernel/time/Kconfig"
 
-source "mm/Kconfig"
-
-endmenu
-
 config ISA_DMA_API
 	bool
 	depends on !M5272
@@ -814,31 +748,3 @@
 
 source "drivers/pcmcia/Kconfig"
 
-menu "Executable file formats"
-
-source "fs/Kconfig.binfmt"
-
-endmenu
-
-menu "Power management options"
-
-config PM
-	bool "Power Management support"
-	help
-	  Support processor power management modes
-
-endmenu
-
-source "net/Kconfig"
-
-source "drivers/Kconfig"
-
-source "fs/Kconfig"
-
-source "arch/m68knommu/Kconfig.debug"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
-
-source "lib/Kconfig"

diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile
index b793163..be46cad 100644
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile

@@ -1,123 +1,7 @@
-#
-# m68k/Makefile
-#
-# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies. Remember to do have actions
-# for "archclean" and "archdep" for cleaning up and making dependencies for
-# this architecture
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 1994 by Hamish Macdonald
-#
-
 KBUILD_DEFCONFIG := multi_defconfig
 
-# override top level makefile
-AS += -m68020
-LDFLAGS := -m m68kelf
-KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/m68k/kernel/module.lds
-ifneq ($(SUBARCH),$(ARCH))
-	ifeq ($(CROSS_COMPILE),)
-		CROSS_COMPILE := $(call cc-cross-prefix, \
-			m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-)
-	endif
-endif
-
-ifdef CONFIG_SUN3
-LDFLAGS_vmlinux = -N
-endif
-
-CHECKFLAGS += -D__mc68000__
-
-# without -fno-strength-reduce the 53c7xx.c driver fails ;-(
-KBUILD_CFLAGS += -pipe -fno-strength-reduce -ffixed-a2
-
-# enable processor switch if compiled only for a single cpu
-ifndef CONFIG_M68020
-ifndef CONFIG_M68030
-
-ifndef CONFIG_M68060
-KBUILD_CFLAGS += -m68040
-endif
-
-ifndef CONFIG_M68040
-KBUILD_CFLAGS += -m68060
-endif
-
-endif
-endif
-
-ifdef CONFIG_KGDB
-# If configured for kgdb support, include debugging infos and keep the
-# frame pointer
-KBUILD_CFLAGS := $(subst -fomit-frame-pointer,,$(KBUILD_CFLAGS)) -g
-endif
-
-ifndef CONFIG_SUN3
-head-y := arch/m68k/kernel/head.o
+ifdef CONFIG_MMU
+include $(srctree)/arch/m68k/Makefile_mm
 else
-head-y := arch/m68k/kernel/sun3-head.o
+include $(srctree)/arch/m68k/Makefile_no
 endif
-
-core-y				+= arch/m68k/kernel/	arch/m68k/mm/
-libs-y				+= arch/m68k/lib/
-
-core-$(CONFIG_Q40)		+= arch/m68k/q40/
-core-$(CONFIG_AMIGA)		+= arch/m68k/amiga/
-core-$(CONFIG_ATARI)		+= arch/m68k/atari/
-core-$(CONFIG_MAC)		+= arch/m68k/mac/
-core-$(CONFIG_HP300)		+= arch/m68k/hp300/
-core-$(CONFIG_APOLLO)		+= arch/m68k/apollo/
-core-$(CONFIG_MVME147)		+= arch/m68k/mvme147/
-core-$(CONFIG_MVME16x)		+= arch/m68k/mvme16x/
-core-$(CONFIG_BVME6000)		+= arch/m68k/bvme6000/
-core-$(CONFIG_SUN3X)		+= arch/m68k/sun3x/	arch/m68k/sun3/
-core-$(CONFIG_SUN3)		+= arch/m68k/sun3/	arch/m68k/sun3/prom/
-core-$(CONFIG_NATFEAT)		+= arch/m68k/emu/
-core-$(CONFIG_M68040)		+= arch/m68k/fpsp040/
-core-$(CONFIG_M68060)		+= arch/m68k/ifpsp060/
-core-$(CONFIG_M68KFPU_EMU)	+= arch/m68k/math-emu/
-
-all:	zImage
-
-lilo:	vmlinux
-	if [ -f $(INSTALL_PATH)/vmlinux ]; then mv -f $(INSTALL_PATH)/vmlinux $(INSTALL_PATH)/vmlinux.old; fi
-	if [ -f $(INSTALL_PATH)/System.map ]; then mv -f $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
-	cat vmlinux > $(INSTALL_PATH)/vmlinux
-	cp System.map $(INSTALL_PATH)/System.map
-	if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
-
-zImage compressed: vmlinux.gz
-
-vmlinux.gz: vmlinux
-
-ifndef CONFIG_KGDB
-	cp vmlinux vmlinux.tmp
-	$(STRIP) vmlinux.tmp
-	gzip -9c vmlinux.tmp >vmlinux.gz
-	rm vmlinux.tmp
-else
-	gzip -9c vmlinux >vmlinux.gz
-endif
-
-bzImage: vmlinux.bz2
-
-vmlinux.bz2: vmlinux
-
-ifndef CONFIG_KGDB
-	cp vmlinux vmlinux.tmp
-	$(STRIP) vmlinux.tmp
-	bzip2 -1c vmlinux.tmp >vmlinux.bz2
-	rm vmlinux.tmp
-else
-	bzip2 -1c vmlinux >vmlinux.bz2
-endif
-
-archclean:
-	rm -f vmlinux.gz vmlinux.bz2
-
-install:
-	sh $(srctree)/arch/m68k/install.sh $(KERNELRELEASE) vmlinux.gz System.map "$(INSTALL_PATH)"

diff --git a/arch/m68k/Makefile_mm b/arch/m68k/Makefile_mm
new file mode 100644
index 0000000..d449b6d
--- /dev/null
+++ b/arch/m68k/Makefile_mm

@@ -0,0 +1,121 @@
+#
+# m68k/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to do have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Hamish Macdonald
+#
+
+# override top level makefile
+AS += -m68020
+LDFLAGS := -m m68kelf
+KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/m68k/kernel/module.lds
+ifneq ($(SUBARCH),$(ARCH))
+	ifeq ($(CROSS_COMPILE),)
+		CROSS_COMPILE := $(call cc-cross-prefix, \
+			m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-)
+	endif
+endif
+
+ifdef CONFIG_SUN3
+LDFLAGS_vmlinux = -N
+endif
+
+CHECKFLAGS += -D__mc68000__
+
+# without -fno-strength-reduce the 53c7xx.c driver fails ;-(
+KBUILD_CFLAGS += -pipe -fno-strength-reduce -ffixed-a2
+
+# enable processor switch if compiled only for a single cpu
+ifndef CONFIG_M68020
+ifndef CONFIG_M68030
+
+ifndef CONFIG_M68060
+KBUILD_CFLAGS += -m68040
+endif
+
+ifndef CONFIG_M68040
+KBUILD_CFLAGS += -m68060
+endif
+
+endif
+endif
+
+ifdef CONFIG_KGDB
+# If configured for kgdb support, include debugging infos and keep the
+# frame pointer
+KBUILD_CFLAGS := $(subst -fomit-frame-pointer,,$(KBUILD_CFLAGS)) -g
+endif
+
+ifndef CONFIG_SUN3
+head-y := arch/m68k/kernel/head.o
+else
+head-y := arch/m68k/kernel/sun3-head.o
+endif
+
+core-y				+= arch/m68k/kernel/	arch/m68k/mm/
+libs-y				+= arch/m68k/lib/
+
+core-$(CONFIG_Q40)		+= arch/m68k/q40/
+core-$(CONFIG_AMIGA)		+= arch/m68k/amiga/
+core-$(CONFIG_ATARI)		+= arch/m68k/atari/
+core-$(CONFIG_MAC)		+= arch/m68k/mac/
+core-$(CONFIG_HP300)		+= arch/m68k/hp300/
+core-$(CONFIG_APOLLO)		+= arch/m68k/apollo/
+core-$(CONFIG_MVME147)		+= arch/m68k/mvme147/
+core-$(CONFIG_MVME16x)		+= arch/m68k/mvme16x/
+core-$(CONFIG_BVME6000)		+= arch/m68k/bvme6000/
+core-$(CONFIG_SUN3X)		+= arch/m68k/sun3x/	arch/m68k/sun3/
+core-$(CONFIG_SUN3)		+= arch/m68k/sun3/	arch/m68k/sun3/prom/
+core-$(CONFIG_NATFEAT)		+= arch/m68k/emu/
+core-$(CONFIG_M68040)		+= arch/m68k/fpsp040/
+core-$(CONFIG_M68060)		+= arch/m68k/ifpsp060/
+core-$(CONFIG_M68KFPU_EMU)	+= arch/m68k/math-emu/
+
+all:	zImage
+
+lilo:	vmlinux
+	if [ -f $(INSTALL_PATH)/vmlinux ]; then mv -f $(INSTALL_PATH)/vmlinux $(INSTALL_PATH)/vmlinux.old; fi
+	if [ -f $(INSTALL_PATH)/System.map ]; then mv -f $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
+	cat vmlinux > $(INSTALL_PATH)/vmlinux
+	cp System.map $(INSTALL_PATH)/System.map
+	if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
+
+zImage compressed: vmlinux.gz
+
+vmlinux.gz: vmlinux
+
+ifndef CONFIG_KGDB
+	cp vmlinux vmlinux.tmp
+	$(STRIP) vmlinux.tmp
+	gzip -9c vmlinux.tmp >vmlinux.gz
+	rm vmlinux.tmp
+else
+	gzip -9c vmlinux >vmlinux.gz
+endif
+
+bzImage: vmlinux.bz2
+
+vmlinux.bz2: vmlinux
+
+ifndef CONFIG_KGDB
+	cp vmlinux vmlinux.tmp
+	$(STRIP) vmlinux.tmp
+	bzip2 -1c vmlinux.tmp >vmlinux.bz2
+	rm vmlinux.tmp
+else
+	bzip2 -1c vmlinux >vmlinux.bz2
+endif
+
+archclean:
+	rm -f vmlinux.gz vmlinux.bz2
+
+install:
+	sh $(srctree)/arch/m68k/install.sh $(KERNELRELEASE) vmlinux.gz System.map "$(INSTALL_PATH)"

diff --git a/arch/m68knommu/Makefile b/arch/m68k/Makefile_no
similarity index 91%
rename from arch/m68knommu/Makefile
rename to arch/m68k/Makefile_no
index 589613f..81652ab 100644
--- a/arch/m68knommu/Makefile
+++ b/arch/m68k/Makefile_no

@@ -1,5 +1,5 @@
 #
-# arch/m68knommu/Makefile
+# arch/m68k/Makefile
 #
 # This file is subject to the terms and conditions of the GNU General Public
 # License.  See the file "COPYING" in the main directory of this archive
@@ -8,8 +8,6 @@
 # (C) Copyright 2002, Greg Ungerer <gerg@snapgear.com>
 #
 
-KBUILD_DEFCONFIG := m5208evb_defconfig
-
 platform-$(CONFIG_M68328)	:= 68328
 platform-$(CONFIG_M68EZ328)	:= 68EZ328
 platform-$(CONFIG_M68VZ328)	:= 68VZ328
@@ -82,7 +80,7 @@
 CPUCLASS := $(cpuclass-y)
 
 ifneq ($(CPUCLASS),$(PLATFORM))
-CLASSDIR := arch/m68knommu/platform/$(cpuclass-y)/
+CLASSDIR := arch/m68k/platform/$(cpuclass-y)/
 endif
 
 export PLATFORM BOARD MODEL CPUCLASS
@@ -114,13 +112,13 @@
 KBUILD_CFLAGS += -D__linux__
 KBUILD_CFLAGS += -DUTS_SYSNAME=\"uClinux\"
 
-head-y := arch/m68knommu/platform/$(cpuclass-y)/head.o
+head-y := arch/m68k/platform/$(cpuclass-y)/head.o
 
-core-y	+= arch/m68knommu/kernel/ \
-	   arch/m68knommu/mm/ \
+core-y	+= arch/m68k/kernel/ \
+	   arch/m68k/mm/ \
 	   $(CLASSDIR) \
-	   arch/m68knommu/platform/$(PLATFORM)/
-libs-y	+= arch/m68knommu/lib/
+	   arch/m68k/platform/$(PLATFORM)/
+libs-y	+= arch/m68k/lib/
 
 archclean:
 

diff --git a/arch/m68knommu/configs/m5208evb_defconfig b/arch/m68k/configs/m5208evb_defconfig
similarity index 97%
rename from arch/m68knommu/configs/m5208evb_defconfig
rename to arch/m68k/configs/m5208evb_defconfig
index 2f5655c..c161682 100644
--- a/arch/m68knommu/configs/m5208evb_defconfig
+++ b/arch/m68k/configs/m5208evb_defconfig

@@ -1,3 +1,4 @@
+# CONFIG_MMU is not set
 CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -37,6 +38,7 @@
 # CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CHAR=y

diff --git a/arch/m68knommu/configs/m5249evb_defconfig b/arch/m68k/configs/m5249evb_defconfig
similarity index 96%
rename from arch/m68knommu/configs/m5249evb_defconfig
rename to arch/m68k/configs/m5249evb_defconfig
index 16df72b..a6599e4 100644
--- a/arch/m68knommu/configs/m5249evb_defconfig
+++ b/arch/m68k/configs/m5249evb_defconfig

@@ -1,3 +1,4 @@
+# CONFIG_MMU is not set
 CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -35,6 +36,7 @@
 # CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CHAR=y

diff --git a/arch/m68knommu/configs/m5272c3_defconfig b/arch/m68k/configs/m5272c3_defconfig
similarity index 96%
rename from arch/m68knommu/configs/m5272c3_defconfig
rename to arch/m68k/configs/m5272c3_defconfig
index 4e6ea50..3fa60a5 100644
--- a/arch/m68knommu/configs/m5272c3_defconfig
+++ b/arch/m68k/configs/m5272c3_defconfig

@@ -1,3 +1,4 @@
+# CONFIG_MMU is not set
 CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -33,6 +34,7 @@
 # CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CHAR=y

diff --git a/arch/m68knommu/configs/m5275evb_defconfig b/arch/m68k/configs/m5275evb_defconfig
similarity index 96%
rename from arch/m68knommu/configs/m5275evb_defconfig
rename to arch/m68k/configs/m5275evb_defconfig
index f3dd741..33c32ae 100644
--- a/arch/m68knommu/configs/m5275evb_defconfig
+++ b/arch/m68k/configs/m5275evb_defconfig

@@ -1,3 +1,4 @@
+# CONFIG_MMU is not set
 CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -36,6 +37,7 @@
 # CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CHAR=y

diff --git a/arch/m68knommu/configs/m5307c3_defconfig b/arch/m68k/configs/m5307c3_defconfig
similarity index 97%
rename from arch/m68knommu/configs/m5307c3_defconfig
rename to arch/m68k/configs/m5307c3_defconfig
index bce0a20..43795f4 100644
--- a/arch/m68knommu/configs/m5307c3_defconfig
+++ b/arch/m68k/configs/m5307c3_defconfig

@@ -1,3 +1,4 @@
+# CONFIG_MMU is not set
 CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -35,6 +36,7 @@
 # CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CHAR=y

diff --git a/arch/m68knommu/configs/m5407c3_defconfig b/arch/m68k/configs/m5407c3_defconfig
similarity index 96%
rename from arch/m68knommu/configs/m5407c3_defconfig
rename to arch/m68k/configs/m5407c3_defconfig
index 618cc32..72746c5 100644
--- a/arch/m68knommu/configs/m5407c3_defconfig
+++ b/arch/m68k/configs/m5407c3_defconfig

@@ -1,3 +1,4 @@
+# CONFIG_MMU is not set
 CONFIG_EXPERIMENTAL=y
 CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
@@ -35,6 +36,7 @@
 # CONFIG_INET_LRO is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
+# CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CHAR=y

diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile
index 55d5d6b..c482ebc 100644
--- a/arch/m68k/kernel/Makefile
+++ b/arch/m68k/kernel/Makefile

@@ -1,17 +1,5 @@
-#
-# Makefile for the linux kernel.
-#
-
-ifndef CONFIG_SUN3
-  extra-y := head.o
+ifdef CONFIG_MMU
+include arch/m68k/kernel/Makefile_mm
 else
-  extra-y := sun3-head.o
+include arch/m68k/kernel/Makefile_no
 endif
-extra-y	+= vmlinux.lds
-
-obj-y	:= entry.o process.o traps.o ints.o signal.o ptrace.o module.o \
-	   sys_m68k.o time.o setup.o m68k_ksyms.o devres.o
-
-devres-y = ../../../kernel/irq/devres.o
-
-obj-y$(CONFIG_MMU_SUN3) += dma.o	# no, it's not a typo

diff --git a/arch/m68k/kernel/Makefile_mm b/arch/m68k/kernel/Makefile_mm
new file mode 100644
index 0000000..55d5d6b
--- /dev/null
+++ b/arch/m68k/kernel/Makefile_mm

@@ -0,0 +1,17 @@
+#
+# Makefile for the linux kernel.
+#
+
+ifndef CONFIG_SUN3
+  extra-y := head.o
+else
+  extra-y := sun3-head.o
+endif
+extra-y	+= vmlinux.lds
+
+obj-y	:= entry.o process.o traps.o ints.o signal.o ptrace.o module.o \
+	   sys_m68k.o time.o setup.o m68k_ksyms.o devres.o
+
+devres-y = ../../../kernel/irq/devres.o
+
+obj-y$(CONFIG_MMU_SUN3) += dma.o	# no, it's not a typo

diff --git a/arch/m68knommu/kernel/Makefile b/arch/m68k/kernel/Makefile_no
similarity index 100%
rename from arch/m68knommu/kernel/Makefile
rename to arch/m68k/kernel/Makefile_no


diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c
index 78e59b8..59a69a5 100644
--- a/arch/m68k/kernel/asm-offsets.c
+++ b/arch/m68k/kernel/asm-offsets.c

@@ -1,100 +1,5 @@
-/*
- * This program is used to generate definitions needed by
- * assembly language modules.
- *
- * We use the technique used in the OSF Mach kernel code:
- * generate asm statements containing #defines,
- * compile this file to assembler, and then extract the
- * #defines from the assembly-language output.
- */
-
-#define ASM_OFFSETS_C
-
-#include <linux/stddef.h>
-#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/kbuild.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/amigahw.h>
-#include <linux/font.h>
-
-int main(void)
-{
-	/* offsets into the task struct */
-	DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
-	DEFINE(TASK_INFO, offsetof(struct task_struct, thread.info));
-	DEFINE(TASK_MM, offsetof(struct task_struct, mm));
 #ifdef CONFIG_MMU
-	DEFINE(TASK_TINFO, offsetof(struct task_struct, thread.info));
+#include "asm-offsets_mm.c"
+#else
+#include "asm-offsets_no.c"
 #endif
-
-	/* offsets into the thread struct */
-	DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
-	DEFINE(THREAD_USP, offsetof(struct thread_struct, usp));
-	DEFINE(THREAD_SR, offsetof(struct thread_struct, sr));
-	DEFINE(THREAD_FS, offsetof(struct thread_struct, fs));
-	DEFINE(THREAD_CRP, offsetof(struct thread_struct, crp));
-	DEFINE(THREAD_ESP0, offsetof(struct thread_struct, esp0));
-	DEFINE(THREAD_FPREG, offsetof(struct thread_struct, fp));
-	DEFINE(THREAD_FPCNTL, offsetof(struct thread_struct, fpcntl));
-	DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fpstate));
-
-	/* offsets into the thread_info struct */
-	DEFINE(TINFO_PREEMPT, offsetof(struct thread_info, preempt_count));
-	DEFINE(TINFO_FLAGS, offsetof(struct thread_info, flags));
-
-	/* offsets into the pt_regs */
-	DEFINE(PT_OFF_D0, offsetof(struct pt_regs, d0));
-	DEFINE(PT_OFF_ORIG_D0, offsetof(struct pt_regs, orig_d0));
-	DEFINE(PT_OFF_D1, offsetof(struct pt_regs, d1));
-	DEFINE(PT_OFF_D2, offsetof(struct pt_regs, d2));
-	DEFINE(PT_OFF_D3, offsetof(struct pt_regs, d3));
-	DEFINE(PT_OFF_D4, offsetof(struct pt_regs, d4));
-	DEFINE(PT_OFF_D5, offsetof(struct pt_regs, d5));
-	DEFINE(PT_OFF_A0, offsetof(struct pt_regs, a0));
-	DEFINE(PT_OFF_A1, offsetof(struct pt_regs, a1));
-	DEFINE(PT_OFF_A2, offsetof(struct pt_regs, a2));
-	DEFINE(PT_OFF_PC, offsetof(struct pt_regs, pc));
-	DEFINE(PT_OFF_SR, offsetof(struct pt_regs, sr));
-	/* bitfields are a bit difficult */
-	DEFINE(PT_OFF_FORMATVEC, offsetof(struct pt_regs, pc) + 4);
-
-	/* offsets into the irq_cpustat_t struct */
-	DEFINE(CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
-
-	/* offsets into the bi_record struct */
-	DEFINE(BIR_TAG, offsetof(struct bi_record, tag));
-	DEFINE(BIR_SIZE, offsetof(struct bi_record, size));
-	DEFINE(BIR_DATA, offsetof(struct bi_record, data));
-
-	/* offsets into font_desc (drivers/video/console/font.h) */
-	DEFINE(FONT_DESC_IDX, offsetof(struct font_desc, idx));
-	DEFINE(FONT_DESC_NAME, offsetof(struct font_desc, name));
-	DEFINE(FONT_DESC_WIDTH, offsetof(struct font_desc, width));
-	DEFINE(FONT_DESC_HEIGHT, offsetof(struct font_desc, height));
-	DEFINE(FONT_DESC_DATA, offsetof(struct font_desc, data));
-	DEFINE(FONT_DESC_PREF, offsetof(struct font_desc, pref));
-
-	/* signal defines */
-	DEFINE(LSIGSEGV, SIGSEGV);
-	DEFINE(LSEGV_MAPERR, SEGV_MAPERR);
-	DEFINE(LSIGTRAP, SIGTRAP);
-	DEFINE(LTRAP_TRACE, TRAP_TRACE);
-
-	/* offsets into the custom struct */
-	DEFINE(CUSTOMBASE, &amiga_custom);
-	DEFINE(C_INTENAR, offsetof(struct CUSTOM, intenar));
-	DEFINE(C_INTREQR, offsetof(struct CUSTOM, intreqr));
-	DEFINE(C_INTENA, offsetof(struct CUSTOM, intena));
-	DEFINE(C_INTREQ, offsetof(struct CUSTOM, intreq));
-	DEFINE(C_SERDATR, offsetof(struct CUSTOM, serdatr));
-	DEFINE(C_SERDAT, offsetof(struct CUSTOM, serdat));
-	DEFINE(C_SERPER, offsetof(struct CUSTOM, serper));
-	DEFINE(CIAABASE, &ciaa);
-	DEFINE(CIABBASE, &ciab);
-	DEFINE(C_PRA, offsetof(struct CIA, pra));
-	DEFINE(ZTWOBASE, zTwoBase);
-
-	return 0;
-}

diff --git a/arch/m68k/kernel/asm-offsets_mm.c b/arch/m68k/kernel/asm-offsets_mm.c
new file mode 100644
index 0000000..78e59b8
--- /dev/null
+++ b/arch/m68k/kernel/asm-offsets_mm.c

@@ -0,0 +1,100 @@
+/*
+ * This program is used to generate definitions needed by
+ * assembly language modules.
+ *
+ * We use the technique used in the OSF Mach kernel code:
+ * generate asm statements containing #defines,
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ */
+
+#define ASM_OFFSETS_C
+
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/kernel_stat.h>
+#include <linux/kbuild.h>
+#include <asm/bootinfo.h>
+#include <asm/irq.h>
+#include <asm/amigahw.h>
+#include <linux/font.h>
+
+int main(void)
+{
+	/* offsets into the task struct */
+	DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
+	DEFINE(TASK_INFO, offsetof(struct task_struct, thread.info));
+	DEFINE(TASK_MM, offsetof(struct task_struct, mm));
+#ifdef CONFIG_MMU
+	DEFINE(TASK_TINFO, offsetof(struct task_struct, thread.info));
+#endif
+
+	/* offsets into the thread struct */
+	DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
+	DEFINE(THREAD_USP, offsetof(struct thread_struct, usp));
+	DEFINE(THREAD_SR, offsetof(struct thread_struct, sr));
+	DEFINE(THREAD_FS, offsetof(struct thread_struct, fs));
+	DEFINE(THREAD_CRP, offsetof(struct thread_struct, crp));
+	DEFINE(THREAD_ESP0, offsetof(struct thread_struct, esp0));
+	DEFINE(THREAD_FPREG, offsetof(struct thread_struct, fp));
+	DEFINE(THREAD_FPCNTL, offsetof(struct thread_struct, fpcntl));
+	DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fpstate));
+
+	/* offsets into the thread_info struct */
+	DEFINE(TINFO_PREEMPT, offsetof(struct thread_info, preempt_count));
+	DEFINE(TINFO_FLAGS, offsetof(struct thread_info, flags));
+
+	/* offsets into the pt_regs */
+	DEFINE(PT_OFF_D0, offsetof(struct pt_regs, d0));
+	DEFINE(PT_OFF_ORIG_D0, offsetof(struct pt_regs, orig_d0));
+	DEFINE(PT_OFF_D1, offsetof(struct pt_regs, d1));
+	DEFINE(PT_OFF_D2, offsetof(struct pt_regs, d2));
+	DEFINE(PT_OFF_D3, offsetof(struct pt_regs, d3));
+	DEFINE(PT_OFF_D4, offsetof(struct pt_regs, d4));
+	DEFINE(PT_OFF_D5, offsetof(struct pt_regs, d5));
+	DEFINE(PT_OFF_A0, offsetof(struct pt_regs, a0));
+	DEFINE(PT_OFF_A1, offsetof(struct pt_regs, a1));
+	DEFINE(PT_OFF_A2, offsetof(struct pt_regs, a2));
+	DEFINE(PT_OFF_PC, offsetof(struct pt_regs, pc));
+	DEFINE(PT_OFF_SR, offsetof(struct pt_regs, sr));
+	/* bitfields are a bit difficult */
+	DEFINE(PT_OFF_FORMATVEC, offsetof(struct pt_regs, pc) + 4);
+
+	/* offsets into the irq_cpustat_t struct */
+	DEFINE(CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
+
+	/* offsets into the bi_record struct */
+	DEFINE(BIR_TAG, offsetof(struct bi_record, tag));
+	DEFINE(BIR_SIZE, offsetof(struct bi_record, size));
+	DEFINE(BIR_DATA, offsetof(struct bi_record, data));
+
+	/* offsets into font_desc (drivers/video/console/font.h) */
+	DEFINE(FONT_DESC_IDX, offsetof(struct font_desc, idx));
+	DEFINE(FONT_DESC_NAME, offsetof(struct font_desc, name));
+	DEFINE(FONT_DESC_WIDTH, offsetof(struct font_desc, width));
+	DEFINE(FONT_DESC_HEIGHT, offsetof(struct font_desc, height));
+	DEFINE(FONT_DESC_DATA, offsetof(struct font_desc, data));
+	DEFINE(FONT_DESC_PREF, offsetof(struct font_desc, pref));
+
+	/* signal defines */
+	DEFINE(LSIGSEGV, SIGSEGV);
+	DEFINE(LSEGV_MAPERR, SEGV_MAPERR);
+	DEFINE(LSIGTRAP, SIGTRAP);
+	DEFINE(LTRAP_TRACE, TRAP_TRACE);
+
+	/* offsets into the custom struct */
+	DEFINE(CUSTOMBASE, &amiga_custom);
+	DEFINE(C_INTENAR, offsetof(struct CUSTOM, intenar));
+	DEFINE(C_INTREQR, offsetof(struct CUSTOM, intreqr));
+	DEFINE(C_INTENA, offsetof(struct CUSTOM, intena));
+	DEFINE(C_INTREQ, offsetof(struct CUSTOM, intreq));
+	DEFINE(C_SERDATR, offsetof(struct CUSTOM, serdatr));
+	DEFINE(C_SERDAT, offsetof(struct CUSTOM, serdat));
+	DEFINE(C_SERPER, offsetof(struct CUSTOM, serper));
+	DEFINE(CIAABASE, &ciaa);
+	DEFINE(CIABBASE, &ciab);
+	DEFINE(C_PRA, offsetof(struct CIA, pra));
+	DEFINE(ZTWOBASE, zTwoBase);
+
+	return 0;
+}

diff --git a/arch/m68knommu/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets_no.c
similarity index 100%
rename from arch/m68knommu/kernel/asm-offsets.c
rename to arch/m68k/kernel/asm-offsets_no.c


diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index 4bbb3c2..90e8cb7 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c

@@ -1,130 +1,5 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-#undef DEBUG
-
-#include <linux/dma-mapping.h>
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/scatterlist.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include <asm/pgalloc.h>
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
-			 dma_addr_t *handle, gfp_t flag)
-{
-	struct page *page, **map;
-	pgprot_t pgprot;
-	void *addr;
-	int i, order;
-
-	pr_debug("dma_alloc_coherent: %d,%x\n", size, flag);
-
-	size = PAGE_ALIGN(size);
-	order = get_order(size);
-
-	page = alloc_pages(flag, order);
-	if (!page)
-		return NULL;
-
-	*handle = page_to_phys(page);
-	map = kmalloc(sizeof(struct page *) << order, flag & ~__GFP_DMA);
-	if (!map) {
-		__free_pages(page, order);
-		return NULL;
-	}
-	split_page(page, order);
-
-	order = 1 << order;
-	size >>= PAGE_SHIFT;
-	map[0] = page;
-	for (i = 1; i < size; i++)
-		map[i] = page + i;
-	for (; i < order; i++)
-		__free_page(page + i);
-	pgprot = __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY);
-	if (CPU_IS_040_OR_060)
-		pgprot_val(pgprot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S;
-	else
-		pgprot_val(pgprot) |= _PAGE_NOCACHE030;
-	addr = vmap(map, size, VM_MAP, pgprot);
-	kfree(map);
-
-	return addr;
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-void dma_free_coherent(struct device *dev, size_t size,
-		       void *addr, dma_addr_t handle)
-{
-	pr_debug("dma_free_coherent: %p, %x\n", addr, handle);
-	vfree(addr);
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
-void dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
-				size_t size, enum dma_data_direction dir)
-{
-	switch (dir) {
-	case DMA_TO_DEVICE:
-		cache_push(handle, size);
-		break;
-	case DMA_FROM_DEVICE:
-		cache_clear(handle, size);
-		break;
-	default:
-		if (printk_ratelimit())
-			printk("dma_sync_single_for_device: unsupported dir %u\n", dir);
-		break;
-	}
-}
-EXPORT_SYMBOL(dma_sync_single_for_device);
-
-void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents,
-			    enum dma_data_direction dir)
-{
-	int i;
-
-	for (i = 0; i < nents; sg++, i++)
-		dma_sync_single_for_device(dev, sg->dma_address, sg->length, dir);
-}
-EXPORT_SYMBOL(dma_sync_sg_for_device);
-
-dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size,
-			  enum dma_data_direction dir)
-{
-	dma_addr_t handle = virt_to_bus(addr);
-
-	dma_sync_single_for_device(dev, handle, size, dir);
-	return handle;
-}
-EXPORT_SYMBOL(dma_map_single);
-
-dma_addr_t dma_map_page(struct device *dev, struct page *page,
-			unsigned long offset, size_t size,
-			enum dma_data_direction dir)
-{
-	dma_addr_t handle = page_to_phys(page) + offset;
-
-	dma_sync_single_for_device(dev, handle, size, dir);
-	return handle;
-}
-EXPORT_SYMBOL(dma_map_page);
-
-int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-	       enum dma_data_direction dir)
-{
-	int i;
-
-	for (i = 0; i < nents; sg++, i++) {
-		sg->dma_address = sg_phys(sg);
-		dma_sync_single_for_device(dev, sg->dma_address, sg->length, dir);
-	}
-	return nents;
-}
-EXPORT_SYMBOL(dma_map_sg);
+#ifdef CONFIG_MMU
+#include "dma_mm.c"
+#else
+#include "dma_no.c"
+#endif

diff --git a/arch/m68k/kernel/dma_mm.c b/arch/m68k/kernel/dma_mm.c
new file mode 100644
index 0000000..4bbb3c2
--- /dev/null
+++ b/arch/m68k/kernel/dma_mm.c

@@ -0,0 +1,130 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#undef DEBUG
+
+#include <linux/dma-mapping.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <asm/pgalloc.h>
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			 dma_addr_t *handle, gfp_t flag)
+{
+	struct page *page, **map;
+	pgprot_t pgprot;
+	void *addr;
+	int i, order;
+
+	pr_debug("dma_alloc_coherent: %d,%x\n", size, flag);
+
+	size = PAGE_ALIGN(size);
+	order = get_order(size);
+
+	page = alloc_pages(flag, order);
+	if (!page)
+		return NULL;
+
+	*handle = page_to_phys(page);
+	map = kmalloc(sizeof(struct page *) << order, flag & ~__GFP_DMA);
+	if (!map) {
+		__free_pages(page, order);
+		return NULL;
+	}
+	split_page(page, order);
+
+	order = 1 << order;
+	size >>= PAGE_SHIFT;
+	map[0] = page;
+	for (i = 1; i < size; i++)
+		map[i] = page + i;
+	for (; i < order; i++)
+		__free_page(page + i);
+	pgprot = __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY);
+	if (CPU_IS_040_OR_060)
+		pgprot_val(pgprot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S;
+	else
+		pgprot_val(pgprot) |= _PAGE_NOCACHE030;
+	addr = vmap(map, size, VM_MAP, pgprot);
+	kfree(map);
+
+	return addr;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_coherent(struct device *dev, size_t size,
+		       void *addr, dma_addr_t handle)
+{
+	pr_debug("dma_free_coherent: %p, %x\n", addr, handle);
+	vfree(addr);
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+void dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
+				size_t size, enum dma_data_direction dir)
+{
+	switch (dir) {
+	case DMA_TO_DEVICE:
+		cache_push(handle, size);
+		break;
+	case DMA_FROM_DEVICE:
+		cache_clear(handle, size);
+		break;
+	default:
+		if (printk_ratelimit())
+			printk("dma_sync_single_for_device: unsupported dir %u\n", dir);
+		break;
+	}
+}
+EXPORT_SYMBOL(dma_sync_single_for_device);
+
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents,
+			    enum dma_data_direction dir)
+{
+	int i;
+
+	for (i = 0; i < nents; sg++, i++)
+		dma_sync_single_for_device(dev, sg->dma_address, sg->length, dir);
+}
+EXPORT_SYMBOL(dma_sync_sg_for_device);
+
+dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size,
+			  enum dma_data_direction dir)
+{
+	dma_addr_t handle = virt_to_bus(addr);
+
+	dma_sync_single_for_device(dev, handle, size, dir);
+	return handle;
+}
+EXPORT_SYMBOL(dma_map_single);
+
+dma_addr_t dma_map_page(struct device *dev, struct page *page,
+			unsigned long offset, size_t size,
+			enum dma_data_direction dir)
+{
+	dma_addr_t handle = page_to_phys(page) + offset;
+
+	dma_sync_single_for_device(dev, handle, size, dir);
+	return handle;
+}
+EXPORT_SYMBOL(dma_map_page);
+
+int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+	       enum dma_data_direction dir)
+{
+	int i;
+
+	for (i = 0; i < nents; sg++, i++) {
+		sg->dma_address = sg_phys(sg);
+		dma_sync_single_for_device(dev, sg->dma_address, sg->length, dir);
+	}
+	return nents;
+}
+EXPORT_SYMBOL(dma_map_sg);

diff --git a/arch/m68knommu/kernel/dma.c b/arch/m68k/kernel/dma_no.c
similarity index 100%
rename from arch/m68knommu/kernel/dma.c
rename to arch/m68k/kernel/dma_no.c


diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 1559dea..081cf96 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S

@@ -1,753 +1,5 @@
-/* -*- mode: asm -*-
- *
- *  linux/arch/m68k/kernel/entry.S
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file README.legal in the main directory of this archive
- * for more details.
- *
- * Linux/m68k support by Hamish Macdonald
- *
- * 68060 fixes by Jesper Skov
- *
- */
-
-/*
- * entry.S  contains the system-call and fault low-level handling routines.
- * This also contains the timer-interrupt handler, as well as all interrupts
- * and faults that can result in a task-switch.
- *
- * NOTE: This code handles signal-recognition, which happens every time
- * after a timer-interrupt and after each system call.
- *
- */
-
-/*
- * 12/03/96 Jes: Currently we only support m68k single-cpu systems, so
- *               all pointers that used to be 'current' are now entry
- *               number 0 in the 'current_set' list.
- *
- *  6/05/00 RZ:	 addedd writeback completion after return from sighandler
- *		 for 68040
- */
-
-#include <linux/linkage.h>
-#include <asm/entry.h>
-#include <asm/errno.h>
-#include <asm/setup.h>
-#include <asm/segment.h>
-#include <asm/traps.h>
-#include <asm/unistd.h>
-
-#include <asm/asm-offsets.h>
-
-.globl system_call, buserr, trap, resume
-.globl sys_call_table
-.globl sys_fork, sys_clone, sys_vfork
-.globl ret_from_interrupt, bad_interrupt
-.globl auto_irqhandler_fixup
-.globl user_irqvec_fixup, user_irqhandler_fixup
-
-.text
-ENTRY(buserr)
-	SAVE_ALL_INT
-	GET_CURRENT(%d0)
-	movel	%sp,%sp@-		| stack frame pointer argument
-	bsrl	buserr_c
-	addql	#4,%sp
-	jra	.Lret_from_exception
-
-ENTRY(trap)
-	SAVE_ALL_INT
-	GET_CURRENT(%d0)
-	movel	%sp,%sp@-		| stack frame pointer argument
-	bsrl	trap_c
-	addql	#4,%sp
-	jra	.Lret_from_exception
-
-	| After a fork we jump here directly from resume,
-	| so that %d1 contains the previous task
-	| schedule_tail now used regardless of CONFIG_SMP
-ENTRY(ret_from_fork)
-	movel	%d1,%sp@-
-	jsr	schedule_tail
-	addql	#4,%sp
-	jra	.Lret_from_exception
-
-do_trace_entry:
-	movel	#-ENOSYS,%sp@(PT_OFF_D0)| needed for strace
-	subql	#4,%sp
-	SAVE_SWITCH_STACK
-	jbsr	syscall_trace
-	RESTORE_SWITCH_STACK
-	addql	#4,%sp
-	movel	%sp@(PT_OFF_ORIG_D0),%d0
-	cmpl	#NR_syscalls,%d0
-	jcs	syscall
-badsys:
-	movel	#-ENOSYS,%sp@(PT_OFF_D0)
-	jra	ret_from_syscall
-
-do_trace_exit:
-	subql	#4,%sp
-	SAVE_SWITCH_STACK
-	jbsr	syscall_trace
-	RESTORE_SWITCH_STACK
-	addql	#4,%sp
-	jra	.Lret_from_exception
-
-ENTRY(ret_from_signal)
-	tstb	%curptr@(TASK_INFO+TINFO_FLAGS+2)
-	jge	1f
-	jbsr	syscall_trace
-1:	RESTORE_SWITCH_STACK
-	addql	#4,%sp
-/* on 68040 complete pending writebacks if any */
-#ifdef CONFIG_M68040
-	bfextu	%sp@(PT_OFF_FORMATVEC){#0,#4},%d0
-	subql	#7,%d0				| bus error frame ?
-	jbne	1f
-	movel	%sp,%sp@-
-	jbsr	berr_040cleanup
-	addql	#4,%sp
-1:
+#ifdef CONFIG_MMU
+#include "entry_mm.S"
+#else
+#include "entry_no.S"
 #endif
-	jra	.Lret_from_exception
-
-ENTRY(system_call)
-	SAVE_ALL_SYS
-
-	GET_CURRENT(%d1)
-	| save top of frame
-	movel	%sp,%curptr@(TASK_THREAD+THREAD_ESP0)
-
-	| syscall trace?
-	tstb	%curptr@(TASK_INFO+TINFO_FLAGS+2)
-	jmi	do_trace_entry
-	cmpl	#NR_syscalls,%d0
-	jcc	badsys
-syscall:
-	jbsr	@(sys_call_table,%d0:l:4)@(0)
-	movel	%d0,%sp@(PT_OFF_D0)	| save the return value
-ret_from_syscall:
-	|oriw	#0x0700,%sr
-	movew	%curptr@(TASK_INFO+TINFO_FLAGS+2),%d0
-	jne	syscall_exit_work
-1:	RESTORE_ALL
-
-syscall_exit_work:
-	btst	#5,%sp@(PT_OFF_SR)	| check if returning to kernel
-	bnes	1b			| if so, skip resched, signals
-	lslw	#1,%d0
-	jcs	do_trace_exit
-	jmi	do_delayed_trace
-	lslw	#8,%d0
-	jmi	do_signal_return
-	pea	resume_userspace
-	jra	schedule
-
-
-ENTRY(ret_from_exception)
-.Lret_from_exception:
-	btst	#5,%sp@(PT_OFF_SR)	| check if returning to kernel
-	bnes	1f			| if so, skip resched, signals
-	| only allow interrupts when we are really the last one on the
-	| kernel stack, otherwise stack overflow can occur during
-	| heavy interrupt load
-	andw	#ALLOWINT,%sr
-
-resume_userspace:
-	moveb	%curptr@(TASK_INFO+TINFO_FLAGS+3),%d0
-	jne	exit_work
-1:	RESTORE_ALL
-
-exit_work:
-	| save top of frame
-	movel	%sp,%curptr@(TASK_THREAD+THREAD_ESP0)
-	lslb	#1,%d0
-	jmi	do_signal_return
-	pea	resume_userspace
-	jra	schedule
-
-
-do_signal_return:
-	|andw	#ALLOWINT,%sr
-	subql	#4,%sp			| dummy return address
-	SAVE_SWITCH_STACK
-	pea	%sp@(SWITCH_STACK_SIZE)
-	bsrl	do_signal
-	addql	#4,%sp
-	RESTORE_SWITCH_STACK
-	addql	#4,%sp
-	jbra	resume_userspace
-
-do_delayed_trace:
-	bclr	#7,%sp@(PT_OFF_SR)	| clear trace bit in SR
-	pea	1			| send SIGTRAP
-	movel	%curptr,%sp@-
-	pea	LSIGTRAP
-	jbsr	send_sig
-	addql	#8,%sp
-	addql	#4,%sp
-	jbra	resume_userspace
-
-
-/* This is the main interrupt handler for autovector interrupts */
-
-ENTRY(auto_inthandler)
-	SAVE_ALL_INT
-	GET_CURRENT(%d0)
-	addqb	#1,%curptr@(TASK_INFO+TINFO_PREEMPT+1)
-					|  put exception # in d0
-	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
-	subw	#VEC_SPUR,%d0
-
-	movel	%sp,%sp@-
-	movel	%d0,%sp@-		|  put vector # on stack
-auto_irqhandler_fixup = . + 2
-	jsr	__m68k_handle_int	|  process the IRQ
-	addql	#8,%sp			|  pop parameters off stack
-
-ret_from_interrupt:
-	subqb	#1,%curptr@(TASK_INFO+TINFO_PREEMPT+1)
-	jeq	ret_from_last_interrupt
-2:	RESTORE_ALL
-
-	ALIGN
-ret_from_last_interrupt:
-	moveq	#(~ALLOWINT>>8)&0xff,%d0
-	andb	%sp@(PT_OFF_SR),%d0
-	jne	2b
-
-	/* check if we need to do software interrupts */
-	tstl	irq_stat+CPUSTAT_SOFTIRQ_PENDING
-	jeq	.Lret_from_exception
-	pea	ret_from_exception
-	jra	do_softirq
-
-/* Handler for user defined interrupt vectors */
-
-ENTRY(user_inthandler)
-	SAVE_ALL_INT
-	GET_CURRENT(%d0)
-	addqb	#1,%curptr@(TASK_INFO+TINFO_PREEMPT+1)
-					|  put exception # in d0
-	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
-user_irqvec_fixup = . + 2
-	subw	#VEC_USER,%d0
-
-	movel	%sp,%sp@-
-	movel	%d0,%sp@-		|  put vector # on stack
-user_irqhandler_fixup = . + 2
-	jsr	__m68k_handle_int	|  process the IRQ
-	addql	#8,%sp			|  pop parameters off stack
-
-	subqb	#1,%curptr@(TASK_INFO+TINFO_PREEMPT+1)
-	jeq	ret_from_last_interrupt
-	RESTORE_ALL
-
-/* Handler for uninitialized and spurious interrupts */
-
-ENTRY(bad_inthandler)
-	SAVE_ALL_INT
-	GET_CURRENT(%d0)
-	addqb	#1,%curptr@(TASK_INFO+TINFO_PREEMPT+1)
-
-	movel	%sp,%sp@-
-	jsr	handle_badint
-	addql	#4,%sp
-
-	subqb	#1,%curptr@(TASK_INFO+TINFO_PREEMPT+1)
-	jeq	ret_from_last_interrupt
-	RESTORE_ALL
-
-
-ENTRY(sys_fork)
-	SAVE_SWITCH_STACK
-	pea	%sp@(SWITCH_STACK_SIZE)
-	jbsr	m68k_fork
-	addql	#4,%sp
-	RESTORE_SWITCH_STACK
-	rts
-
-ENTRY(sys_clone)
-	SAVE_SWITCH_STACK
-	pea	%sp@(SWITCH_STACK_SIZE)
-	jbsr	m68k_clone
-	addql	#4,%sp
-	RESTORE_SWITCH_STACK
-	rts
-
-ENTRY(sys_vfork)
-	SAVE_SWITCH_STACK
-	pea	%sp@(SWITCH_STACK_SIZE)
-	jbsr	m68k_vfork
-	addql	#4,%sp
-	RESTORE_SWITCH_STACK
-	rts
-
-ENTRY(sys_sigreturn)
-	SAVE_SWITCH_STACK
-	jbsr	do_sigreturn
-	RESTORE_SWITCH_STACK
-	rts
-
-ENTRY(sys_rt_sigreturn)
-	SAVE_SWITCH_STACK
-	jbsr	do_rt_sigreturn
-	RESTORE_SWITCH_STACK
-	rts
-
-resume:
-	/*
-	 * Beware - when entering resume, prev (the current task) is
-	 * in a0, next (the new task) is in a1,so don't change these
-	 * registers until their contents are no longer needed.
-	 */
-
-	/* save sr */
-	movew	%sr,%a0@(TASK_THREAD+THREAD_SR)
-
-	/* save fs (sfc,%dfc) (may be pointing to kernel memory) */
-	movec	%sfc,%d0
-	movew	%d0,%a0@(TASK_THREAD+THREAD_FS)
-
-	/* save usp */
-	/* it is better to use a movel here instead of a movew 8*) */
-	movec	%usp,%d0
-	movel	%d0,%a0@(TASK_THREAD+THREAD_USP)
-
-	/* save non-scratch registers on stack */
-	SAVE_SWITCH_STACK
-
-	/* save current kernel stack pointer */
-	movel	%sp,%a0@(TASK_THREAD+THREAD_KSP)
-
-	/* save floating point context */
-#ifndef CONFIG_M68KFPU_EMU_ONLY
-#ifdef CONFIG_M68KFPU_EMU
-	tstl	m68k_fputype
-	jeq	3f
-#endif
-	fsave	%a0@(TASK_THREAD+THREAD_FPSTATE)
-
-#if defined(CONFIG_M68060)
-#if !defined(CPU_M68060_ONLY)
-	btst	#3,m68k_cputype+3
-	beqs	1f
-#endif
-	/* The 060 FPU keeps status in bits 15-8 of the first longword */
-	tstb	%a0@(TASK_THREAD+THREAD_FPSTATE+2)
-	jeq	3f
-#if !defined(CPU_M68060_ONLY)
-	jra	2f
-#endif
-#endif /* CONFIG_M68060 */
-#if !defined(CPU_M68060_ONLY)
-1:	tstb	%a0@(TASK_THREAD+THREAD_FPSTATE)
-	jeq	3f
-#endif
-2:	fmovemx	%fp0-%fp7,%a0@(TASK_THREAD+THREAD_FPREG)
-	fmoveml	%fpcr/%fpsr/%fpiar,%a0@(TASK_THREAD+THREAD_FPCNTL)
-3:
-#endif	/* CONFIG_M68KFPU_EMU_ONLY */
-	/* Return previous task in %d1 */
-	movel	%curptr,%d1
-
-	/* switch to new task (a1 contains new task) */
-	movel	%a1,%curptr
-
-	/* restore floating point context */
-#ifndef CONFIG_M68KFPU_EMU_ONLY
-#ifdef CONFIG_M68KFPU_EMU
-	tstl	m68k_fputype
-	jeq	4f
-#endif
-#if defined(CONFIG_M68060)
-#if !defined(CPU_M68060_ONLY)
-	btst	#3,m68k_cputype+3
-	beqs	1f
-#endif
-	/* The 060 FPU keeps status in bits 15-8 of the first longword */
-	tstb	%a1@(TASK_THREAD+THREAD_FPSTATE+2)
-	jeq	3f
-#if !defined(CPU_M68060_ONLY)
-	jra	2f
-#endif
-#endif /* CONFIG_M68060 */
-#if !defined(CPU_M68060_ONLY)
-1:	tstb	%a1@(TASK_THREAD+THREAD_FPSTATE)
-	jeq	3f
-#endif
-2:	fmovemx	%a1@(TASK_THREAD+THREAD_FPREG),%fp0-%fp7
-	fmoveml	%a1@(TASK_THREAD+THREAD_FPCNTL),%fpcr/%fpsr/%fpiar
-3:	frestore %a1@(TASK_THREAD+THREAD_FPSTATE)
-4:
-#endif	/* CONFIG_M68KFPU_EMU_ONLY */
-
-	/* restore the kernel stack pointer */
-	movel	%a1@(TASK_THREAD+THREAD_KSP),%sp
-
-	/* restore non-scratch registers */
-	RESTORE_SWITCH_STACK
-
-	/* restore user stack pointer */
-	movel	%a1@(TASK_THREAD+THREAD_USP),%a0
-	movel	%a0,%usp
-
-	/* restore fs (sfc,%dfc) */
-	movew	%a1@(TASK_THREAD+THREAD_FS),%a0
-	movec	%a0,%sfc
-	movec	%a0,%dfc
-
-	/* restore status register */
-	movew	%a1@(TASK_THREAD+THREAD_SR),%sr
-
-	rts
-
-.data
-ALIGN
-sys_call_table:
-	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
-	.long sys_exit
-	.long sys_fork
-	.long sys_read
-	.long sys_write
-	.long sys_open		/* 5 */
-	.long sys_close
-	.long sys_waitpid
-	.long sys_creat
-	.long sys_link
-	.long sys_unlink	/* 10 */
-	.long sys_execve
-	.long sys_chdir
-	.long sys_time
-	.long sys_mknod
-	.long sys_chmod		/* 15 */
-	.long sys_chown16
-	.long sys_ni_syscall				/* old break syscall holder */
-	.long sys_stat
-	.long sys_lseek
-	.long sys_getpid	/* 20 */
-	.long sys_mount
-	.long sys_oldumount
-	.long sys_setuid16
-	.long sys_getuid16
-	.long sys_stime		/* 25 */
-	.long sys_ptrace
-	.long sys_alarm
-	.long sys_fstat
-	.long sys_pause
-	.long sys_utime		/* 30 */
-	.long sys_ni_syscall				/* old stty syscall holder */
-	.long sys_ni_syscall				/* old gtty syscall holder */
-	.long sys_access
-	.long sys_nice
-	.long sys_ni_syscall	/* 35 */	/* old ftime syscall holder */
-	.long sys_sync
-	.long sys_kill
-	.long sys_rename
-	.long sys_mkdir
-	.long sys_rmdir		/* 40 */
-	.long sys_dup
-	.long sys_pipe
-	.long sys_times
-	.long sys_ni_syscall				/* old prof syscall holder */
-	.long sys_brk		/* 45 */
-	.long sys_setgid16
-	.long sys_getgid16
-	.long sys_signal
-	.long sys_geteuid16
-	.long sys_getegid16	/* 50 */
-	.long sys_acct
-	.long sys_umount				/* recycled never used phys() */
-	.long sys_ni_syscall				/* old lock syscall holder */
-	.long sys_ioctl
-	.long sys_fcntl		/* 55 */
-	.long sys_ni_syscall				/* old mpx syscall holder */
-	.long sys_setpgid
-	.long sys_ni_syscall				/* old ulimit syscall holder */
-	.long sys_ni_syscall
-	.long sys_umask		/* 60 */
-	.long sys_chroot
-	.long sys_ustat
-	.long sys_dup2
-	.long sys_getppid
-	.long sys_getpgrp	/* 65 */
-	.long sys_setsid
-	.long sys_sigaction
-	.long sys_sgetmask
-	.long sys_ssetmask
-	.long sys_setreuid16	/* 70 */
-	.long sys_setregid16
-	.long sys_sigsuspend
-	.long sys_sigpending
-	.long sys_sethostname
-	.long sys_setrlimit	/* 75 */
-	.long sys_old_getrlimit
-	.long sys_getrusage
-	.long sys_gettimeofday
-	.long sys_settimeofday
-	.long sys_getgroups16	/* 80 */
-	.long sys_setgroups16
-	.long sys_old_select
-	.long sys_symlink
-	.long sys_lstat
-	.long sys_readlink	/* 85 */
-	.long sys_uselib
-	.long sys_swapon
-	.long sys_reboot
-	.long sys_old_readdir
-	.long sys_old_mmap	/* 90 */
-	.long sys_munmap
-	.long sys_truncate
-	.long sys_ftruncate
-	.long sys_fchmod
-	.long sys_fchown16	/* 95 */
-	.long sys_getpriority
-	.long sys_setpriority
-	.long sys_ni_syscall				/* old profil syscall holder */
-	.long sys_statfs
-	.long sys_fstatfs	/* 100 */
-	.long sys_ni_syscall				/* ioperm for i386 */
-	.long sys_socketcall
-	.long sys_syslog
-	.long sys_setitimer
-	.long sys_getitimer	/* 105 */
-	.long sys_newstat
-	.long sys_newlstat
-	.long sys_newfstat
-	.long sys_ni_syscall
-	.long sys_ni_syscall	/* 110 */	/* iopl for i386 */
-	.long sys_vhangup
-	.long sys_ni_syscall				/* obsolete idle() syscall */
-	.long sys_ni_syscall				/* vm86old for i386 */
-	.long sys_wait4
-	.long sys_swapoff	/* 115 */
-	.long sys_sysinfo
-	.long sys_ipc
-	.long sys_fsync
-	.long sys_sigreturn
-	.long sys_clone		/* 120 */
-	.long sys_setdomainname
-	.long sys_newuname
-	.long sys_cacheflush				/* modify_ldt for i386 */
-	.long sys_adjtimex
-	.long sys_mprotect	/* 125 */
-	.long sys_sigprocmask
-	.long sys_ni_syscall		/* old "create_module" */
-	.long sys_init_module
-	.long sys_delete_module
-	.long sys_ni_syscall	/* 130 - old "get_kernel_syms" */
-	.long sys_quotactl
-	.long sys_getpgid
-	.long sys_fchdir
-	.long sys_bdflush
-	.long sys_sysfs		/* 135 */
-	.long sys_personality
-	.long sys_ni_syscall				/* for afs_syscall */
-	.long sys_setfsuid16
-	.long sys_setfsgid16
-	.long sys_llseek	/* 140 */
-	.long sys_getdents
-	.long sys_select
-	.long sys_flock
-	.long sys_msync
-	.long sys_readv		/* 145 */
-	.long sys_writev
-	.long sys_getsid
-	.long sys_fdatasync
-	.long sys_sysctl
-	.long sys_mlock		/* 150 */
-	.long sys_munlock
-	.long sys_mlockall
-	.long sys_munlockall
-	.long sys_sched_setparam
-	.long sys_sched_getparam	/* 155 */
-	.long sys_sched_setscheduler
-	.long sys_sched_getscheduler
-	.long sys_sched_yield
-	.long sys_sched_get_priority_max
-	.long sys_sched_get_priority_min  /* 160 */
-	.long sys_sched_rr_get_interval
-	.long sys_nanosleep
-	.long sys_mremap
-	.long sys_setresuid16
-	.long sys_getresuid16	/* 165 */
-	.long sys_getpagesize
-	.long sys_ni_syscall		/* old sys_query_module */
-	.long sys_poll
-	.long sys_nfsservctl
-	.long sys_setresgid16	/* 170 */
-	.long sys_getresgid16
-	.long sys_prctl
-	.long sys_rt_sigreturn
-	.long sys_rt_sigaction
-	.long sys_rt_sigprocmask	/* 175 */
-	.long sys_rt_sigpending
-	.long sys_rt_sigtimedwait
-	.long sys_rt_sigqueueinfo
-	.long sys_rt_sigsuspend
-	.long sys_pread64	/* 180 */
-	.long sys_pwrite64
-	.long sys_lchown16;
-	.long sys_getcwd
-	.long sys_capget
-	.long sys_capset	/* 185 */
-	.long sys_sigaltstack
-	.long sys_sendfile
-	.long sys_ni_syscall				/* streams1 */
-	.long sys_ni_syscall				/* streams2 */
-	.long sys_vfork		/* 190 */
-	.long sys_getrlimit
-	.long sys_mmap2
-	.long sys_truncate64
-	.long sys_ftruncate64
-	.long sys_stat64	/* 195 */
-	.long sys_lstat64
-	.long sys_fstat64
-	.long sys_chown
-	.long sys_getuid
-	.long sys_getgid	/* 200 */
-	.long sys_geteuid
-	.long sys_getegid
-	.long sys_setreuid
-	.long sys_setregid
-	.long sys_getgroups	/* 205 */
-	.long sys_setgroups
-	.long sys_fchown
-	.long sys_setresuid
-	.long sys_getresuid
-	.long sys_setresgid	/* 210 */
-	.long sys_getresgid
-	.long sys_lchown
-	.long sys_setuid
-	.long sys_setgid
-	.long sys_setfsuid	/* 215 */
-	.long sys_setfsgid
-	.long sys_pivot_root
-	.long sys_ni_syscall
-	.long sys_ni_syscall
-	.long sys_getdents64	/* 220 */
-	.long sys_gettid
-	.long sys_tkill
-	.long sys_setxattr
-	.long sys_lsetxattr
-	.long sys_fsetxattr	/* 225 */
-	.long sys_getxattr
-	.long sys_lgetxattr
-	.long sys_fgetxattr
-	.long sys_listxattr
-	.long sys_llistxattr	/* 230 */
-	.long sys_flistxattr
-	.long sys_removexattr
-	.long sys_lremovexattr
-	.long sys_fremovexattr
-	.long sys_futex		/* 235 */
-	.long sys_sendfile64
-	.long sys_mincore
-	.long sys_madvise
-	.long sys_fcntl64
-	.long sys_readahead	/* 240 */
-	.long sys_io_setup
-	.long sys_io_destroy
-	.long sys_io_getevents
-	.long sys_io_submit
-	.long sys_io_cancel	/* 245 */
-	.long sys_fadvise64
-	.long sys_exit_group
-	.long sys_lookup_dcookie
-	.long sys_epoll_create
-	.long sys_epoll_ctl	/* 250 */
-	.long sys_epoll_wait
-	.long sys_remap_file_pages
-	.long sys_set_tid_address
-	.long sys_timer_create
-	.long sys_timer_settime	/* 255 */
-	.long sys_timer_gettime
-	.long sys_timer_getoverrun
-	.long sys_timer_delete
-	.long sys_clock_settime
-	.long sys_clock_gettime	/* 260 */
-	.long sys_clock_getres
-	.long sys_clock_nanosleep
-	.long sys_statfs64
-	.long sys_fstatfs64
-	.long sys_tgkill	/* 265 */
-	.long sys_utimes
-	.long sys_fadvise64_64
-	.long sys_mbind
-	.long sys_get_mempolicy
-	.long sys_set_mempolicy	/* 270 */
-	.long sys_mq_open
-	.long sys_mq_unlink
-	.long sys_mq_timedsend
-	.long sys_mq_timedreceive
-	.long sys_mq_notify	/* 275 */
-	.long sys_mq_getsetattr
-	.long sys_waitid
-	.long sys_ni_syscall	/* for sys_vserver */
-	.long sys_add_key
-	.long sys_request_key	/* 280 */
-	.long sys_keyctl
-	.long sys_ioprio_set
-	.long sys_ioprio_get
-	.long sys_inotify_init
-	.long sys_inotify_add_watch	/* 285 */
-	.long sys_inotify_rm_watch
-	.long sys_migrate_pages
-	.long sys_openat
-	.long sys_mkdirat
-	.long sys_mknodat		/* 290 */
-	.long sys_fchownat
-	.long sys_futimesat
-	.long sys_fstatat64
-	.long sys_unlinkat
-	.long sys_renameat		/* 295 */
-	.long sys_linkat
-	.long sys_symlinkat
-	.long sys_readlinkat
-	.long sys_fchmodat
-	.long sys_faccessat		/* 300 */
-	.long sys_ni_syscall		/* Reserved for pselect6 */
-	.long sys_ni_syscall		/* Reserved for ppoll */
-	.long sys_unshare
-	.long sys_set_robust_list
-	.long sys_get_robust_list	/* 305 */
-	.long sys_splice
-	.long sys_sync_file_range
-	.long sys_tee
-	.long sys_vmsplice
-	.long sys_move_pages		/* 310 */
-	.long sys_sched_setaffinity
-	.long sys_sched_getaffinity
-	.long sys_kexec_load
-	.long sys_getcpu
-	.long sys_epoll_pwait		/* 315 */
-	.long sys_utimensat
-	.long sys_signalfd
-	.long sys_timerfd_create
-	.long sys_eventfd
-	.long sys_fallocate		/* 320 */
-	.long sys_timerfd_settime
-	.long sys_timerfd_gettime
-	.long sys_signalfd4
-	.long sys_eventfd2
-	.long sys_epoll_create1		/* 325 */
-	.long sys_dup3
-	.long sys_pipe2
-	.long sys_inotify_init1
-	.long sys_preadv
-	.long sys_pwritev		/* 330 */
-	.long sys_rt_tgsigqueueinfo
-	.long sys_perf_event_open
-	.long sys_get_thread_area
-	.long sys_set_thread_area
-	.long sys_atomic_cmpxchg_32	/* 335 */
-	.long sys_atomic_barrier
-	.long sys_fanotify_init
-	.long sys_fanotify_mark
-	.long sys_prlimit64
-

diff --git a/arch/m68k/kernel/entry_mm.S b/arch/m68k/kernel/entry_mm.S
new file mode 100644
index 0000000..1559dea
--- /dev/null
+++ b/arch/m68k/kernel/entry_mm.S

@@ -0,0 +1,753 @@
+/* -*- mode: asm -*-
+ *
+ *  linux/arch/m68k/kernel/entry.S
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file README.legal in the main directory of this archive
+ * for more details.
+ *
+ * Linux/m68k support by Hamish Macdonald
+ *
+ * 68060 fixes by Jesper Skov
+ *
+ */
+
+/*
+ * entry.S  contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * NOTE: This code handles signal-recognition, which happens every time
+ * after a timer-interrupt and after each system call.
+ *
+ */
+
+/*
+ * 12/03/96 Jes: Currently we only support m68k single-cpu systems, so
+ *               all pointers that used to be 'current' are now entry
+ *               number 0 in the 'current_set' list.
+ *
+ *  6/05/00 RZ:	 addedd writeback completion after return from sighandler
+ *		 for 68040
+ */
+
+#include <linux/linkage.h>
+#include <asm/entry.h>
+#include <asm/errno.h>
+#include <asm/setup.h>
+#include <asm/segment.h>
+#include <asm/traps.h>
+#include <asm/unistd.h>
+
+#include <asm/asm-offsets.h>
+
+.globl system_call, buserr, trap, resume
+.globl sys_call_table
+.globl sys_fork, sys_clone, sys_vfork
+.globl ret_from_interrupt, bad_interrupt
+.globl auto_irqhandler_fixup
+.globl user_irqvec_fixup, user_irqhandler_fixup
+
+.text
+ENTRY(buserr)
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	movel	%sp,%sp@-		| stack frame pointer argument
+	bsrl	buserr_c
+	addql	#4,%sp
+	jra	.Lret_from_exception
+
+ENTRY(trap)
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	movel	%sp,%sp@-		| stack frame pointer argument
+	bsrl	trap_c
+	addql	#4,%sp
+	jra	.Lret_from_exception
+
+	| After a fork we jump here directly from resume,
+	| so that %d1 contains the previous task
+	| schedule_tail now used regardless of CONFIG_SMP
+ENTRY(ret_from_fork)
+	movel	%d1,%sp@-
+	jsr	schedule_tail
+	addql	#4,%sp
+	jra	.Lret_from_exception
+
+do_trace_entry:
+	movel	#-ENOSYS,%sp@(PT_OFF_D0)| needed for strace
+	subql	#4,%sp
+	SAVE_SWITCH_STACK
+	jbsr	syscall_trace
+	RESTORE_SWITCH_STACK
+	addql	#4,%sp
+	movel	%sp@(PT_OFF_ORIG_D0),%d0
+	cmpl	#NR_syscalls,%d0
+	jcs	syscall
+badsys:
+	movel	#-ENOSYS,%sp@(PT_OFF_D0)
+	jra	ret_from_syscall
+
+do_trace_exit:
+	subql	#4,%sp
+	SAVE_SWITCH_STACK
+	jbsr	syscall_trace
+	RESTORE_SWITCH_STACK
+	addql	#4,%sp
+	jra	.Lret_from_exception
+
+ENTRY(ret_from_signal)
+	tstb	%curptr@(TASK_INFO+TINFO_FLAGS+2)
+	jge	1f
+	jbsr	syscall_trace
+1:	RESTORE_SWITCH_STACK
+	addql	#4,%sp
+/* on 68040 complete pending writebacks if any */
+#ifdef CONFIG_M68040
+	bfextu	%sp@(PT_OFF_FORMATVEC){#0,#4},%d0
+	subql	#7,%d0				| bus error frame ?
+	jbne	1f
+	movel	%sp,%sp@-
+	jbsr	berr_040cleanup
+	addql	#4,%sp
+1:
+#endif
+	jra	.Lret_from_exception
+
+ENTRY(system_call)
+	SAVE_ALL_SYS
+
+	GET_CURRENT(%d1)
+	| save top of frame
+	movel	%sp,%curptr@(TASK_THREAD+THREAD_ESP0)
+
+	| syscall trace?
+	tstb	%curptr@(TASK_INFO+TINFO_FLAGS+2)
+	jmi	do_trace_entry
+	cmpl	#NR_syscalls,%d0
+	jcc	badsys
+syscall:
+	jbsr	@(sys_call_table,%d0:l:4)@(0)
+	movel	%d0,%sp@(PT_OFF_D0)	| save the return value
+ret_from_syscall:
+	|oriw	#0x0700,%sr
+	movew	%curptr@(TASK_INFO+TINFO_FLAGS+2),%d0
+	jne	syscall_exit_work
+1:	RESTORE_ALL
+
+syscall_exit_work:
+	btst	#5,%sp@(PT_OFF_SR)	| check if returning to kernel
+	bnes	1b			| if so, skip resched, signals
+	lslw	#1,%d0
+	jcs	do_trace_exit
+	jmi	do_delayed_trace
+	lslw	#8,%d0
+	jmi	do_signal_return
+	pea	resume_userspace
+	jra	schedule
+
+
+ENTRY(ret_from_exception)
+.Lret_from_exception:
+	btst	#5,%sp@(PT_OFF_SR)	| check if returning to kernel
+	bnes	1f			| if so, skip resched, signals
+	| only allow interrupts when we are really the last one on the
+	| kernel stack, otherwise stack overflow can occur during
+	| heavy interrupt load
+	andw	#ALLOWINT,%sr
+
+resume_userspace:
+	moveb	%curptr@(TASK_INFO+TINFO_FLAGS+3),%d0
+	jne	exit_work
+1:	RESTORE_ALL
+
+exit_work:
+	| save top of frame
+	movel	%sp,%curptr@(TASK_THREAD+THREAD_ESP0)
+	lslb	#1,%d0
+	jmi	do_signal_return
+	pea	resume_userspace
+	jra	schedule
+
+
+do_signal_return:
+	|andw	#ALLOWINT,%sr
+	subql	#4,%sp			| dummy return address
+	SAVE_SWITCH_STACK
+	pea	%sp@(SWITCH_STACK_SIZE)
+	bsrl	do_signal
+	addql	#4,%sp
+	RESTORE_SWITCH_STACK
+	addql	#4,%sp
+	jbra	resume_userspace
+
+do_delayed_trace:
+	bclr	#7,%sp@(PT_OFF_SR)	| clear trace bit in SR
+	pea	1			| send SIGTRAP
+	movel	%curptr,%sp@-
+	pea	LSIGTRAP
+	jbsr	send_sig
+	addql	#8,%sp
+	addql	#4,%sp
+	jbra	resume_userspace
+
+
+/* This is the main interrupt handler for autovector interrupts */
+
+ENTRY(auto_inthandler)
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	addqb	#1,%curptr@(TASK_INFO+TINFO_PREEMPT+1)
+					|  put exception # in d0
+	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
+	subw	#VEC_SPUR,%d0
+
+	movel	%sp,%sp@-
+	movel	%d0,%sp@-		|  put vector # on stack
+auto_irqhandler_fixup = . + 2
+	jsr	__m68k_handle_int	|  process the IRQ
+	addql	#8,%sp			|  pop parameters off stack
+
+ret_from_interrupt:
+	subqb	#1,%curptr@(TASK_INFO+TINFO_PREEMPT+1)
+	jeq	ret_from_last_interrupt
+2:	RESTORE_ALL
+
+	ALIGN
+ret_from_last_interrupt:
+	moveq	#(~ALLOWINT>>8)&0xff,%d0
+	andb	%sp@(PT_OFF_SR),%d0
+	jne	2b
+
+	/* check if we need to do software interrupts */
+	tstl	irq_stat+CPUSTAT_SOFTIRQ_PENDING
+	jeq	.Lret_from_exception
+	pea	ret_from_exception
+	jra	do_softirq
+
+/* Handler for user defined interrupt vectors */
+
+ENTRY(user_inthandler)
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	addqb	#1,%curptr@(TASK_INFO+TINFO_PREEMPT+1)
+					|  put exception # in d0
+	bfextu	%sp@(PT_OFF_FORMATVEC){#4,#10},%d0
+user_irqvec_fixup = . + 2
+	subw	#VEC_USER,%d0
+
+	movel	%sp,%sp@-
+	movel	%d0,%sp@-		|  put vector # on stack
+user_irqhandler_fixup = . + 2
+	jsr	__m68k_handle_int	|  process the IRQ
+	addql	#8,%sp			|  pop parameters off stack
+
+	subqb	#1,%curptr@(TASK_INFO+TINFO_PREEMPT+1)
+	jeq	ret_from_last_interrupt
+	RESTORE_ALL
+
+/* Handler for uninitialized and spurious interrupts */
+
+ENTRY(bad_inthandler)
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	addqb	#1,%curptr@(TASK_INFO+TINFO_PREEMPT+1)
+
+	movel	%sp,%sp@-
+	jsr	handle_badint
+	addql	#4,%sp
+
+	subqb	#1,%curptr@(TASK_INFO+TINFO_PREEMPT+1)
+	jeq	ret_from_last_interrupt
+	RESTORE_ALL
+
+
+ENTRY(sys_fork)
+	SAVE_SWITCH_STACK
+	pea	%sp@(SWITCH_STACK_SIZE)
+	jbsr	m68k_fork
+	addql	#4,%sp
+	RESTORE_SWITCH_STACK
+	rts
+
+ENTRY(sys_clone)
+	SAVE_SWITCH_STACK
+	pea	%sp@(SWITCH_STACK_SIZE)
+	jbsr	m68k_clone
+	addql	#4,%sp
+	RESTORE_SWITCH_STACK
+	rts
+
+ENTRY(sys_vfork)
+	SAVE_SWITCH_STACK
+	pea	%sp@(SWITCH_STACK_SIZE)
+	jbsr	m68k_vfork
+	addql	#4,%sp
+	RESTORE_SWITCH_STACK
+	rts
+
+ENTRY(sys_sigreturn)
+	SAVE_SWITCH_STACK
+	jbsr	do_sigreturn
+	RESTORE_SWITCH_STACK
+	rts
+
+ENTRY(sys_rt_sigreturn)
+	SAVE_SWITCH_STACK
+	jbsr	do_rt_sigreturn
+	RESTORE_SWITCH_STACK
+	rts
+
+resume:
+	/*
+	 * Beware - when entering resume, prev (the current task) is
+	 * in a0, next (the new task) is in a1,so don't change these
+	 * registers until their contents are no longer needed.
+	 */
+
+	/* save sr */
+	movew	%sr,%a0@(TASK_THREAD+THREAD_SR)
+
+	/* save fs (sfc,%dfc) (may be pointing to kernel memory) */
+	movec	%sfc,%d0
+	movew	%d0,%a0@(TASK_THREAD+THREAD_FS)
+
+	/* save usp */
+	/* it is better to use a movel here instead of a movew 8*) */
+	movec	%usp,%d0
+	movel	%d0,%a0@(TASK_THREAD+THREAD_USP)
+
+	/* save non-scratch registers on stack */
+	SAVE_SWITCH_STACK
+
+	/* save current kernel stack pointer */
+	movel	%sp,%a0@(TASK_THREAD+THREAD_KSP)
+
+	/* save floating point context */
+#ifndef CONFIG_M68KFPU_EMU_ONLY
+#ifdef CONFIG_M68KFPU_EMU
+	tstl	m68k_fputype
+	jeq	3f
+#endif
+	fsave	%a0@(TASK_THREAD+THREAD_FPSTATE)
+
+#if defined(CONFIG_M68060)
+#if !defined(CPU_M68060_ONLY)
+	btst	#3,m68k_cputype+3
+	beqs	1f
+#endif
+	/* The 060 FPU keeps status in bits 15-8 of the first longword */
+	tstb	%a0@(TASK_THREAD+THREAD_FPSTATE+2)
+	jeq	3f
+#if !defined(CPU_M68060_ONLY)
+	jra	2f
+#endif
+#endif /* CONFIG_M68060 */
+#if !defined(CPU_M68060_ONLY)
+1:	tstb	%a0@(TASK_THREAD+THREAD_FPSTATE)
+	jeq	3f
+#endif
+2:	fmovemx	%fp0-%fp7,%a0@(TASK_THREAD+THREAD_FPREG)
+	fmoveml	%fpcr/%fpsr/%fpiar,%a0@(TASK_THREAD+THREAD_FPCNTL)
+3:
+#endif	/* CONFIG_M68KFPU_EMU_ONLY */
+	/* Return previous task in %d1 */
+	movel	%curptr,%d1
+
+	/* switch to new task (a1 contains new task) */
+	movel	%a1,%curptr
+
+	/* restore floating point context */
+#ifndef CONFIG_M68KFPU_EMU_ONLY
+#ifdef CONFIG_M68KFPU_EMU
+	tstl	m68k_fputype
+	jeq	4f
+#endif
+#if defined(CONFIG_M68060)
+#if !defined(CPU_M68060_ONLY)
+	btst	#3,m68k_cputype+3
+	beqs	1f
+#endif
+	/* The 060 FPU keeps status in bits 15-8 of the first longword */
+	tstb	%a1@(TASK_THREAD+THREAD_FPSTATE+2)
+	jeq	3f
+#if !defined(CPU_M68060_ONLY)
+	jra	2f
+#endif
+#endif /* CONFIG_M68060 */
+#if !defined(CPU_M68060_ONLY)
+1:	tstb	%a1@(TASK_THREAD+THREAD_FPSTATE)
+	jeq	3f
+#endif
+2:	fmovemx	%a1@(TASK_THREAD+THREAD_FPREG),%fp0-%fp7
+	fmoveml	%a1@(TASK_THREAD+THREAD_FPCNTL),%fpcr/%fpsr/%fpiar
+3:	frestore %a1@(TASK_THREAD+THREAD_FPSTATE)
+4:
+#endif	/* CONFIG_M68KFPU_EMU_ONLY */
+
+	/* restore the kernel stack pointer */
+	movel	%a1@(TASK_THREAD+THREAD_KSP),%sp
+
+	/* restore non-scratch registers */
+	RESTORE_SWITCH_STACK
+
+	/* restore user stack pointer */
+	movel	%a1@(TASK_THREAD+THREAD_USP),%a0
+	movel	%a0,%usp
+
+	/* restore fs (sfc,%dfc) */
+	movew	%a1@(TASK_THREAD+THREAD_FS),%a0
+	movec	%a0,%sfc
+	movec	%a0,%dfc
+
+	/* restore status register */
+	movew	%a1@(TASK_THREAD+THREAD_SR),%sr
+
+	rts
+
+.data
+ALIGN
+sys_call_table:
+	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
+	.long sys_exit
+	.long sys_fork
+	.long sys_read
+	.long sys_write
+	.long sys_open		/* 5 */
+	.long sys_close
+	.long sys_waitpid
+	.long sys_creat
+	.long sys_link
+	.long sys_unlink	/* 10 */
+	.long sys_execve
+	.long sys_chdir
+	.long sys_time
+	.long sys_mknod
+	.long sys_chmod		/* 15 */
+	.long sys_chown16
+	.long sys_ni_syscall				/* old break syscall holder */
+	.long sys_stat
+	.long sys_lseek
+	.long sys_getpid	/* 20 */
+	.long sys_mount
+	.long sys_oldumount
+	.long sys_setuid16
+	.long sys_getuid16
+	.long sys_stime		/* 25 */
+	.long sys_ptrace
+	.long sys_alarm
+	.long sys_fstat
+	.long sys_pause
+	.long sys_utime		/* 30 */
+	.long sys_ni_syscall				/* old stty syscall holder */
+	.long sys_ni_syscall				/* old gtty syscall holder */
+	.long sys_access
+	.long sys_nice
+	.long sys_ni_syscall	/* 35 */	/* old ftime syscall holder */
+	.long sys_sync
+	.long sys_kill
+	.long sys_rename
+	.long sys_mkdir
+	.long sys_rmdir		/* 40 */
+	.long sys_dup
+	.long sys_pipe
+	.long sys_times
+	.long sys_ni_syscall				/* old prof syscall holder */
+	.long sys_brk		/* 45 */
+	.long sys_setgid16
+	.long sys_getgid16
+	.long sys_signal
+	.long sys_geteuid16
+	.long sys_getegid16	/* 50 */
+	.long sys_acct
+	.long sys_umount				/* recycled never used phys() */
+	.long sys_ni_syscall				/* old lock syscall holder */
+	.long sys_ioctl
+	.long sys_fcntl		/* 55 */
+	.long sys_ni_syscall				/* old mpx syscall holder */
+	.long sys_setpgid
+	.long sys_ni_syscall				/* old ulimit syscall holder */
+	.long sys_ni_syscall
+	.long sys_umask		/* 60 */
+	.long sys_chroot
+	.long sys_ustat
+	.long sys_dup2
+	.long sys_getppid
+	.long sys_getpgrp	/* 65 */
+	.long sys_setsid
+	.long sys_sigaction
+	.long sys_sgetmask
+	.long sys_ssetmask
+	.long sys_setreuid16	/* 70 */
+	.long sys_setregid16
+	.long sys_sigsuspend
+	.long sys_sigpending
+	.long sys_sethostname
+	.long sys_setrlimit	/* 75 */
+	.long sys_old_getrlimit
+	.long sys_getrusage
+	.long sys_gettimeofday
+	.long sys_settimeofday
+	.long sys_getgroups16	/* 80 */
+	.long sys_setgroups16
+	.long sys_old_select
+	.long sys_symlink
+	.long sys_lstat
+	.long sys_readlink	/* 85 */
+	.long sys_uselib
+	.long sys_swapon
+	.long sys_reboot
+	.long sys_old_readdir
+	.long sys_old_mmap	/* 90 */
+	.long sys_munmap
+	.long sys_truncate
+	.long sys_ftruncate
+	.long sys_fchmod
+	.long sys_fchown16	/* 95 */
+	.long sys_getpriority
+	.long sys_setpriority
+	.long sys_ni_syscall				/* old profil syscall holder */
+	.long sys_statfs
+	.long sys_fstatfs	/* 100 */
+	.long sys_ni_syscall				/* ioperm for i386 */
+	.long sys_socketcall
+	.long sys_syslog
+	.long sys_setitimer
+	.long sys_getitimer	/* 105 */
+	.long sys_newstat
+	.long sys_newlstat
+	.long sys_newfstat
+	.long sys_ni_syscall
+	.long sys_ni_syscall	/* 110 */	/* iopl for i386 */
+	.long sys_vhangup
+	.long sys_ni_syscall				/* obsolete idle() syscall */
+	.long sys_ni_syscall				/* vm86old for i386 */
+	.long sys_wait4
+	.long sys_swapoff	/* 115 */
+	.long sys_sysinfo
+	.long sys_ipc
+	.long sys_fsync
+	.long sys_sigreturn
+	.long sys_clone		/* 120 */
+	.long sys_setdomainname
+	.long sys_newuname
+	.long sys_cacheflush				/* modify_ldt for i386 */
+	.long sys_adjtimex
+	.long sys_mprotect	/* 125 */
+	.long sys_sigprocmask
+	.long sys_ni_syscall		/* old "create_module" */
+	.long sys_init_module
+	.long sys_delete_module
+	.long sys_ni_syscall	/* 130 - old "get_kernel_syms" */
+	.long sys_quotactl
+	.long sys_getpgid
+	.long sys_fchdir
+	.long sys_bdflush
+	.long sys_sysfs		/* 135 */
+	.long sys_personality
+	.long sys_ni_syscall				/* for afs_syscall */
+	.long sys_setfsuid16
+	.long sys_setfsgid16
+	.long sys_llseek	/* 140 */
+	.long sys_getdents
+	.long sys_select
+	.long sys_flock
+	.long sys_msync
+	.long sys_readv		/* 145 */
+	.long sys_writev
+	.long sys_getsid
+	.long sys_fdatasync
+	.long sys_sysctl
+	.long sys_mlock		/* 150 */
+	.long sys_munlock
+	.long sys_mlockall
+	.long sys_munlockall
+	.long sys_sched_setparam
+	.long sys_sched_getparam	/* 155 */
+	.long sys_sched_setscheduler
+	.long sys_sched_getscheduler
+	.long sys_sched_yield
+	.long sys_sched_get_priority_max
+	.long sys_sched_get_priority_min  /* 160 */
+	.long sys_sched_rr_get_interval
+	.long sys_nanosleep
+	.long sys_mremap
+	.long sys_setresuid16
+	.long sys_getresuid16	/* 165 */
+	.long sys_getpagesize
+	.long sys_ni_syscall		/* old sys_query_module */
+	.long sys_poll
+	.long sys_nfsservctl
+	.long sys_setresgid16	/* 170 */
+	.long sys_getresgid16
+	.long sys_prctl
+	.long sys_rt_sigreturn
+	.long sys_rt_sigaction
+	.long sys_rt_sigprocmask	/* 175 */
+	.long sys_rt_sigpending
+	.long sys_rt_sigtimedwait
+	.long sys_rt_sigqueueinfo
+	.long sys_rt_sigsuspend
+	.long sys_pread64	/* 180 */
+	.long sys_pwrite64
+	.long sys_lchown16;
+	.long sys_getcwd
+	.long sys_capget
+	.long sys_capset	/* 185 */
+	.long sys_sigaltstack
+	.long sys_sendfile
+	.long sys_ni_syscall				/* streams1 */
+	.long sys_ni_syscall				/* streams2 */
+	.long sys_vfork		/* 190 */
+	.long sys_getrlimit
+	.long sys_mmap2
+	.long sys_truncate64
+	.long sys_ftruncate64
+	.long sys_stat64	/* 195 */
+	.long sys_lstat64
+	.long sys_fstat64
+	.long sys_chown
+	.long sys_getuid
+	.long sys_getgid	/* 200 */
+	.long sys_geteuid
+	.long sys_getegid
+	.long sys_setreuid
+	.long sys_setregid
+	.long sys_getgroups	/* 205 */
+	.long sys_setgroups
+	.long sys_fchown
+	.long sys_setresuid
+	.long sys_getresuid
+	.long sys_setresgid	/* 210 */
+	.long sys_getresgid
+	.long sys_lchown
+	.long sys_setuid
+	.long sys_setgid
+	.long sys_setfsuid	/* 215 */
+	.long sys_setfsgid
+	.long sys_pivot_root
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_getdents64	/* 220 */
+	.long sys_gettid
+	.long sys_tkill
+	.long sys_setxattr
+	.long sys_lsetxattr
+	.long sys_fsetxattr	/* 225 */
+	.long sys_getxattr
+	.long sys_lgetxattr
+	.long sys_fgetxattr
+	.long sys_listxattr
+	.long sys_llistxattr	/* 230 */
+	.long sys_flistxattr
+	.long sys_removexattr
+	.long sys_lremovexattr
+	.long sys_fremovexattr
+	.long sys_futex		/* 235 */
+	.long sys_sendfile64
+	.long sys_mincore
+	.long sys_madvise
+	.long sys_fcntl64
+	.long sys_readahead	/* 240 */
+	.long sys_io_setup
+	.long sys_io_destroy
+	.long sys_io_getevents
+	.long sys_io_submit
+	.long sys_io_cancel	/* 245 */
+	.long sys_fadvise64
+	.long sys_exit_group
+	.long sys_lookup_dcookie
+	.long sys_epoll_create
+	.long sys_epoll_ctl	/* 250 */
+	.long sys_epoll_wait
+	.long sys_remap_file_pages
+	.long sys_set_tid_address
+	.long sys_timer_create
+	.long sys_timer_settime	/* 255 */
+	.long sys_timer_gettime
+	.long sys_timer_getoverrun
+	.long sys_timer_delete
+	.long sys_clock_settime
+	.long sys_clock_gettime	/* 260 */
+	.long sys_clock_getres
+	.long sys_clock_nanosleep
+	.long sys_statfs64
+	.long sys_fstatfs64
+	.long sys_tgkill	/* 265 */
+	.long sys_utimes
+	.long sys_fadvise64_64
+	.long sys_mbind
+	.long sys_get_mempolicy
+	.long sys_set_mempolicy	/* 270 */
+	.long sys_mq_open
+	.long sys_mq_unlink
+	.long sys_mq_timedsend
+	.long sys_mq_timedreceive
+	.long sys_mq_notify	/* 275 */
+	.long sys_mq_getsetattr
+	.long sys_waitid
+	.long sys_ni_syscall	/* for sys_vserver */
+	.long sys_add_key
+	.long sys_request_key	/* 280 */
+	.long sys_keyctl
+	.long sys_ioprio_set
+	.long sys_ioprio_get
+	.long sys_inotify_init
+	.long sys_inotify_add_watch	/* 285 */
+	.long sys_inotify_rm_watch
+	.long sys_migrate_pages
+	.long sys_openat
+	.long sys_mkdirat
+	.long sys_mknodat		/* 290 */
+	.long sys_fchownat
+	.long sys_futimesat
+	.long sys_fstatat64
+	.long sys_unlinkat
+	.long sys_renameat		/* 295 */
+	.long sys_linkat
+	.long sys_symlinkat
+	.long sys_readlinkat
+	.long sys_fchmodat
+	.long sys_faccessat		/* 300 */
+	.long sys_ni_syscall		/* Reserved for pselect6 */
+	.long sys_ni_syscall		/* Reserved for ppoll */
+	.long sys_unshare
+	.long sys_set_robust_list
+	.long sys_get_robust_list	/* 305 */
+	.long sys_splice
+	.long sys_sync_file_range
+	.long sys_tee
+	.long sys_vmsplice
+	.long sys_move_pages		/* 310 */
+	.long sys_sched_setaffinity
+	.long sys_sched_getaffinity
+	.long sys_kexec_load
+	.long sys_getcpu
+	.long sys_epoll_pwait		/* 315 */
+	.long sys_utimensat
+	.long sys_signalfd
+	.long sys_timerfd_create
+	.long sys_eventfd
+	.long sys_fallocate		/* 320 */
+	.long sys_timerfd_settime
+	.long sys_timerfd_gettime
+	.long sys_signalfd4
+	.long sys_eventfd2
+	.long sys_epoll_create1		/* 325 */
+	.long sys_dup3
+	.long sys_pipe2
+	.long sys_inotify_init1
+	.long sys_preadv
+	.long sys_pwritev		/* 330 */
+	.long sys_rt_tgsigqueueinfo
+	.long sys_perf_event_open
+	.long sys_get_thread_area
+	.long sys_set_thread_area
+	.long sys_atomic_cmpxchg_32	/* 335 */
+	.long sys_atomic_barrier
+	.long sys_fanotify_init
+	.long sys_fanotify_mark
+	.long sys_prlimit64
+

diff --git a/arch/m68knommu/kernel/entry.S b/arch/m68k/kernel/entry_no.S
similarity index 100%
rename from arch/m68knommu/kernel/entry.S
rename to arch/m68k/kernel/entry_no.S


diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68k/kernel/init_task.c
similarity index 100%
rename from arch/m68knommu/kernel/init_task.c
rename to arch/m68k/kernel/init_task.c


diff --git a/arch/m68knommu/kernel/irq.c b/arch/m68k/kernel/irq.c
similarity index 100%
rename from arch/m68knommu/kernel/irq.c
rename to arch/m68k/kernel/irq.c


diff --git a/arch/m68k/kernel/m68k_ksyms.c b/arch/m68k/kernel/m68k_ksyms.c
index d900e77..4752c28 100644
--- a/arch/m68k/kernel/m68k_ksyms.c
+++ b/arch/m68k/kernel/m68k_ksyms.c

@@ -1,16 +1,5 @@
-#include <linux/module.h>
-
-asmlinkage long long __ashldi3 (long long, int);
-asmlinkage long long __ashrdi3 (long long, int);
-asmlinkage long long __lshrdi3 (long long, int);
-asmlinkage long long __muldi3 (long long, long long);
-
-/* The following are special because they're not called
-   explicitly (the C compiler generates them).  Fortunately,
-   their interface isn't gonna change any time soon now, so
-   it's OK to leave it out of version control.  */
-EXPORT_SYMBOL(__ashldi3);
-EXPORT_SYMBOL(__ashrdi3);
-EXPORT_SYMBOL(__lshrdi3);
-EXPORT_SYMBOL(__muldi3);
-
+#ifdef CONFIG_MMU
+#include "m68k_ksyms_mm.c"
+#else
+#include "m68k_ksyms_no.c"
+#endif

diff --git a/arch/m68k/kernel/m68k_ksyms_mm.c b/arch/m68k/kernel/m68k_ksyms_mm.c
new file mode 100644
index 0000000..d900e77
--- /dev/null
+++ b/arch/m68k/kernel/m68k_ksyms_mm.c

@@ -0,0 +1,16 @@
+#include <linux/module.h>
+
+asmlinkage long long __ashldi3 (long long, int);
+asmlinkage long long __ashrdi3 (long long, int);
+asmlinkage long long __lshrdi3 (long long, int);
+asmlinkage long long __muldi3 (long long, long long);
+
+/* The following are special because they're not called
+   explicitly (the C compiler generates them).  Fortunately,
+   their interface isn't gonna change any time soon now, so
+   it's OK to leave it out of version control.  */
+EXPORT_SYMBOL(__ashldi3);
+EXPORT_SYMBOL(__ashrdi3);
+EXPORT_SYMBOL(__lshrdi3);
+EXPORT_SYMBOL(__muldi3);
+

diff --git a/arch/m68knommu/kernel/m68k_ksyms.c b/arch/m68k/kernel/m68k_ksyms_no.c
similarity index 100%
rename from arch/m68knommu/kernel/m68k_ksyms.c
rename to arch/m68k/kernel/m68k_ksyms_no.c


diff --git a/arch/m68k/kernel/module.c b/arch/m68k/kernel/module.c
index cd6bcb1..7ea203c 100644
--- a/arch/m68k/kernel/module.c
+++ b/arch/m68k/kernel/module.c

@@ -1,155 +1,5 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-#include <linux/moduleloader.h>
-#include <linux/elf.h>
-#include <linux/vmalloc.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-#if 0
-#define DEBUGP printk
+#ifdef CONFIG_MMU
+#include "module_mm.c"
 #else
-#define DEBUGP(fmt...)
+#include "module_no.c"
 #endif
-
-#ifdef CONFIG_MODULES
-
-void *module_alloc(unsigned long size)
-{
-	if (size == 0)
-		return NULL;
-	return vmalloc(size);
-}
-
-
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
-{
-	vfree(module_region);
-}
-
-/* We don't need anything special. */
-int module_frob_arch_sections(Elf_Ehdr *hdr,
-			      Elf_Shdr *sechdrs,
-			      char *secstrings,
-			      struct module *mod)
-{
-	return 0;
-}
-
-int apply_relocate(Elf32_Shdr *sechdrs,
-		   const char *strtab,
-		   unsigned int symindex,
-		   unsigned int relsec,
-		   struct module *me)
-{
-	unsigned int i;
-	Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
-	Elf32_Sym *sym;
-	uint32_t *location;
-
-	DEBUGP("Applying relocate section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
-		/* This is where to make the change */
-		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
-			+ rel[i].r_offset;
-		/* This is the symbol it is referring to.  Note that all
-		   undefined symbols have been resolved.  */
-		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
-			+ ELF32_R_SYM(rel[i].r_info);
-
-		switch (ELF32_R_TYPE(rel[i].r_info)) {
-		case R_68K_32:
-			/* We add the value into the location given */
-			*location += sym->st_value;
-			break;
-		case R_68K_PC32:
-			/* Add the value, subtract its postition */
-			*location += sym->st_value - (uint32_t)location;
-			break;
-		default:
-			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
-			       me->name, ELF32_R_TYPE(rel[i].r_info));
-			return -ENOEXEC;
-		}
-	}
-	return 0;
-}
-
-int apply_relocate_add(Elf32_Shdr *sechdrs,
-		       const char *strtab,
-		       unsigned int symindex,
-		       unsigned int relsec,
-		       struct module *me)
-{
-	unsigned int i;
-	Elf32_Rela *rel = (void *)sechdrs[relsec].sh_addr;
-	Elf32_Sym *sym;
-	uint32_t *location;
-
-	DEBUGP("Applying relocate_add section %u to %u\n", relsec,
-	       sechdrs[relsec].sh_info);
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
-		/* This is where to make the change */
-		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
-			+ rel[i].r_offset;
-		/* This is the symbol it is referring to.  Note that all
-		   undefined symbols have been resolved.  */
-		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
-			+ ELF32_R_SYM(rel[i].r_info);
-
-		switch (ELF32_R_TYPE(rel[i].r_info)) {
-		case R_68K_32:
-			/* We add the value into the location given */
-			*location = rel[i].r_addend + sym->st_value;
-			break;
-		case R_68K_PC32:
-			/* Add the value, subtract its postition */
-			*location = rel[i].r_addend + sym->st_value - (uint32_t)location;
-			break;
-		default:
-			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
-			       me->name, ELF32_R_TYPE(rel[i].r_info));
-			return -ENOEXEC;
-		}
-	}
-	return 0;
-}
-
-int module_finalize(const Elf_Ehdr *hdr,
-		    const Elf_Shdr *sechdrs,
-		    struct module *mod)
-{
-	module_fixup(mod, mod->arch.fixup_start, mod->arch.fixup_end);
-
-	return 0;
-}
-
-void module_arch_cleanup(struct module *mod)
-{
-}
-
-#endif /* CONFIG_MODULES */
-
-void module_fixup(struct module *mod, struct m68k_fixup_info *start,
-		  struct m68k_fixup_info *end)
-{
-	struct m68k_fixup_info *fixup;
-
-	for (fixup = start; fixup < end; fixup++) {
-		switch (fixup->type) {
-		case m68k_fixup_memoffset:
-			*(u32 *)fixup->addr = m68k_memoffset;
-			break;
-		case m68k_fixup_vnode_shift:
-			*(u16 *)fixup->addr += m68k_virt_to_node_shift;
-			break;
-		}
-	}
-}

diff --git a/arch/m68k/kernel/module_mm.c b/arch/m68k/kernel/module_mm.c
new file mode 100644
index 0000000..cd6bcb1
--- /dev/null
+++ b/arch/m68k/kernel/module_mm.c

@@ -0,0 +1,155 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt...)
+#endif
+
+#ifdef CONFIG_MODULES
+
+void *module_alloc(unsigned long size)
+{
+	if (size == 0)
+		return NULL;
+	return vmalloc(size);
+}
+
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+}
+
+/* We don't need anything special. */
+int module_frob_arch_sections(Elf_Ehdr *hdr,
+			      Elf_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *mod)
+{
+	return 0;
+}
+
+int apply_relocate(Elf32_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	unsigned int i;
+	Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym;
+	uint32_t *location;
+
+	DEBUGP("Applying relocate section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rel[i].r_info);
+
+		switch (ELF32_R_TYPE(rel[i].r_info)) {
+		case R_68K_32:
+			/* We add the value into the location given */
+			*location += sym->st_value;
+			break;
+		case R_68K_PC32:
+			/* Add the value, subtract its postition */
+			*location += sym->st_value - (uint32_t)location;
+			break;
+		default:
+			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+			       me->name, ELF32_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	unsigned int i;
+	Elf32_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Sym *sym;
+	uint32_t *location;
+
+	DEBUGP("Applying relocate_add section %u to %u\n", relsec,
+	       sechdrs[relsec].sh_info);
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+			+ ELF32_R_SYM(rel[i].r_info);
+
+		switch (ELF32_R_TYPE(rel[i].r_info)) {
+		case R_68K_32:
+			/* We add the value into the location given */
+			*location = rel[i].r_addend + sym->st_value;
+			break;
+		case R_68K_PC32:
+			/* Add the value, subtract its postition */
+			*location = rel[i].r_addend + sym->st_value - (uint32_t)location;
+			break;
+		default:
+			printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+			       me->name, ELF32_R_TYPE(rel[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *mod)
+{
+	module_fixup(mod, mod->arch.fixup_start, mod->arch.fixup_end);
+
+	return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+}
+
+#endif /* CONFIG_MODULES */
+
+void module_fixup(struct module *mod, struct m68k_fixup_info *start,
+		  struct m68k_fixup_info *end)
+{
+	struct m68k_fixup_info *fixup;
+
+	for (fixup = start; fixup < end; fixup++) {
+		switch (fixup->type) {
+		case m68k_fixup_memoffset:
+			*(u32 *)fixup->addr = m68k_memoffset;
+			break;
+		case m68k_fixup_vnode_shift:
+			*(u16 *)fixup->addr += m68k_virt_to_node_shift;
+			break;
+		}
+	}
+}

diff --git a/arch/m68knommu/kernel/module.c b/arch/m68k/kernel/module_no.c
similarity index 100%
rename from arch/m68knommu/kernel/module.c
rename to arch/m68k/kernel/module_no.c


diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index c2a1fc2..6cf4bd6 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c

@@ -1,354 +1,5 @@
-/*
- *  linux/arch/m68k/kernel/process.c
- *
- *  Copyright (C) 1995  Hamish Macdonald
- *
- *  68060 fixes by Jesper Skov
- */
-
-/*
- * This file handles the architecture-dependent parts of process handling..
- */
-
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/reboot.h>
-#include <linux/init_task.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/traps.h>
-#include <asm/machdep.h>
-#include <asm/setup.h>
-#include <asm/pgtable.h>
-
-/*
- * Initial task/thread structure. Make this a per-architecture thing,
- * because different architectures tend to have different
- * alignment requirements and potentially different initial
- * setup.
- */
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-union thread_union init_thread_union __init_task_data
-	__attribute__((aligned(THREAD_SIZE))) =
-		{ INIT_THREAD_INFO(init_task) };
-
-/* initial task structure */
-struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_task);
-
-asmlinkage void ret_from_fork(void);
-
-
-/*
- * Return saved PC from a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp;
-	/* Check whether the thread is blocked in resume() */
-	if (in_sched_functions(sw->retpc))
-		return ((unsigned long *)sw->a6)[1];
-	else
-		return sw->retpc;
-}
-
-/*
- * The idle loop on an m68k..
- */
-static void default_idle(void)
-{
-	if (!need_resched())
-#if defined(MACH_ATARI_ONLY)
-		/* block out HSYNC on the atari (falcon) */
-		__asm__("stop #0x2200" : : : "cc");
+#ifdef CONFIG_MMU
+#include "process_mm.c"
 #else
-		__asm__("stop #0x2000" : : : "cc");
+#include "process_no.c"
 #endif
-}
-
-void (*idle)(void) = default_idle;
-
-/*
- * The idle thread. There's no useful work to be
- * done, so just try to conserve power and have a
- * low exit latency (ie sit in a loop waiting for
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle(void)
-{
-	/* endless idle loop with no priority at all */
-	while (1) {
-		while (!need_resched())
-			idle();
-		preempt_enable_no_resched();
-		schedule();
-		preempt_disable();
-	}
-}
-
-void machine_restart(char * __unused)
-{
-	if (mach_reset)
-		mach_reset();
-	for (;;);
-}
-
-void machine_halt(void)
-{
-	if (mach_halt)
-		mach_halt();
-	for (;;);
-}
-
-void machine_power_off(void)
-{
-	if (mach_power_off)
-		mach_power_off();
-	for (;;);
-}
-
-void (*pm_power_off)(void) = machine_power_off;
-EXPORT_SYMBOL(pm_power_off);
-
-void show_regs(struct pt_regs * regs)
-{
-	printk("\n");
-	printk("Format %02x  Vector: %04x  PC: %08lx  Status: %04x    %s\n",
-	       regs->format, regs->vector, regs->pc, regs->sr, print_tainted());
-	printk("ORIG_D0: %08lx  D0: %08lx  A2: %08lx  A1: %08lx\n",
-	       regs->orig_d0, regs->d0, regs->a2, regs->a1);
-	printk("A0: %08lx  D5: %08lx  D4: %08lx\n",
-	       regs->a0, regs->d5, regs->d4);
-	printk("D3: %08lx  D2: %08lx  D1: %08lx\n",
-	       regs->d3, regs->d2, regs->d1);
-	if (!(regs->sr & PS_S))
-		printk("USP: %08lx\n", rdusp());
-}
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	int pid;
-	mm_segment_t fs;
-
-	fs = get_fs();
-	set_fs (KERNEL_DS);
-
-	{
-	register long retval __asm__ ("d0");
-	register long clone_arg __asm__ ("d1") = flags | CLONE_VM | CLONE_UNTRACED;
-
-	retval = __NR_clone;
-	__asm__ __volatile__
-	  ("clrl %%d2\n\t"
-	   "trap #0\n\t"		/* Linux/m68k system call */
-	   "tstl %0\n\t"		/* child or parent */
-	   "jne 1f\n\t"			/* parent - jump */
-	   "lea %%sp@(%c7),%6\n\t"	/* reload current */
-	   "movel %6@,%6\n\t"
-	   "movel %3,%%sp@-\n\t"	/* push argument */
-	   "jsr %4@\n\t"		/* call fn */
-	   "movel %0,%%d1\n\t"		/* pass exit value */
-	   "movel %2,%%d0\n\t"		/* exit */
-	   "trap #0\n"
-	   "1:"
-	   : "+d" (retval)
-	   : "i" (__NR_clone), "i" (__NR_exit),
-	     "r" (arg), "a" (fn), "d" (clone_arg), "r" (current),
-	     "i" (-THREAD_SIZE)
-	   : "d2");
-
-	pid = retval;
-	}
-
-	set_fs (fs);
-	return pid;
-}
-EXPORT_SYMBOL(kernel_thread);
-
-void flush_thread(void)
-{
-	unsigned long zero = 0;
-	set_fs(USER_DS);
-	current->thread.fs = __USER_DS;
-	if (!FPU_IS_EMU)
-		asm volatile (".chip 68k/68881\n\t"
-			      "frestore %0@\n\t"
-			      ".chip 68k" : : "a" (&zero));
-}
-
-/*
- * "m68k_fork()".. By the time we get here, the
- * non-volatile registers have also been saved on the
- * stack. We do some ugly pointer stuff here.. (see
- * also copy_thread)
- */
-
-asmlinkage int m68k_fork(struct pt_regs *regs)
-{
-	return do_fork(SIGCHLD, rdusp(), regs, 0, NULL, NULL);
-}
-
-asmlinkage int m68k_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, rdusp(), regs, 0,
-		       NULL, NULL);
-}
-
-asmlinkage int m68k_clone(struct pt_regs *regs)
-{
-	unsigned long clone_flags;
-	unsigned long newsp;
-	int __user *parent_tidptr, *child_tidptr;
-
-	/* syscall2 puts clone_flags in d1 and usp in d2 */
-	clone_flags = regs->d1;
-	newsp = regs->d2;
-	parent_tidptr = (int __user *)regs->d3;
-	child_tidptr = (int __user *)regs->d4;
-	if (!newsp)
-		newsp = rdusp();
-	return do_fork(clone_flags, newsp, regs, 0,
-		       parent_tidptr, child_tidptr);
-}
-
-int copy_thread(unsigned long clone_flags, unsigned long usp,
-		 unsigned long unused,
-		 struct task_struct * p, struct pt_regs * regs)
-{
-	struct pt_regs * childregs;
-	struct switch_stack * childstack, *stack;
-	unsigned long *retp;
-
-	childregs = (struct pt_regs *) (task_stack_page(p) + THREAD_SIZE) - 1;
-
-	*childregs = *regs;
-	childregs->d0 = 0;
-
-	retp = ((unsigned long *) regs);
-	stack = ((struct switch_stack *) retp) - 1;
-
-	childstack = ((struct switch_stack *) childregs) - 1;
-	*childstack = *stack;
-	childstack->retpc = (unsigned long)ret_from_fork;
-
-	p->thread.usp = usp;
-	p->thread.ksp = (unsigned long)childstack;
-
-	if (clone_flags & CLONE_SETTLS)
-		task_thread_info(p)->tp_value = regs->d5;
-
-	/*
-	 * Must save the current SFC/DFC value, NOT the value when
-	 * the parent was last descheduled - RGH  10-08-96
-	 */
-	p->thread.fs = get_fs().seg;
-
-	if (!FPU_IS_EMU) {
-		/* Copy the current fpu state */
-		asm volatile ("fsave %0" : : "m" (p->thread.fpstate[0]) : "memory");
-
-		if (!CPU_IS_060 ? p->thread.fpstate[0] : p->thread.fpstate[2])
-		  asm volatile ("fmovemx %/fp0-%/fp7,%0\n\t"
-				"fmoveml %/fpiar/%/fpcr/%/fpsr,%1"
-				: : "m" (p->thread.fp[0]), "m" (p->thread.fpcntl[0])
-				: "memory");
-		/* Restore the state in case the fpu was busy */
-		asm volatile ("frestore %0" : : "m" (p->thread.fpstate[0]));
-	}
-
-	return 0;
-}
-
-/* Fill in the fpu structure for a core dump.  */
-
-int dump_fpu (struct pt_regs *regs, struct user_m68kfp_struct *fpu)
-{
-	char fpustate[216];
-
-	if (FPU_IS_EMU) {
-		int i;
-
-		memcpy(fpu->fpcntl, current->thread.fpcntl, 12);
-		memcpy(fpu->fpregs, current->thread.fp, 96);
-		/* Convert internal fpu reg representation
-		 * into long double format
-		 */
-		for (i = 0; i < 24; i += 3)
-			fpu->fpregs[i] = ((fpu->fpregs[i] & 0xffff0000) << 15) |
-			                 ((fpu->fpregs[i] & 0x0000ffff) << 16);
-		return 1;
-	}
-
-	/* First dump the fpu context to avoid protocol violation.  */
-	asm volatile ("fsave %0" :: "m" (fpustate[0]) : "memory");
-	if (!CPU_IS_060 ? !fpustate[0] : !fpustate[2])
-		return 0;
-
-	asm volatile ("fmovem %/fpiar/%/fpcr/%/fpsr,%0"
-		:: "m" (fpu->fpcntl[0])
-		: "memory");
-	asm volatile ("fmovemx %/fp0-%/fp7,%0"
-		:: "m" (fpu->fpregs[0])
-		: "memory");
-	return 1;
-}
-EXPORT_SYMBOL(dump_fpu);
-
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage int sys_execve(const char __user *name,
-			  const char __user *const __user *argv,
-			  const char __user *const __user *envp)
-{
-	int error;
-	char * filename;
-	struct pt_regs *regs = (struct pt_regs *) &name;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-	error = do_execve(filename, argv, envp, regs);
-	putname(filename);
-	return error;
-}
-
-unsigned long get_wchan(struct task_struct *p)
-{
-	unsigned long fp, pc;
-	unsigned long stack_page;
-	int count = 0;
-	if (!p || p == current || p->state == TASK_RUNNING)
-		return 0;
-
-	stack_page = (unsigned long)task_stack_page(p);
-	fp = ((struct switch_stack *)p->thread.ksp)->a6;
-	do {
-		if (fp < stack_page+sizeof(struct thread_info) ||
-		    fp >= 8184+stack_page)
-			return 0;
-		pc = ((unsigned long *)fp)[1];
-		if (!in_sched_functions(pc))
-			return pc;
-		fp = *(unsigned long *) fp;
-	} while (count++ < 16);
-	return 0;
-}

diff --git a/arch/m68k/kernel/process_mm.c b/arch/m68k/kernel/process_mm.c
new file mode 100644
index 0000000..c2a1fc2
--- /dev/null
+++ b/arch/m68k/kernel/process_mm.c

@@ -0,0 +1,354 @@
+/*
+ *  linux/arch/m68k/kernel/process.c
+ *
+ *  Copyright (C) 1995  Hamish Macdonald
+ *
+ *  68060 fixes by Jesper Skov
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/reboot.h>
+#include <linux/init_task.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/traps.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+#include <asm/pgtable.h>
+
+/*
+ * Initial task/thread structure. Make this a per-architecture thing,
+ * because different architectures tend to have different
+ * alignment requirements and potentially different initial
+ * setup.
+ */
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+union thread_union init_thread_union __init_task_data
+	__attribute__((aligned(THREAD_SIZE))) =
+		{ INIT_THREAD_INFO(init_task) };
+
+/* initial task structure */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
+
+asmlinkage void ret_from_fork(void);
+
+
+/*
+ * Return saved PC from a blocked thread
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+	struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp;
+	/* Check whether the thread is blocked in resume() */
+	if (in_sched_functions(sw->retpc))
+		return ((unsigned long *)sw->a6)[1];
+	else
+		return sw->retpc;
+}
+
+/*
+ * The idle loop on an m68k..
+ */
+static void default_idle(void)
+{
+	if (!need_resched())
+#if defined(MACH_ATARI_ONLY)
+		/* block out HSYNC on the atari (falcon) */
+		__asm__("stop #0x2200" : : : "cc");
+#else
+		__asm__("stop #0x2000" : : : "cc");
+#endif
+}
+
+void (*idle)(void) = default_idle;
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle(void)
+{
+	/* endless idle loop with no priority at all */
+	while (1) {
+		while (!need_resched())
+			idle();
+		preempt_enable_no_resched();
+		schedule();
+		preempt_disable();
+	}
+}
+
+void machine_restart(char * __unused)
+{
+	if (mach_reset)
+		mach_reset();
+	for (;;);
+}
+
+void machine_halt(void)
+{
+	if (mach_halt)
+		mach_halt();
+	for (;;);
+}
+
+void machine_power_off(void)
+{
+	if (mach_power_off)
+		mach_power_off();
+	for (;;);
+}
+
+void (*pm_power_off)(void) = machine_power_off;
+EXPORT_SYMBOL(pm_power_off);
+
+void show_regs(struct pt_regs * regs)
+{
+	printk("\n");
+	printk("Format %02x  Vector: %04x  PC: %08lx  Status: %04x    %s\n",
+	       regs->format, regs->vector, regs->pc, regs->sr, print_tainted());
+	printk("ORIG_D0: %08lx  D0: %08lx  A2: %08lx  A1: %08lx\n",
+	       regs->orig_d0, regs->d0, regs->a2, regs->a1);
+	printk("A0: %08lx  D5: %08lx  D4: %08lx\n",
+	       regs->a0, regs->d5, regs->d4);
+	printk("D3: %08lx  D2: %08lx  D1: %08lx\n",
+	       regs->d3, regs->d2, regs->d1);
+	if (!(regs->sr & PS_S))
+		printk("USP: %08lx\n", rdusp());
+}
+
+/*
+ * Create a kernel thread
+ */
+int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+{
+	int pid;
+	mm_segment_t fs;
+
+	fs = get_fs();
+	set_fs (KERNEL_DS);
+
+	{
+	register long retval __asm__ ("d0");
+	register long clone_arg __asm__ ("d1") = flags | CLONE_VM | CLONE_UNTRACED;
+
+	retval = __NR_clone;
+	__asm__ __volatile__
+	  ("clrl %%d2\n\t"
+	   "trap #0\n\t"		/* Linux/m68k system call */
+	   "tstl %0\n\t"		/* child or parent */
+	   "jne 1f\n\t"			/* parent - jump */
+	   "lea %%sp@(%c7),%6\n\t"	/* reload current */
+	   "movel %6@,%6\n\t"
+	   "movel %3,%%sp@-\n\t"	/* push argument */
+	   "jsr %4@\n\t"		/* call fn */
+	   "movel %0,%%d1\n\t"		/* pass exit value */
+	   "movel %2,%%d0\n\t"		/* exit */
+	   "trap #0\n"
+	   "1:"
+	   : "+d" (retval)
+	   : "i" (__NR_clone), "i" (__NR_exit),
+	     "r" (arg), "a" (fn), "d" (clone_arg), "r" (current),
+	     "i" (-THREAD_SIZE)
+	   : "d2");
+
+	pid = retval;
+	}
+
+	set_fs (fs);
+	return pid;
+}
+EXPORT_SYMBOL(kernel_thread);
+
+void flush_thread(void)
+{
+	unsigned long zero = 0;
+	set_fs(USER_DS);
+	current->thread.fs = __USER_DS;
+	if (!FPU_IS_EMU)
+		asm volatile (".chip 68k/68881\n\t"
+			      "frestore %0@\n\t"
+			      ".chip 68k" : : "a" (&zero));
+}
+
+/*
+ * "m68k_fork()".. By the time we get here, the
+ * non-volatile registers have also been saved on the
+ * stack. We do some ugly pointer stuff here.. (see
+ * also copy_thread)
+ */
+
+asmlinkage int m68k_fork(struct pt_regs *regs)
+{
+	return do_fork(SIGCHLD, rdusp(), regs, 0, NULL, NULL);
+}
+
+asmlinkage int m68k_vfork(struct pt_regs *regs)
+{
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, rdusp(), regs, 0,
+		       NULL, NULL);
+}
+
+asmlinkage int m68k_clone(struct pt_regs *regs)
+{
+	unsigned long clone_flags;
+	unsigned long newsp;
+	int __user *parent_tidptr, *child_tidptr;
+
+	/* syscall2 puts clone_flags in d1 and usp in d2 */
+	clone_flags = regs->d1;
+	newsp = regs->d2;
+	parent_tidptr = (int __user *)regs->d3;
+	child_tidptr = (int __user *)regs->d4;
+	if (!newsp)
+		newsp = rdusp();
+	return do_fork(clone_flags, newsp, regs, 0,
+		       parent_tidptr, child_tidptr);
+}
+
+int copy_thread(unsigned long clone_flags, unsigned long usp,
+		 unsigned long unused,
+		 struct task_struct * p, struct pt_regs * regs)
+{
+	struct pt_regs * childregs;
+	struct switch_stack * childstack, *stack;
+	unsigned long *retp;
+
+	childregs = (struct pt_regs *) (task_stack_page(p) + THREAD_SIZE) - 1;
+
+	*childregs = *regs;
+	childregs->d0 = 0;
+
+	retp = ((unsigned long *) regs);
+	stack = ((struct switch_stack *) retp) - 1;
+
+	childstack = ((struct switch_stack *) childregs) - 1;
+	*childstack = *stack;
+	childstack->retpc = (unsigned long)ret_from_fork;
+
+	p->thread.usp = usp;
+	p->thread.ksp = (unsigned long)childstack;
+
+	if (clone_flags & CLONE_SETTLS)
+		task_thread_info(p)->tp_value = regs->d5;
+
+	/*
+	 * Must save the current SFC/DFC value, NOT the value when
+	 * the parent was last descheduled - RGH  10-08-96
+	 */
+	p->thread.fs = get_fs().seg;
+
+	if (!FPU_IS_EMU) {
+		/* Copy the current fpu state */
+		asm volatile ("fsave %0" : : "m" (p->thread.fpstate[0]) : "memory");
+
+		if (!CPU_IS_060 ? p->thread.fpstate[0] : p->thread.fpstate[2])
+		  asm volatile ("fmovemx %/fp0-%/fp7,%0\n\t"
+				"fmoveml %/fpiar/%/fpcr/%/fpsr,%1"
+				: : "m" (p->thread.fp[0]), "m" (p->thread.fpcntl[0])
+				: "memory");
+		/* Restore the state in case the fpu was busy */
+		asm volatile ("frestore %0" : : "m" (p->thread.fpstate[0]));
+	}
+
+	return 0;
+}
+
+/* Fill in the fpu structure for a core dump.  */
+
+int dump_fpu (struct pt_regs *regs, struct user_m68kfp_struct *fpu)
+{
+	char fpustate[216];
+
+	if (FPU_IS_EMU) {
+		int i;
+
+		memcpy(fpu->fpcntl, current->thread.fpcntl, 12);
+		memcpy(fpu->fpregs, current->thread.fp, 96);
+		/* Convert internal fpu reg representation
+		 * into long double format
+		 */
+		for (i = 0; i < 24; i += 3)
+			fpu->fpregs[i] = ((fpu->fpregs[i] & 0xffff0000) << 15) |
+			                 ((fpu->fpregs[i] & 0x0000ffff) << 16);
+		return 1;
+	}
+
+	/* First dump the fpu context to avoid protocol violation.  */
+	asm volatile ("fsave %0" :: "m" (fpustate[0]) : "memory");
+	if (!CPU_IS_060 ? !fpustate[0] : !fpustate[2])
+		return 0;
+
+	asm volatile ("fmovem %/fpiar/%/fpcr/%/fpsr,%0"
+		:: "m" (fpu->fpcntl[0])
+		: "memory");
+	asm volatile ("fmovemx %/fp0-%/fp7,%0"
+		:: "m" (fpu->fpregs[0])
+		: "memory");
+	return 1;
+}
+EXPORT_SYMBOL(dump_fpu);
+
+/*
+ * sys_execve() executes a new program.
+ */
+asmlinkage int sys_execve(const char __user *name,
+			  const char __user *const __user *argv,
+			  const char __user *const __user *envp)
+{
+	int error;
+	char * filename;
+	struct pt_regs *regs = (struct pt_regs *) &name;
+
+	filename = getname(name);
+	error = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		return error;
+	error = do_execve(filename, argv, envp, regs);
+	putname(filename);
+	return error;
+}
+
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long fp, pc;
+	unsigned long stack_page;
+	int count = 0;
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	stack_page = (unsigned long)task_stack_page(p);
+	fp = ((struct switch_stack *)p->thread.ksp)->a6;
+	do {
+		if (fp < stack_page+sizeof(struct thread_info) ||
+		    fp >= 8184+stack_page)
+			return 0;
+		pc = ((unsigned long *)fp)[1];
+		if (!in_sched_functions(pc))
+			return pc;
+		fp = *(unsigned long *) fp;
+	} while (count++ < 16);
+	return 0;
+}

diff --git a/arch/m68knommu/kernel/process.c b/arch/m68k/kernel/process_no.c
similarity index 100%
rename from arch/m68knommu/kernel/process.c
rename to arch/m68k/kernel/process_no.c


diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c
index 0b25268..07a4175 100644
--- a/arch/m68k/kernel/ptrace.c
+++ b/arch/m68k/kernel/ptrace.c

@@ -1,277 +1,5 @@
-/*
- *  linux/arch/m68k/kernel/ptrace.c
- *
- *  Copyright (C) 1994 by Hamish Macdonald
- *  Taken from linux/kernel/ptrace.c and modified for M680x0.
- *  linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of
- * this archive for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/user.h>
-#include <linux/signal.h>
-
-#include <asm/uaccess.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
-
-/* determines which bits in the SR the user has access to. */
-/* 1 = access 0 = no access */
-#define SR_MASK 0x001f
-
-/* sets the trace bits. */
-#define TRACE_BITS 0xC000
-#define T1_BIT 0x8000
-#define T0_BIT 0x4000
-
-/* Find the stack offset for a register, relative to thread.esp0. */
-#define PT_REG(reg)	((long)&((struct pt_regs *)0)->reg)
-#define SW_REG(reg)	((long)&((struct switch_stack *)0)->reg \
-			 - sizeof(struct switch_stack))
-/* Mapping from PT_xxx to the stack offset at which the register is
-   saved.  Notice that usp has no stack-slot and needs to be treated
-   specially (see get_reg/put_reg below). */
-static const int regoff[] = {
-	[0]	= PT_REG(d1),
-	[1]	= PT_REG(d2),
-	[2]	= PT_REG(d3),
-	[3]	= PT_REG(d4),
-	[4]	= PT_REG(d5),
-	[5]	= SW_REG(d6),
-	[6]	= SW_REG(d7),
-	[7]	= PT_REG(a0),
-	[8]	= PT_REG(a1),
-	[9]	= PT_REG(a2),
-	[10]	= SW_REG(a3),
-	[11]	= SW_REG(a4),
-	[12]	= SW_REG(a5),
-	[13]	= SW_REG(a6),
-	[14]	= PT_REG(d0),
-	[15]	= -1,
-	[16]	= PT_REG(orig_d0),
-	[17]	= PT_REG(sr),
-	[18]	= PT_REG(pc),
-};
-
-/*
- * Get contents of register REGNO in task TASK.
- */
-static inline long get_reg(struct task_struct *task, int regno)
-{
-	unsigned long *addr;
-
-	if (regno == PT_USP)
-		addr = &task->thread.usp;
-	else if (regno < ARRAY_SIZE(regoff))
-		addr = (unsigned long *)(task->thread.esp0 + regoff[regno]);
-	else
-		return 0;
-	/* Need to take stkadj into account. */
-	if (regno == PT_SR || regno == PT_PC) {
-		long stkadj = *(long *)(task->thread.esp0 + PT_REG(stkadj));
-		addr = (unsigned long *) ((unsigned long)addr + stkadj);
-		/* The sr is actually a 16 bit register.  */
-		if (regno == PT_SR)
-			return *(unsigned short *)addr;
-	}
-	return *addr;
-}
-
-/*
- * Write contents of register REGNO in task TASK.
- */
-static inline int put_reg(struct task_struct *task, int regno,
-			  unsigned long data)
-{
-	unsigned long *addr;
-
-	if (regno == PT_USP)
-		addr = &task->thread.usp;
-	else if (regno < ARRAY_SIZE(regoff))
-		addr = (unsigned long *)(task->thread.esp0 + regoff[regno]);
-	else
-		return -1;
-	/* Need to take stkadj into account. */
-	if (regno == PT_SR || regno == PT_PC) {
-		long stkadj = *(long *)(task->thread.esp0 + PT_REG(stkadj));
-		addr = (unsigned long *) ((unsigned long)addr + stkadj);
-		/* The sr is actually a 16 bit register.  */
-		if (regno == PT_SR) {
-			*(unsigned short *)addr = data;
-			return 0;
-		}
-	}
-	*addr = data;
-	return 0;
-}
-
-/*
- * Make sure the single step bit is not set.
- */
-static inline void singlestep_disable(struct task_struct *child)
-{
-	unsigned long tmp = get_reg(child, PT_SR) & ~TRACE_BITS;
-	put_reg(child, PT_SR, tmp);
-	clear_tsk_thread_flag(child, TIF_DELAYED_TRACE);
-}
-
-/*
- * Called by kernel/ptrace.c when detaching..
- */
-void ptrace_disable(struct task_struct *child)
-{
-	singlestep_disable(child);
-}
-
-void user_enable_single_step(struct task_struct *child)
-{
-	unsigned long tmp = get_reg(child, PT_SR) & ~TRACE_BITS;
-	put_reg(child, PT_SR, tmp | T1_BIT);
-	set_tsk_thread_flag(child, TIF_DELAYED_TRACE);
-}
-
-void user_enable_block_step(struct task_struct *child)
-{
-	unsigned long tmp = get_reg(child, PT_SR) & ~TRACE_BITS;
-	put_reg(child, PT_SR, tmp | T0_BIT);
-}
-
-void user_disable_single_step(struct task_struct *child)
-{
-	singlestep_disable(child);
-}
-
-long arch_ptrace(struct task_struct *child, long request,
-		 unsigned long addr, unsigned long data)
-{
-	unsigned long tmp;
-	int i, ret = 0;
-	int regno = addr >> 2; /* temporary hack. */
-	unsigned long __user *datap = (unsigned long __user *) data;
-
-	switch (request) {
-	/* read the word at location addr in the USER area. */
-	case PTRACE_PEEKUSR:
-		if (addr & 3)
-			goto out_eio;
-
-		if (regno >= 0 && regno < 19) {
-			tmp = get_reg(child, regno);
-		} else if (regno >= 21 && regno < 49) {
-			tmp = child->thread.fp[regno - 21];
-			/* Convert internal fpu reg representation
-			 * into long double format
-			 */
-			if (FPU_IS_EMU && (regno < 45) && !(regno % 3))
-				tmp = ((tmp & 0xffff0000) << 15) |
-				      ((tmp & 0x0000ffff) << 16);
-		} else
-			goto out_eio;
-		ret = put_user(tmp, datap);
-		break;
-
-	case PTRACE_POKEUSR:
-	/* write the word at location addr in the USER area */
-		if (addr & 3)
-			goto out_eio;
-
-		if (regno == PT_SR) {
-			data &= SR_MASK;
-			data |= get_reg(child, PT_SR) & ~SR_MASK;
-		}
-		if (regno >= 0 && regno < 19) {
-			if (put_reg(child, regno, data))
-				goto out_eio;
-		} else if (regno >= 21 && regno < 48) {
-			/* Convert long double format
-			 * into internal fpu reg representation
-			 */
-			if (FPU_IS_EMU && (regno < 45) && !(regno % 3)) {
-				data <<= 15;
-				data = (data & 0xffff0000) |
-				       ((data & 0x0000ffff) >> 1);
-			}
-			child->thread.fp[regno - 21] = data;
-		} else
-			goto out_eio;
-		break;
-
-	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
-		for (i = 0; i < 19; i++) {
-			tmp = get_reg(child, i);
-			ret = put_user(tmp, datap);
-			if (ret)
-				break;
-			datap++;
-		}
-		break;
-
-	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
-		for (i = 0; i < 19; i++) {
-			ret = get_user(tmp, datap);
-			if (ret)
-				break;
-			if (i == PT_SR) {
-				tmp &= SR_MASK;
-				tmp |= get_reg(child, PT_SR) & ~SR_MASK;
-			}
-			put_reg(child, i, tmp);
-			datap++;
-		}
-		break;
-
-	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
-		if (copy_to_user(datap, &child->thread.fp,
-				 sizeof(struct user_m68kfp_struct)))
-			ret = -EFAULT;
-		break;
-
-	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
-		if (copy_from_user(&child->thread.fp, datap,
-				   sizeof(struct user_m68kfp_struct)))
-			ret = -EFAULT;
-		break;
-
-	case PTRACE_GET_THREAD_AREA:
-		ret = put_user(task_thread_info(child)->tp_value, datap);
-		break;
-
-	default:
-		ret = ptrace_request(child, request, addr, data);
-		break;
-	}
-
-	return ret;
-out_eio:
-	return -EIO;
-}
-
-asmlinkage void syscall_trace(void)
-{
-	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-				 ? 0x80 : 0));
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
-}
+#ifdef CONFIG_MMU
+#include "ptrace_mm.c"
+#else
+#include "ptrace_no.c"
+#endif

diff --git a/arch/m68k/kernel/ptrace_mm.c b/arch/m68k/kernel/ptrace_mm.c
new file mode 100644
index 0000000..0b25268
--- /dev/null
+++ b/arch/m68k/kernel/ptrace_mm.c

@@ -0,0 +1,277 @@
+/*
+ *  linux/arch/m68k/kernel/ptrace.c
+ *
+ *  Copyright (C) 1994 by Hamish Macdonald
+ *  Taken from linux/kernel/ptrace.c and modified for M680x0.
+ *  linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file COPYING in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/signal.h>
+
+#include <asm/uaccess.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+/* determines which bits in the SR the user has access to. */
+/* 1 = access 0 = no access */
+#define SR_MASK 0x001f
+
+/* sets the trace bits. */
+#define TRACE_BITS 0xC000
+#define T1_BIT 0x8000
+#define T0_BIT 0x4000
+
+/* Find the stack offset for a register, relative to thread.esp0. */
+#define PT_REG(reg)	((long)&((struct pt_regs *)0)->reg)
+#define SW_REG(reg)	((long)&((struct switch_stack *)0)->reg \
+			 - sizeof(struct switch_stack))
+/* Mapping from PT_xxx to the stack offset at which the register is
+   saved.  Notice that usp has no stack-slot and needs to be treated
+   specially (see get_reg/put_reg below). */
+static const int regoff[] = {
+	[0]	= PT_REG(d1),
+	[1]	= PT_REG(d2),
+	[2]	= PT_REG(d3),
+	[3]	= PT_REG(d4),
+	[4]	= PT_REG(d5),
+	[5]	= SW_REG(d6),
+	[6]	= SW_REG(d7),
+	[7]	= PT_REG(a0),
+	[8]	= PT_REG(a1),
+	[9]	= PT_REG(a2),
+	[10]	= SW_REG(a3),
+	[11]	= SW_REG(a4),
+	[12]	= SW_REG(a5),
+	[13]	= SW_REG(a6),
+	[14]	= PT_REG(d0),
+	[15]	= -1,
+	[16]	= PT_REG(orig_d0),
+	[17]	= PT_REG(sr),
+	[18]	= PT_REG(pc),
+};
+
+/*
+ * Get contents of register REGNO in task TASK.
+ */
+static inline long get_reg(struct task_struct *task, int regno)
+{
+	unsigned long *addr;
+
+	if (regno == PT_USP)
+		addr = &task->thread.usp;
+	else if (regno < ARRAY_SIZE(regoff))
+		addr = (unsigned long *)(task->thread.esp0 + regoff[regno]);
+	else
+		return 0;
+	/* Need to take stkadj into account. */
+	if (regno == PT_SR || regno == PT_PC) {
+		long stkadj = *(long *)(task->thread.esp0 + PT_REG(stkadj));
+		addr = (unsigned long *) ((unsigned long)addr + stkadj);
+		/* The sr is actually a 16 bit register.  */
+		if (regno == PT_SR)
+			return *(unsigned short *)addr;
+	}
+	return *addr;
+}
+
+/*
+ * Write contents of register REGNO in task TASK.
+ */
+static inline int put_reg(struct task_struct *task, int regno,
+			  unsigned long data)
+{
+	unsigned long *addr;
+
+	if (regno == PT_USP)
+		addr = &task->thread.usp;
+	else if (regno < ARRAY_SIZE(regoff))
+		addr = (unsigned long *)(task->thread.esp0 + regoff[regno]);
+	else
+		return -1;
+	/* Need to take stkadj into account. */
+	if (regno == PT_SR || regno == PT_PC) {
+		long stkadj = *(long *)(task->thread.esp0 + PT_REG(stkadj));
+		addr = (unsigned long *) ((unsigned long)addr + stkadj);
+		/* The sr is actually a 16 bit register.  */
+		if (regno == PT_SR) {
+			*(unsigned short *)addr = data;
+			return 0;
+		}
+	}
+	*addr = data;
+	return 0;
+}
+
+/*
+ * Make sure the single step bit is not set.
+ */
+static inline void singlestep_disable(struct task_struct *child)
+{
+	unsigned long tmp = get_reg(child, PT_SR) & ~TRACE_BITS;
+	put_reg(child, PT_SR, tmp);
+	clear_tsk_thread_flag(child, TIF_DELAYED_TRACE);
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ */
+void ptrace_disable(struct task_struct *child)
+{
+	singlestep_disable(child);
+}
+
+void user_enable_single_step(struct task_struct *child)
+{
+	unsigned long tmp = get_reg(child, PT_SR) & ~TRACE_BITS;
+	put_reg(child, PT_SR, tmp | T1_BIT);
+	set_tsk_thread_flag(child, TIF_DELAYED_TRACE);
+}
+
+void user_enable_block_step(struct task_struct *child)
+{
+	unsigned long tmp = get_reg(child, PT_SR) & ~TRACE_BITS;
+	put_reg(child, PT_SR, tmp | T0_BIT);
+}
+
+void user_disable_single_step(struct task_struct *child)
+{
+	singlestep_disable(child);
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	unsigned long tmp;
+	int i, ret = 0;
+	int regno = addr >> 2; /* temporary hack. */
+	unsigned long __user *datap = (unsigned long __user *) data;
+
+	switch (request) {
+	/* read the word at location addr in the USER area. */
+	case PTRACE_PEEKUSR:
+		if (addr & 3)
+			goto out_eio;
+
+		if (regno >= 0 && regno < 19) {
+			tmp = get_reg(child, regno);
+		} else if (regno >= 21 && regno < 49) {
+			tmp = child->thread.fp[regno - 21];
+			/* Convert internal fpu reg representation
+			 * into long double format
+			 */
+			if (FPU_IS_EMU && (regno < 45) && !(regno % 3))
+				tmp = ((tmp & 0xffff0000) << 15) |
+				      ((tmp & 0x0000ffff) << 16);
+		} else
+			goto out_eio;
+		ret = put_user(tmp, datap);
+		break;
+
+	case PTRACE_POKEUSR:
+	/* write the word at location addr in the USER area */
+		if (addr & 3)
+			goto out_eio;
+
+		if (regno == PT_SR) {
+			data &= SR_MASK;
+			data |= get_reg(child, PT_SR) & ~SR_MASK;
+		}
+		if (regno >= 0 && regno < 19) {
+			if (put_reg(child, regno, data))
+				goto out_eio;
+		} else if (regno >= 21 && regno < 48) {
+			/* Convert long double format
+			 * into internal fpu reg representation
+			 */
+			if (FPU_IS_EMU && (regno < 45) && !(regno % 3)) {
+				data <<= 15;
+				data = (data & 0xffff0000) |
+				       ((data & 0x0000ffff) >> 1);
+			}
+			child->thread.fp[regno - 21] = data;
+		} else
+			goto out_eio;
+		break;
+
+	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
+		for (i = 0; i < 19; i++) {
+			tmp = get_reg(child, i);
+			ret = put_user(tmp, datap);
+			if (ret)
+				break;
+			datap++;
+		}
+		break;
+
+	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
+		for (i = 0; i < 19; i++) {
+			ret = get_user(tmp, datap);
+			if (ret)
+				break;
+			if (i == PT_SR) {
+				tmp &= SR_MASK;
+				tmp |= get_reg(child, PT_SR) & ~SR_MASK;
+			}
+			put_reg(child, i, tmp);
+			datap++;
+		}
+		break;
+
+	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
+		if (copy_to_user(datap, &child->thread.fp,
+				 sizeof(struct user_m68kfp_struct)))
+			ret = -EFAULT;
+		break;
+
+	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
+		if (copy_from_user(&child->thread.fp, datap,
+				   sizeof(struct user_m68kfp_struct)))
+			ret = -EFAULT;
+		break;
+
+	case PTRACE_GET_THREAD_AREA:
+		ret = put_user(task_thread_info(child)->tp_value, datap);
+		break;
+
+	default:
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+out_eio:
+	return -EIO;
+}
+
+asmlinkage void syscall_trace(void)
+{
+	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+				 ? 0x80 : 0));
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use.  strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP.  -brl
+	 */
+	if (current->exit_code) {
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+}

diff --git a/arch/m68knommu/kernel/ptrace.c b/arch/m68k/kernel/ptrace_no.c
similarity index 100%
rename from arch/m68knommu/kernel/ptrace.c
rename to arch/m68k/kernel/ptrace_no.c


diff --git a/arch/m68k/kernel/setup.c b/arch/m68k/kernel/setup.c
index 334d836..4bf129f 100644
--- a/arch/m68k/kernel/setup.c
+++ b/arch/m68k/kernel/setup.c

@@ -1,533 +1,5 @@
-/*
- *  linux/arch/m68k/kernel/setup.c
- *
- *  Copyright (C) 1995  Hamish Macdonald
- */
-
-/*
- * This file handles the architecture-dependent parts of system setup
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/fs.h>
-#include <linux/console.h>
-#include <linux/genhd.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/module.h>
-#include <linux/initrd.h>
-
-#include <asm/bootinfo.h>
-#include <asm/sections.h>
-#include <asm/setup.h>
-#include <asm/fpu.h>
-#include <asm/irq.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-#ifdef CONFIG_AMIGA
-#include <asm/amigahw.h>
-#endif
-#ifdef CONFIG_ATARI
-#include <asm/atarihw.h>
-#include <asm/atari_stram.h>
-#endif
-#ifdef CONFIG_SUN3X
-#include <asm/dvma.h>
-#endif
-#include <asm/natfeat.h>
-
-#if !FPSTATESIZE || !NR_IRQS
-#warning No CPU/platform type selected, your kernel will not work!
-#warning Are you building an allnoconfig kernel?
-#endif
-
-unsigned long m68k_machtype;
-EXPORT_SYMBOL(m68k_machtype);
-unsigned long m68k_cputype;
-EXPORT_SYMBOL(m68k_cputype);
-unsigned long m68k_fputype;
-unsigned long m68k_mmutype;
-EXPORT_SYMBOL(m68k_mmutype);
-#ifdef CONFIG_VME
-unsigned long vme_brdtype;
-EXPORT_SYMBOL(vme_brdtype);
-#endif
-
-int m68k_is040or060;
-EXPORT_SYMBOL(m68k_is040or060);
-
-extern unsigned long availmem;
-
-int m68k_num_memory;
-EXPORT_SYMBOL(m68k_num_memory);
-int m68k_realnum_memory;
-EXPORT_SYMBOL(m68k_realnum_memory);
-unsigned long m68k_memoffset;
-struct mem_info m68k_memory[NUM_MEMINFO];
-EXPORT_SYMBOL(m68k_memory);
-
-struct mem_info m68k_ramdisk;
-
-static char m68k_command_line[CL_SIZE];
-
-void (*mach_sched_init) (irq_handler_t handler) __initdata = NULL;
-/* machine dependent irq functions */
-void (*mach_init_IRQ) (void) __initdata = NULL;
-void (*mach_get_model) (char *model);
-void (*mach_get_hardware_list) (struct seq_file *m);
-/* machine dependent timer functions */
-unsigned long (*mach_gettimeoffset) (void);
-int (*mach_hwclk) (int, struct rtc_time*);
-EXPORT_SYMBOL(mach_hwclk);
-int (*mach_set_clock_mmss) (unsigned long);
-unsigned int (*mach_get_ss)(void);
-int (*mach_get_rtc_pll)(struct rtc_pll_info *);
-int (*mach_set_rtc_pll)(struct rtc_pll_info *);
-EXPORT_SYMBOL(mach_get_ss);
-EXPORT_SYMBOL(mach_get_rtc_pll);
-EXPORT_SYMBOL(mach_set_rtc_pll);
-void (*mach_reset)( void );
-void (*mach_halt)( void );
-void (*mach_power_off)( void );
-long mach_max_dma_address = 0x00ffffff; /* default set to the lower 16MB */
-#ifdef CONFIG_HEARTBEAT
-void (*mach_heartbeat) (int);
-EXPORT_SYMBOL(mach_heartbeat);
-#endif
-#ifdef CONFIG_M68K_L2_CACHE
-void (*mach_l2_flush) (int);
-#endif
-#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
-void (*mach_beep)(unsigned int, unsigned int);
-EXPORT_SYMBOL(mach_beep);
-#endif
-#if defined(CONFIG_ISA) && defined(MULTI_ISA)
-int isa_type;
-int isa_sex;
-EXPORT_SYMBOL(isa_type);
-EXPORT_SYMBOL(isa_sex);
-#endif
-
-extern int amiga_parse_bootinfo(const struct bi_record *);
-extern int atari_parse_bootinfo(const struct bi_record *);
-extern int mac_parse_bootinfo(const struct bi_record *);
-extern int q40_parse_bootinfo(const struct bi_record *);
-extern int bvme6000_parse_bootinfo(const struct bi_record *);
-extern int mvme16x_parse_bootinfo(const struct bi_record *);
-extern int mvme147_parse_bootinfo(const struct bi_record *);
-extern int hp300_parse_bootinfo(const struct bi_record *);
-extern int apollo_parse_bootinfo(const struct bi_record *);
-
-extern void config_amiga(void);
-extern void config_atari(void);
-extern void config_mac(void);
-extern void config_sun3(void);
-extern void config_apollo(void);
-extern void config_mvme147(void);
-extern void config_mvme16x(void);
-extern void config_bvme6000(void);
-extern void config_hp300(void);
-extern void config_q40(void);
-extern void config_sun3x(void);
-
-#define MASK_256K 0xfffc0000
-
-extern void paging_init(void);
-
-static void __init m68k_parse_bootinfo(const struct bi_record *record)
-{
-	while (record->tag != BI_LAST) {
-		int unknown = 0;
-		const unsigned long *data = record->data;
-
-		switch (record->tag) {
-		case BI_MACHTYPE:
-		case BI_CPUTYPE:
-		case BI_FPUTYPE:
-		case BI_MMUTYPE:
-			/* Already set up by head.S */
-			break;
-
-		case BI_MEMCHUNK:
-			if (m68k_num_memory < NUM_MEMINFO) {
-				m68k_memory[m68k_num_memory].addr = data[0];
-				m68k_memory[m68k_num_memory].size = data[1];
-				m68k_num_memory++;
-			} else
-				printk("m68k_parse_bootinfo: too many memory chunks\n");
-			break;
-
-		case BI_RAMDISK:
-			m68k_ramdisk.addr = data[0];
-			m68k_ramdisk.size = data[1];
-			break;
-
-		case BI_COMMAND_LINE:
-			strlcpy(m68k_command_line, (const char *)data,
-				sizeof(m68k_command_line));
-			break;
-
-		default:
-			if (MACH_IS_AMIGA)
-				unknown = amiga_parse_bootinfo(record);
-			else if (MACH_IS_ATARI)
-				unknown = atari_parse_bootinfo(record);
-			else if (MACH_IS_MAC)
-				unknown = mac_parse_bootinfo(record);
-			else if (MACH_IS_Q40)
-				unknown = q40_parse_bootinfo(record);
-			else if (MACH_IS_BVME6000)
-				unknown = bvme6000_parse_bootinfo(record);
-			else if (MACH_IS_MVME16x)
-				unknown = mvme16x_parse_bootinfo(record);
-			else if (MACH_IS_MVME147)
-				unknown = mvme147_parse_bootinfo(record);
-			else if (MACH_IS_HP300)
-				unknown = hp300_parse_bootinfo(record);
-			else if (MACH_IS_APOLLO)
-				unknown = apollo_parse_bootinfo(record);
-			else
-				unknown = 1;
-		}
-		if (unknown)
-			printk("m68k_parse_bootinfo: unknown tag 0x%04x ignored\n",
-			       record->tag);
-		record = (struct bi_record *)((unsigned long)record +
-					      record->size);
-	}
-
-	m68k_realnum_memory = m68k_num_memory;
-#ifdef CONFIG_SINGLE_MEMORY_CHUNK
-	if (m68k_num_memory > 1) {
-		printk("Ignoring last %i chunks of physical memory\n",
-		       (m68k_num_memory - 1));
-		m68k_num_memory = 1;
-	}
-#endif
-}
-
-void __init setup_arch(char **cmdline_p)
-{
-	int i;
-
-	/* The bootinfo is located right after the kernel bss */
-	m68k_parse_bootinfo((const struct bi_record *)_end);
-
-	if (CPU_IS_040)
-		m68k_is040or060 = 4;
-	else if (CPU_IS_060)
-		m68k_is040or060 = 6;
-
-	/* FIXME: m68k_fputype is passed in by Penguin booter, which can
-	 * be confused by software FPU emulation. BEWARE.
-	 * We should really do our own FPU check at startup.
-	 * [what do we do with buggy 68LC040s? if we have problems
-	 *  with them, we should add a test to check_bugs() below] */
-#ifndef CONFIG_M68KFPU_EMU_ONLY
-	/* clear the fpu if we have one */
-	if (m68k_fputype & (FPU_68881|FPU_68882|FPU_68040|FPU_68060)) {
-		volatile int zero = 0;
-		asm volatile ("frestore %0" : : "m" (zero));
-	}
-#endif
-
-	if (CPU_IS_060) {
-		u32 pcr;
-
-		asm (".chip 68060; movec %%pcr,%0; .chip 68k"
-		     : "=d" (pcr));
-		if (((pcr >> 8) & 0xff) <= 5) {
-			printk("Enabling workaround for errata I14\n");
-			asm (".chip 68060; movec %0,%%pcr; .chip 68k"
-			     : : "d" (pcr | 0x20));
-		}
-	}
-
-	init_mm.start_code = PAGE_OFFSET;
-	init_mm.end_code = (unsigned long)_etext;
-	init_mm.end_data = (unsigned long)_edata;
-	init_mm.brk = (unsigned long)_end;
-
-	*cmdline_p = m68k_command_line;
-	memcpy(boot_command_line, *cmdline_p, CL_SIZE);
-
-	parse_early_param();
-
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
-
-	switch (m68k_machtype) {
-#ifdef CONFIG_AMIGA
-	case MACH_AMIGA:
-		config_amiga();
-		break;
-#endif
-#ifdef CONFIG_ATARI
-	case MACH_ATARI:
-		config_atari();
-		break;
-#endif
-#ifdef CONFIG_MAC
-	case MACH_MAC:
-		config_mac();
-		break;
-#endif
-#ifdef CONFIG_SUN3
-	case MACH_SUN3:
-		config_sun3();
-		break;
-#endif
-#ifdef CONFIG_APOLLO
-	case MACH_APOLLO:
-		config_apollo();
-		break;
-#endif
-#ifdef CONFIG_MVME147
-	case MACH_MVME147:
-		config_mvme147();
-		break;
-#endif
-#ifdef CONFIG_MVME16x
-	case MACH_MVME16x:
-		config_mvme16x();
-		break;
-#endif
-#ifdef CONFIG_BVME6000
-	case MACH_BVME6000:
-		config_bvme6000();
-		break;
-#endif
-#ifdef CONFIG_HP300
-	case MACH_HP300:
-		config_hp300();
-		break;
-#endif
-#ifdef CONFIG_Q40
-	case MACH_Q40:
-		config_q40();
-		break;
-#endif
-#ifdef CONFIG_SUN3X
-	case MACH_SUN3X:
-		config_sun3x();
-		break;
-#endif
-	default:
-		panic("No configuration setup");
-	}
-
-#ifdef CONFIG_NATFEAT
-	nf_init();
-#endif
-
-	paging_init();
-
-#ifndef CONFIG_SUN3
-	for (i = 1; i < m68k_num_memory; i++)
-		free_bootmem_node(NODE_DATA(i), m68k_memory[i].addr,
-				  m68k_memory[i].size);
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (m68k_ramdisk.size) {
-		reserve_bootmem_node(__virt_to_node(phys_to_virt(m68k_ramdisk.addr)),
-				     m68k_ramdisk.addr, m68k_ramdisk.size,
-				     BOOTMEM_DEFAULT);
-		initrd_start = (unsigned long)phys_to_virt(m68k_ramdisk.addr);
-		initrd_end = initrd_start + m68k_ramdisk.size;
-		printk("initrd: %08lx - %08lx\n", initrd_start, initrd_end);
-	}
-#endif
-
-#ifdef CONFIG_ATARI
-	if (MACH_IS_ATARI)
-		atari_stram_reserve_pages((void *)availmem);
-#endif
-#ifdef CONFIG_SUN3X
-	if (MACH_IS_SUN3X) {
-		dvma_init();
-	}
-#endif
-
-#endif /* !CONFIG_SUN3 */
-
-/* set ISA defs early as possible */
-#if defined(CONFIG_ISA) && defined(MULTI_ISA)
-	if (MACH_IS_Q40) {
-		isa_type = ISA_TYPE_Q40;
-		isa_sex = 0;
-	}
-#ifdef CONFIG_AMIGA_PCMCIA
-	if (MACH_IS_AMIGA && AMIGAHW_PRESENT(PCMCIA)) {
-		isa_type = ISA_TYPE_AG;
-		isa_sex = 1;
-	}
-#endif
-#endif
-}
-
-static int show_cpuinfo(struct seq_file *m, void *v)
-{
-	const char *cpu, *mmu, *fpu;
-	unsigned long clockfreq, clockfactor;
-
-#define LOOP_CYCLES_68020	(8)
-#define LOOP_CYCLES_68030	(8)
-#define LOOP_CYCLES_68040	(3)
-#define LOOP_CYCLES_68060	(1)
-
-	if (CPU_IS_020) {
-		cpu = "68020";
-		clockfactor = LOOP_CYCLES_68020;
-	} else if (CPU_IS_030) {
-		cpu = "68030";
-		clockfactor = LOOP_CYCLES_68030;
-	} else if (CPU_IS_040) {
-		cpu = "68040";
-		clockfactor = LOOP_CYCLES_68040;
-	} else if (CPU_IS_060) {
-		cpu = "68060";
-		clockfactor = LOOP_CYCLES_68060;
-	} else {
-		cpu = "680x0";
-		clockfactor = 0;
-	}
-
-#ifdef CONFIG_M68KFPU_EMU_ONLY
-	fpu = "none(soft float)";
+#ifdef CONFIG_MMU
+#include "setup_mm.c"
 #else
-	if (m68k_fputype & FPU_68881)
-		fpu = "68881";
-	else if (m68k_fputype & FPU_68882)
-		fpu = "68882";
-	else if (m68k_fputype & FPU_68040)
-		fpu = "68040";
-	else if (m68k_fputype & FPU_68060)
-		fpu = "68060";
-	else if (m68k_fputype & FPU_SUNFPA)
-		fpu = "Sun FPA";
-	else
-		fpu = "none";
+#include "setup_no.c"
 #endif
-
-	if (m68k_mmutype & MMU_68851)
-		mmu = "68851";
-	else if (m68k_mmutype & MMU_68030)
-		mmu = "68030";
-	else if (m68k_mmutype & MMU_68040)
-		mmu = "68040";
-	else if (m68k_mmutype & MMU_68060)
-		mmu = "68060";
-	else if (m68k_mmutype & MMU_SUN3)
-		mmu = "Sun-3";
-	else if (m68k_mmutype & MMU_APOLLO)
-		mmu = "Apollo";
-	else
-		mmu = "unknown";
-
-	clockfreq = loops_per_jiffy * HZ * clockfactor;
-
-	seq_printf(m, "CPU:\t\t%s\n"
-		   "MMU:\t\t%s\n"
-		   "FPU:\t\t%s\n"
-		   "Clocking:\t%lu.%1luMHz\n"
-		   "BogoMips:\t%lu.%02lu\n"
-		   "Calibration:\t%lu loops\n",
-		   cpu, mmu, fpu,
-		   clockfreq/1000000,(clockfreq/100000)%10,
-		   loops_per_jiffy/(500000/HZ),(loops_per_jiffy/(5000/HZ))%100,
-		   loops_per_jiffy);
-	return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
-	return *pos < 1 ? (void *)1 : NULL;
-}
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	++*pos;
-	return NULL;
-}
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-const struct seq_operations cpuinfo_op = {
-	.start	= c_start,
-	.next	= c_next,
-	.stop	= c_stop,
-	.show	= show_cpuinfo,
-};
-
-#ifdef CONFIG_PROC_HARDWARE
-static int hardware_proc_show(struct seq_file *m, void *v)
-{
-	char model[80];
-	unsigned long mem;
-	int i;
-
-	if (mach_get_model)
-		mach_get_model(model);
-	else
-		strcpy(model, "Unknown m68k");
-
-	seq_printf(m, "Model:\t\t%s\n", model);
-	for (mem = 0, i = 0; i < m68k_num_memory; i++)
-		mem += m68k_memory[i].size;
-	seq_printf(m, "System Memory:\t%ldK\n", mem >> 10);
-
-	if (mach_get_hardware_list)
-		mach_get_hardware_list(m);
-
-	return 0;
-}
-
-static int hardware_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, hardware_proc_show, NULL);
-}
-
-static const struct file_operations hardware_proc_fops = {
-	.open		= hardware_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int __init proc_hardware_init(void)
-{
-	proc_create("hardware", 0, NULL, &hardware_proc_fops);
-	return 0;
-}
-module_init(proc_hardware_init);
-#endif
-
-void check_bugs(void)
-{
-#ifndef CONFIG_M68KFPU_EMU
-	if (m68k_fputype == 0) {
-		printk(KERN_EMERG "*** YOU DO NOT HAVE A FLOATING POINT UNIT, "
-			"WHICH IS REQUIRED BY LINUX/M68K ***\n");
-		printk(KERN_EMERG "Upgrade your hardware or join the FPU "
-			"emulation project\n");
-		panic("no FPU");
-	}
-#endif /* !CONFIG_M68KFPU_EMU */
-}
-
-#ifdef CONFIG_ADB
-static int __init adb_probe_sync_enable (char *str) {
-	extern int __adb_probe_sync;
-	__adb_probe_sync = 1;
-	return 1;
-}
-
-__setup("adb_sync", adb_probe_sync_enable);
-#endif /* CONFIG_ADB */

diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
new file mode 100644
index 0000000..334d836
--- /dev/null
+++ b/arch/m68k/kernel/setup_mm.c

@@ -0,0 +1,533 @@
+/*
+ *  linux/arch/m68k/kernel/setup.c
+ *
+ *  Copyright (C) 1995  Hamish Macdonald
+ */
+
+/*
+ * This file handles the architecture-dependent parts of system setup
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/console.h>
+#include <linux/genhd.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/initrd.h>
+
+#include <asm/bootinfo.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/fpu.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#ifdef CONFIG_AMIGA
+#include <asm/amigahw.h>
+#endif
+#ifdef CONFIG_ATARI
+#include <asm/atarihw.h>
+#include <asm/atari_stram.h>
+#endif
+#ifdef CONFIG_SUN3X
+#include <asm/dvma.h>
+#endif
+#include <asm/natfeat.h>
+
+#if !FPSTATESIZE || !NR_IRQS
+#warning No CPU/platform type selected, your kernel will not work!
+#warning Are you building an allnoconfig kernel?
+#endif
+
+unsigned long m68k_machtype;
+EXPORT_SYMBOL(m68k_machtype);
+unsigned long m68k_cputype;
+EXPORT_SYMBOL(m68k_cputype);
+unsigned long m68k_fputype;
+unsigned long m68k_mmutype;
+EXPORT_SYMBOL(m68k_mmutype);
+#ifdef CONFIG_VME
+unsigned long vme_brdtype;
+EXPORT_SYMBOL(vme_brdtype);
+#endif
+
+int m68k_is040or060;
+EXPORT_SYMBOL(m68k_is040or060);
+
+extern unsigned long availmem;
+
+int m68k_num_memory;
+EXPORT_SYMBOL(m68k_num_memory);
+int m68k_realnum_memory;
+EXPORT_SYMBOL(m68k_realnum_memory);
+unsigned long m68k_memoffset;
+struct mem_info m68k_memory[NUM_MEMINFO];
+EXPORT_SYMBOL(m68k_memory);
+
+struct mem_info m68k_ramdisk;
+
+static char m68k_command_line[CL_SIZE];
+
+void (*mach_sched_init) (irq_handler_t handler) __initdata = NULL;
+/* machine dependent irq functions */
+void (*mach_init_IRQ) (void) __initdata = NULL;
+void (*mach_get_model) (char *model);
+void (*mach_get_hardware_list) (struct seq_file *m);
+/* machine dependent timer functions */
+unsigned long (*mach_gettimeoffset) (void);
+int (*mach_hwclk) (int, struct rtc_time*);
+EXPORT_SYMBOL(mach_hwclk);
+int (*mach_set_clock_mmss) (unsigned long);
+unsigned int (*mach_get_ss)(void);
+int (*mach_get_rtc_pll)(struct rtc_pll_info *);
+int (*mach_set_rtc_pll)(struct rtc_pll_info *);
+EXPORT_SYMBOL(mach_get_ss);
+EXPORT_SYMBOL(mach_get_rtc_pll);
+EXPORT_SYMBOL(mach_set_rtc_pll);
+void (*mach_reset)( void );
+void (*mach_halt)( void );
+void (*mach_power_off)( void );
+long mach_max_dma_address = 0x00ffffff; /* default set to the lower 16MB */
+#ifdef CONFIG_HEARTBEAT
+void (*mach_heartbeat) (int);
+EXPORT_SYMBOL(mach_heartbeat);
+#endif
+#ifdef CONFIG_M68K_L2_CACHE
+void (*mach_l2_flush) (int);
+#endif
+#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
+void (*mach_beep)(unsigned int, unsigned int);
+EXPORT_SYMBOL(mach_beep);
+#endif
+#if defined(CONFIG_ISA) && defined(MULTI_ISA)
+int isa_type;
+int isa_sex;
+EXPORT_SYMBOL(isa_type);
+EXPORT_SYMBOL(isa_sex);
+#endif
+
+extern int amiga_parse_bootinfo(const struct bi_record *);
+extern int atari_parse_bootinfo(const struct bi_record *);
+extern int mac_parse_bootinfo(const struct bi_record *);
+extern int q40_parse_bootinfo(const struct bi_record *);
+extern int bvme6000_parse_bootinfo(const struct bi_record *);
+extern int mvme16x_parse_bootinfo(const struct bi_record *);
+extern int mvme147_parse_bootinfo(const struct bi_record *);
+extern int hp300_parse_bootinfo(const struct bi_record *);
+extern int apollo_parse_bootinfo(const struct bi_record *);
+
+extern void config_amiga(void);
+extern void config_atari(void);
+extern void config_mac(void);
+extern void config_sun3(void);
+extern void config_apollo(void);
+extern void config_mvme147(void);
+extern void config_mvme16x(void);
+extern void config_bvme6000(void);
+extern void config_hp300(void);
+extern void config_q40(void);
+extern void config_sun3x(void);
+
+#define MASK_256K 0xfffc0000
+
+extern void paging_init(void);
+
+static void __init m68k_parse_bootinfo(const struct bi_record *record)
+{
+	while (record->tag != BI_LAST) {
+		int unknown = 0;
+		const unsigned long *data = record->data;
+
+		switch (record->tag) {
+		case BI_MACHTYPE:
+		case BI_CPUTYPE:
+		case BI_FPUTYPE:
+		case BI_MMUTYPE:
+			/* Already set up by head.S */
+			break;
+
+		case BI_MEMCHUNK:
+			if (m68k_num_memory < NUM_MEMINFO) {
+				m68k_memory[m68k_num_memory].addr = data[0];
+				m68k_memory[m68k_num_memory].size = data[1];
+				m68k_num_memory++;
+			} else
+				printk("m68k_parse_bootinfo: too many memory chunks\n");
+			break;
+
+		case BI_RAMDISK:
+			m68k_ramdisk.addr = data[0];
+			m68k_ramdisk.size = data[1];
+			break;
+
+		case BI_COMMAND_LINE:
+			strlcpy(m68k_command_line, (const char *)data,
+				sizeof(m68k_command_line));
+			break;
+
+		default:
+			if (MACH_IS_AMIGA)
+				unknown = amiga_parse_bootinfo(record);
+			else if (MACH_IS_ATARI)
+				unknown = atari_parse_bootinfo(record);
+			else if (MACH_IS_MAC)
+				unknown = mac_parse_bootinfo(record);
+			else if (MACH_IS_Q40)
+				unknown = q40_parse_bootinfo(record);
+			else if (MACH_IS_BVME6000)
+				unknown = bvme6000_parse_bootinfo(record);
+			else if (MACH_IS_MVME16x)
+				unknown = mvme16x_parse_bootinfo(record);
+			else if (MACH_IS_MVME147)
+				unknown = mvme147_parse_bootinfo(record);
+			else if (MACH_IS_HP300)
+				unknown = hp300_parse_bootinfo(record);
+			else if (MACH_IS_APOLLO)
+				unknown = apollo_parse_bootinfo(record);
+			else
+				unknown = 1;
+		}
+		if (unknown)
+			printk("m68k_parse_bootinfo: unknown tag 0x%04x ignored\n",
+			       record->tag);
+		record = (struct bi_record *)((unsigned long)record +
+					      record->size);
+	}
+
+	m68k_realnum_memory = m68k_num_memory;
+#ifdef CONFIG_SINGLE_MEMORY_CHUNK
+	if (m68k_num_memory > 1) {
+		printk("Ignoring last %i chunks of physical memory\n",
+		       (m68k_num_memory - 1));
+		m68k_num_memory = 1;
+	}
+#endif
+}
+
+void __init setup_arch(char **cmdline_p)
+{
+	int i;
+
+	/* The bootinfo is located right after the kernel bss */
+	m68k_parse_bootinfo((const struct bi_record *)_end);
+
+	if (CPU_IS_040)
+		m68k_is040or060 = 4;
+	else if (CPU_IS_060)
+		m68k_is040or060 = 6;
+
+	/* FIXME: m68k_fputype is passed in by Penguin booter, which can
+	 * be confused by software FPU emulation. BEWARE.
+	 * We should really do our own FPU check at startup.
+	 * [what do we do with buggy 68LC040s? if we have problems
+	 *  with them, we should add a test to check_bugs() below] */
+#ifndef CONFIG_M68KFPU_EMU_ONLY
+	/* clear the fpu if we have one */
+	if (m68k_fputype & (FPU_68881|FPU_68882|FPU_68040|FPU_68060)) {
+		volatile int zero = 0;
+		asm volatile ("frestore %0" : : "m" (zero));
+	}
+#endif
+
+	if (CPU_IS_060) {
+		u32 pcr;
+
+		asm (".chip 68060; movec %%pcr,%0; .chip 68k"
+		     : "=d" (pcr));
+		if (((pcr >> 8) & 0xff) <= 5) {
+			printk("Enabling workaround for errata I14\n");
+			asm (".chip 68060; movec %0,%%pcr; .chip 68k"
+			     : : "d" (pcr | 0x20));
+		}
+	}
+
+	init_mm.start_code = PAGE_OFFSET;
+	init_mm.end_code = (unsigned long)_etext;
+	init_mm.end_data = (unsigned long)_edata;
+	init_mm.brk = (unsigned long)_end;
+
+	*cmdline_p = m68k_command_line;
+	memcpy(boot_command_line, *cmdline_p, CL_SIZE);
+
+	parse_early_param();
+
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;
+#endif
+
+	switch (m68k_machtype) {
+#ifdef CONFIG_AMIGA
+	case MACH_AMIGA:
+		config_amiga();
+		break;
+#endif
+#ifdef CONFIG_ATARI
+	case MACH_ATARI:
+		config_atari();
+		break;
+#endif
+#ifdef CONFIG_MAC
+	case MACH_MAC:
+		config_mac();
+		break;
+#endif
+#ifdef CONFIG_SUN3
+	case MACH_SUN3:
+		config_sun3();
+		break;
+#endif
+#ifdef CONFIG_APOLLO
+	case MACH_APOLLO:
+		config_apollo();
+		break;
+#endif
+#ifdef CONFIG_MVME147
+	case MACH_MVME147:
+		config_mvme147();
+		break;
+#endif
+#ifdef CONFIG_MVME16x
+	case MACH_MVME16x:
+		config_mvme16x();
+		break;
+#endif
+#ifdef CONFIG_BVME6000
+	case MACH_BVME6000:
+		config_bvme6000();
+		break;
+#endif
+#ifdef CONFIG_HP300
+	case MACH_HP300:
+		config_hp300();
+		break;
+#endif
+#ifdef CONFIG_Q40
+	case MACH_Q40:
+		config_q40();
+		break;
+#endif
+#ifdef CONFIG_SUN3X
+	case MACH_SUN3X:
+		config_sun3x();
+		break;
+#endif
+	default:
+		panic("No configuration setup");
+	}
+
+#ifdef CONFIG_NATFEAT
+	nf_init();
+#endif
+
+	paging_init();
+
+#ifndef CONFIG_SUN3
+	for (i = 1; i < m68k_num_memory; i++)
+		free_bootmem_node(NODE_DATA(i), m68k_memory[i].addr,
+				  m68k_memory[i].size);
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (m68k_ramdisk.size) {
+		reserve_bootmem_node(__virt_to_node(phys_to_virt(m68k_ramdisk.addr)),
+				     m68k_ramdisk.addr, m68k_ramdisk.size,
+				     BOOTMEM_DEFAULT);
+		initrd_start = (unsigned long)phys_to_virt(m68k_ramdisk.addr);
+		initrd_end = initrd_start + m68k_ramdisk.size;
+		printk("initrd: %08lx - %08lx\n", initrd_start, initrd_end);
+	}
+#endif
+
+#ifdef CONFIG_ATARI
+	if (MACH_IS_ATARI)
+		atari_stram_reserve_pages((void *)availmem);
+#endif
+#ifdef CONFIG_SUN3X
+	if (MACH_IS_SUN3X) {
+		dvma_init();
+	}
+#endif
+
+#endif /* !CONFIG_SUN3 */
+
+/* set ISA defs early as possible */
+#if defined(CONFIG_ISA) && defined(MULTI_ISA)
+	if (MACH_IS_Q40) {
+		isa_type = ISA_TYPE_Q40;
+		isa_sex = 0;
+	}
+#ifdef CONFIG_AMIGA_PCMCIA
+	if (MACH_IS_AMIGA && AMIGAHW_PRESENT(PCMCIA)) {
+		isa_type = ISA_TYPE_AG;
+		isa_sex = 1;
+	}
+#endif
+#endif
+}
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	const char *cpu, *mmu, *fpu;
+	unsigned long clockfreq, clockfactor;
+
+#define LOOP_CYCLES_68020	(8)
+#define LOOP_CYCLES_68030	(8)
+#define LOOP_CYCLES_68040	(3)
+#define LOOP_CYCLES_68060	(1)
+
+	if (CPU_IS_020) {
+		cpu = "68020";
+		clockfactor = LOOP_CYCLES_68020;
+	} else if (CPU_IS_030) {
+		cpu = "68030";
+		clockfactor = LOOP_CYCLES_68030;
+	} else if (CPU_IS_040) {
+		cpu = "68040";
+		clockfactor = LOOP_CYCLES_68040;
+	} else if (CPU_IS_060) {
+		cpu = "68060";
+		clockfactor = LOOP_CYCLES_68060;
+	} else {
+		cpu = "680x0";
+		clockfactor = 0;
+	}
+
+#ifdef CONFIG_M68KFPU_EMU_ONLY
+	fpu = "none(soft float)";
+#else
+	if (m68k_fputype & FPU_68881)
+		fpu = "68881";
+	else if (m68k_fputype & FPU_68882)
+		fpu = "68882";
+	else if (m68k_fputype & FPU_68040)
+		fpu = "68040";
+	else if (m68k_fputype & FPU_68060)
+		fpu = "68060";
+	else if (m68k_fputype & FPU_SUNFPA)
+		fpu = "Sun FPA";
+	else
+		fpu = "none";
+#endif
+
+	if (m68k_mmutype & MMU_68851)
+		mmu = "68851";
+	else if (m68k_mmutype & MMU_68030)
+		mmu = "68030";
+	else if (m68k_mmutype & MMU_68040)
+		mmu = "68040";
+	else if (m68k_mmutype & MMU_68060)
+		mmu = "68060";
+	else if (m68k_mmutype & MMU_SUN3)
+		mmu = "Sun-3";
+	else if (m68k_mmutype & MMU_APOLLO)
+		mmu = "Apollo";
+	else
+		mmu = "unknown";
+
+	clockfreq = loops_per_jiffy * HZ * clockfactor;
+
+	seq_printf(m, "CPU:\t\t%s\n"
+		   "MMU:\t\t%s\n"
+		   "FPU:\t\t%s\n"
+		   "Clocking:\t%lu.%1luMHz\n"
+		   "BogoMips:\t%lu.%02lu\n"
+		   "Calibration:\t%lu loops\n",
+		   cpu, mmu, fpu,
+		   clockfreq/1000000,(clockfreq/100000)%10,
+		   loops_per_jiffy/(500000/HZ),(loops_per_jiffy/(5000/HZ))%100,
+		   loops_per_jiffy);
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < 1 ? (void *)1 : NULL;
+}
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= show_cpuinfo,
+};
+
+#ifdef CONFIG_PROC_HARDWARE
+static int hardware_proc_show(struct seq_file *m, void *v)
+{
+	char model[80];
+	unsigned long mem;
+	int i;
+
+	if (mach_get_model)
+		mach_get_model(model);
+	else
+		strcpy(model, "Unknown m68k");
+
+	seq_printf(m, "Model:\t\t%s\n", model);
+	for (mem = 0, i = 0; i < m68k_num_memory; i++)
+		mem += m68k_memory[i].size;
+	seq_printf(m, "System Memory:\t%ldK\n", mem >> 10);
+
+	if (mach_get_hardware_list)
+		mach_get_hardware_list(m);
+
+	return 0;
+}
+
+static int hardware_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hardware_proc_show, NULL);
+}
+
+static const struct file_operations hardware_proc_fops = {
+	.open		= hardware_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init proc_hardware_init(void)
+{
+	proc_create("hardware", 0, NULL, &hardware_proc_fops);
+	return 0;
+}
+module_init(proc_hardware_init);
+#endif
+
+void check_bugs(void)
+{
+#ifndef CONFIG_M68KFPU_EMU
+	if (m68k_fputype == 0) {
+		printk(KERN_EMERG "*** YOU DO NOT HAVE A FLOATING POINT UNIT, "
+			"WHICH IS REQUIRED BY LINUX/M68K ***\n");
+		printk(KERN_EMERG "Upgrade your hardware or join the FPU "
+			"emulation project\n");
+		panic("no FPU");
+	}
+#endif /* !CONFIG_M68KFPU_EMU */
+}
+
+#ifdef CONFIG_ADB
+static int __init adb_probe_sync_enable (char *str) {
+	extern int __adb_probe_sync;
+	__adb_probe_sync = 1;
+	return 1;
+}
+
+__setup("adb_sync", adb_probe_sync_enable);
+#endif /* CONFIG_ADB */

diff --git a/arch/m68knommu/kernel/setup.c b/arch/m68k/kernel/setup_no.c
similarity index 100%
rename from arch/m68knommu/kernel/setup.c
rename to arch/m68k/kernel/setup_no.c


diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index a0afc23..2e25713 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c

@@ -1,1017 +1,5 @@
-/*
- *  linux/arch/m68k/kernel/signal.c
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-/*
- * Linux/m68k support by Hamish Macdonald
- *
- * 68060 fixes by Jesper Skov
- *
- * 1997-12-01  Modified for POSIX.1b signals by Andreas Schwab
- *
- * mathemu support by Roman Zippel
- *  (Note: fpstate in the signal context is completely ignored for the emulator
- *         and the internal floating point format is put on stack)
- */
-
-/*
- * ++roman (07/09/96): implemented signal stacks (specially for tosemu on
- * Atari :-) Current limitation: Only one sigstack can be active at one time.
- * If a second signal with SA_ONSTACK set arrives while working on a sigstack,
- * SA_ONSTACK is ignored. This behaviour avoids lots of trouble with nested
- * signal handlers!
- */
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/syscalls.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/highuid.h>
-#include <linux/personality.h>
-#include <linux/tty.h>
-#include <linux/binfmts.h>
-#include <linux/module.h>
-
-#include <asm/setup.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/traps.h>
-#include <asm/ucontext.h>
-
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-static const int frame_extra_sizes[16] = {
-  [1]	= -1, /* sizeof(((struct frame *)0)->un.fmt1), */
-  [2]	= sizeof(((struct frame *)0)->un.fmt2),
-  [3]	= sizeof(((struct frame *)0)->un.fmt3),
-  [4]	= sizeof(((struct frame *)0)->un.fmt4),
-  [5]	= -1, /* sizeof(((struct frame *)0)->un.fmt5), */
-  [6]	= -1, /* sizeof(((struct frame *)0)->un.fmt6), */
-  [7]	= sizeof(((struct frame *)0)->un.fmt7),
-  [8]	= -1, /* sizeof(((struct frame *)0)->un.fmt8), */
-  [9]	= sizeof(((struct frame *)0)->un.fmt9),
-  [10]	= sizeof(((struct frame *)0)->un.fmta),
-  [11]	= sizeof(((struct frame *)0)->un.fmtb),
-  [12]	= -1, /* sizeof(((struct frame *)0)->un.fmtc), */
-  [13]	= -1, /* sizeof(((struct frame *)0)->un.fmtd), */
-  [14]	= -1, /* sizeof(((struct frame *)0)->un.fmte), */
-  [15]	= -1, /* sizeof(((struct frame *)0)->un.fmtf), */
-};
-
-int handle_kernel_fault(struct pt_regs *regs)
-{
-	const struct exception_table_entry *fixup;
-	struct pt_regs *tregs;
-
-	/* Are we prepared to handle this kernel fault? */
-	fixup = search_exception_tables(regs->pc);
-	if (!fixup)
-		return 0;
-
-	/* Create a new four word stack frame, discarding the old one. */
-	regs->stkadj = frame_extra_sizes[regs->format];
-	tregs =	(struct pt_regs *)((long)regs + regs->stkadj);
-	tregs->vector = regs->vector;
-	tregs->format = 0;
-	tregs->pc = fixup->fixup;
-	tregs->sr = regs->sr;
-
-	return 1;
-}
-
-/*
- * Atomically swap in the new signal mask, and wait for a signal.
- */
-asmlinkage int
-sys_sigsuspend(int unused0, int unused1, old_sigset_t mask)
-{
-	mask &= _BLOCKABLE;
-	spin_lock_irq(&current->sighand->siglock);
-	current->saved_sigmask = current->blocked;
-	siginitset(&current->blocked, mask);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	current->state = TASK_INTERRUPTIBLE;
-	schedule();
-	set_restore_sigmask();
-
-	return -ERESTARTNOHAND;
-}
-
-asmlinkage int
-sys_sigaction(int sig, const struct old_sigaction __user *act,
-	      struct old_sigaction __user *oact)
-{
-	struct k_sigaction new_ka, old_ka;
-	int ret;
-
-	if (act) {
-		old_sigset_t mask;
-		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
-		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
-		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
-		    __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
-		    __get_user(mask, &act->sa_mask))
-			return -EFAULT;
-		siginitset(&new_ka.sa.sa_mask, mask);
-	}
-
-	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
-
-	if (!ret && oact) {
-		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
-		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
-		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
-		    __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
-		    __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
-			return -EFAULT;
-	}
-
-	return ret;
-}
-
-asmlinkage int
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss)
-{
-	return do_sigaltstack(uss, uoss, rdusp());
-}
-
-
-/*
- * Do a signal return; undo the signal stack.
- *
- * Keep the return code on the stack quadword aligned!
- * That makes the cache flush below easier.
- */
-
-struct sigframe
-{
-	char __user *pretcode;
-	int sig;
-	int code;
-	struct sigcontext __user *psc;
-	char retcode[8];
-	unsigned long extramask[_NSIG_WORDS-1];
-	struct sigcontext sc;
-};
-
-struct rt_sigframe
-{
-	char __user *pretcode;
-	int sig;
-	struct siginfo __user *pinfo;
-	void __user *puc;
-	char retcode[8];
-	struct siginfo info;
-	struct ucontext uc;
-};
-
-
-static unsigned char fpu_version;	/* version number of fpu, set by setup_frame */
-
-static inline int restore_fpu_state(struct sigcontext *sc)
-{
-	int err = 1;
-
-	if (FPU_IS_EMU) {
-	    /* restore registers */
-	    memcpy(current->thread.fpcntl, sc->sc_fpcntl, 12);
-	    memcpy(current->thread.fp, sc->sc_fpregs, 24);
-	    return 0;
-	}
-
-	if (CPU_IS_060 ? sc->sc_fpstate[2] : sc->sc_fpstate[0]) {
-	    /* Verify the frame format.  */
-	    if (!CPU_IS_060 && (sc->sc_fpstate[0] != fpu_version))
-		goto out;
-	    if (CPU_IS_020_OR_030) {
-		if (m68k_fputype & FPU_68881 &&
-		    !(sc->sc_fpstate[1] == 0x18 || sc->sc_fpstate[1] == 0xb4))
-		    goto out;
-		if (m68k_fputype & FPU_68882 &&
-		    !(sc->sc_fpstate[1] == 0x38 || sc->sc_fpstate[1] == 0xd4))
-		    goto out;
-	    } else if (CPU_IS_040) {
-		if (!(sc->sc_fpstate[1] == 0x00 ||
-                      sc->sc_fpstate[1] == 0x28 ||
-                      sc->sc_fpstate[1] == 0x60))
-		    goto out;
-	    } else if (CPU_IS_060) {
-		if (!(sc->sc_fpstate[3] == 0x00 ||
-                      sc->sc_fpstate[3] == 0x60 ||
-		      sc->sc_fpstate[3] == 0xe0))
-		    goto out;
-	    } else
-		goto out;
-
-	    __asm__ volatile (".chip 68k/68881\n\t"
-			      "fmovemx %0,%%fp0-%%fp1\n\t"
-			      "fmoveml %1,%%fpcr/%%fpsr/%%fpiar\n\t"
-			      ".chip 68k"
-			      : /* no outputs */
-			      : "m" (*sc->sc_fpregs), "m" (*sc->sc_fpcntl));
-	}
-	__asm__ volatile (".chip 68k/68881\n\t"
-			  "frestore %0\n\t"
-			  ".chip 68k" : : "m" (*sc->sc_fpstate));
-	err = 0;
-
-out:
-	return err;
-}
-
-#define FPCONTEXT_SIZE	216
-#define uc_fpstate	uc_filler[0]
-#define uc_formatvec	uc_filler[FPCONTEXT_SIZE/4]
-#define uc_extra	uc_filler[FPCONTEXT_SIZE/4+1]
-
-static inline int rt_restore_fpu_state(struct ucontext __user *uc)
-{
-	unsigned char fpstate[FPCONTEXT_SIZE];
-	int context_size = CPU_IS_060 ? 8 : 0;
-	fpregset_t fpregs;
-	int err = 1;
-
-	if (FPU_IS_EMU) {
-		/* restore fpu control register */
-		if (__copy_from_user(current->thread.fpcntl,
-				uc->uc_mcontext.fpregs.f_fpcntl, 12))
-			goto out;
-		/* restore all other fpu register */
-		if (__copy_from_user(current->thread.fp,
-				uc->uc_mcontext.fpregs.f_fpregs, 96))
-			goto out;
-		return 0;
-	}
-
-	if (__get_user(*(long *)fpstate, (long __user *)&uc->uc_fpstate))
-		goto out;
-	if (CPU_IS_060 ? fpstate[2] : fpstate[0]) {
-		if (!CPU_IS_060)
-			context_size = fpstate[1];
-		/* Verify the frame format.  */
-		if (!CPU_IS_060 && (fpstate[0] != fpu_version))
-			goto out;
-		if (CPU_IS_020_OR_030) {
-			if (m68k_fputype & FPU_68881 &&
-			    !(context_size == 0x18 || context_size == 0xb4))
-				goto out;
-			if (m68k_fputype & FPU_68882 &&
-			    !(context_size == 0x38 || context_size == 0xd4))
-				goto out;
-		} else if (CPU_IS_040) {
-			if (!(context_size == 0x00 ||
-			      context_size == 0x28 ||
-			      context_size == 0x60))
-				goto out;
-		} else if (CPU_IS_060) {
-			if (!(fpstate[3] == 0x00 ||
-			      fpstate[3] == 0x60 ||
-			      fpstate[3] == 0xe0))
-				goto out;
-		} else
-			goto out;
-		if (__copy_from_user(&fpregs, &uc->uc_mcontext.fpregs,
-				     sizeof(fpregs)))
-			goto out;
-		__asm__ volatile (".chip 68k/68881\n\t"
-				  "fmovemx %0,%%fp0-%%fp7\n\t"
-				  "fmoveml %1,%%fpcr/%%fpsr/%%fpiar\n\t"
-				  ".chip 68k"
-				  : /* no outputs */
-				  : "m" (*fpregs.f_fpregs),
-				    "m" (*fpregs.f_fpcntl));
-	}
-	if (context_size &&
-	    __copy_from_user(fpstate + 4, (long __user *)&uc->uc_fpstate + 1,
-			     context_size))
-		goto out;
-	__asm__ volatile (".chip 68k/68881\n\t"
-			  "frestore %0\n\t"
-			  ".chip 68k" : : "m" (*fpstate));
-	err = 0;
-
-out:
-	return err;
-}
-
-static int mangle_kernel_stack(struct pt_regs *regs, int formatvec,
-			       void __user *fp)
-{
-	int fsize = frame_extra_sizes[formatvec >> 12];
-	if (fsize < 0) {
-		/*
-		 * user process trying to return with weird frame format
-		 */
-#ifdef DEBUG
-		printk("user process returning with weird frame format\n");
-#endif
-		return 1;
-	}
-	if (!fsize) {
-		regs->format = formatvec >> 12;
-		regs->vector = formatvec & 0xfff;
-	} else {
-		struct switch_stack *sw = (struct switch_stack *)regs - 1;
-		unsigned long buf[fsize / 2]; /* yes, twice as much */
-
-		/* that'll make sure that expansion won't crap over data */
-		if (copy_from_user(buf + fsize / 4, fp, fsize))
-			return 1;
-
-		/* point of no return */
-		regs->format = formatvec >> 12;
-		regs->vector = formatvec & 0xfff;
-#define frame_offset (sizeof(struct pt_regs)+sizeof(struct switch_stack))
-		__asm__ __volatile__
-			("   movel %0,%/a0\n\t"
-			 "   subl %1,%/a0\n\t"     /* make room on stack */
-			 "   movel %/a0,%/sp\n\t"  /* set stack pointer */
-			 /* move switch_stack and pt_regs */
-			 "1: movel %0@+,%/a0@+\n\t"
-			 "   dbra %2,1b\n\t"
-			 "   lea %/sp@(%c3),%/a0\n\t" /* add offset of fmt */
-			 "   lsrl  #2,%1\n\t"
-			 "   subql #1,%1\n\t"
-			 /* copy to the gap we'd made */
-			 "2: movel %4@+,%/a0@+\n\t"
-			 "   dbra %1,2b\n\t"
-			 "   bral ret_from_signal\n"
-			 : /* no outputs, it doesn't ever return */
-			 : "a" (sw), "d" (fsize), "d" (frame_offset/4-1),
-			   "n" (frame_offset), "a" (buf + fsize/4)
-			 : "a0");
-#undef frame_offset
-	}
-	return 0;
-}
-
-static inline int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __user *fp)
-{
-	int formatvec;
-	struct sigcontext context;
-	int err;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-	/* get previous context */
-	if (copy_from_user(&context, usc, sizeof(context)))
-		goto badframe;
-
-	/* restore passed registers */
-	regs->d0 = context.sc_d0;
-	regs->d1 = context.sc_d1;
-	regs->a0 = context.sc_a0;
-	regs->a1 = context.sc_a1;
-	regs->sr = (regs->sr & 0xff00) | (context.sc_sr & 0xff);
-	regs->pc = context.sc_pc;
-	regs->orig_d0 = -1;		/* disable syscall checks */
-	wrusp(context.sc_usp);
-	formatvec = context.sc_formatvec;
-
-	err = restore_fpu_state(&context);
-
-	if (err || mangle_kernel_stack(regs, formatvec, fp))
-		goto badframe;
-
-	return 0;
-
-badframe:
-	return 1;
-}
-
-static inline int
-rt_restore_ucontext(struct pt_regs *regs, struct switch_stack *sw,
-		    struct ucontext __user *uc)
-{
-	int temp;
-	greg_t __user *gregs = uc->uc_mcontext.gregs;
-	unsigned long usp;
-	int err;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-	err = __get_user(temp, &uc->uc_mcontext.version);
-	if (temp != MCONTEXT_VERSION)
-		goto badframe;
-	/* restore passed registers */
-	err |= __get_user(regs->d0, &gregs[0]);
-	err |= __get_user(regs->d1, &gregs[1]);
-	err |= __get_user(regs->d2, &gregs[2]);
-	err |= __get_user(regs->d3, &gregs[3]);
-	err |= __get_user(regs->d4, &gregs[4]);
-	err |= __get_user(regs->d5, &gregs[5]);
-	err |= __get_user(sw->d6, &gregs[6]);
-	err |= __get_user(sw->d7, &gregs[7]);
-	err |= __get_user(regs->a0, &gregs[8]);
-	err |= __get_user(regs->a1, &gregs[9]);
-	err |= __get_user(regs->a2, &gregs[10]);
-	err |= __get_user(sw->a3, &gregs[11]);
-	err |= __get_user(sw->a4, &gregs[12]);
-	err |= __get_user(sw->a5, &gregs[13]);
-	err |= __get_user(sw->a6, &gregs[14]);
-	err |= __get_user(usp, &gregs[15]);
-	wrusp(usp);
-	err |= __get_user(regs->pc, &gregs[16]);
-	err |= __get_user(temp, &gregs[17]);
-	regs->sr = (regs->sr & 0xff00) | (temp & 0xff);
-	regs->orig_d0 = -1;		/* disable syscall checks */
-	err |= __get_user(temp, &uc->uc_formatvec);
-
-	err |= rt_restore_fpu_state(uc);
-
-	if (err || do_sigaltstack(&uc->uc_stack, NULL, usp) == -EFAULT)
-		goto badframe;
-
-	if (mangle_kernel_stack(regs, temp, &uc->uc_extra))
-		goto badframe;
-
-	return 0;
-
-badframe:
-	return 1;
-}
-
-asmlinkage int do_sigreturn(unsigned long __unused)
-{
-	struct switch_stack *sw = (struct switch_stack *) &__unused;
-	struct pt_regs *regs = (struct pt_regs *) (sw + 1);
-	unsigned long usp = rdusp();
-	struct sigframe __user *frame = (struct sigframe __user *)(usp - 4);
-	sigset_t set;
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__get_user(set.sig[0], &frame->sc.sc_mask) ||
-	    (_NSIG_WORDS > 1 &&
-	     __copy_from_user(&set.sig[1], &frame->extramask,
-			      sizeof(frame->extramask))))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	current->blocked = set;
-	recalc_sigpending();
-
-	if (restore_sigcontext(regs, &frame->sc, frame + 1))
-		goto badframe;
-	return regs->d0;
-
-badframe:
-	force_sig(SIGSEGV, current);
-	return 0;
-}
-
-asmlinkage int do_rt_sigreturn(unsigned long __unused)
-{
-	struct switch_stack *sw = (struct switch_stack *) &__unused;
-	struct pt_regs *regs = (struct pt_regs *) (sw + 1);
-	unsigned long usp = rdusp();
-	struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(usp - 4);
-	sigset_t set;
-
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
-		goto badframe;
-	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
-		goto badframe;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	current->blocked = set;
-	recalc_sigpending();
-
-	if (rt_restore_ucontext(regs, sw, &frame->uc))
-		goto badframe;
-	return regs->d0;
-
-badframe:
-	force_sig(SIGSEGV, current);
-	return 0;
-}
-
-/*
- * Set up a signal frame.
- */
-
-static inline void save_fpu_state(struct sigcontext *sc, struct pt_regs *regs)
-{
-	if (FPU_IS_EMU) {
-		/* save registers */
-		memcpy(sc->sc_fpcntl, current->thread.fpcntl, 12);
-		memcpy(sc->sc_fpregs, current->thread.fp, 24);
-		return;
-	}
-
-	__asm__ volatile (".chip 68k/68881\n\t"
-			  "fsave %0\n\t"
-			  ".chip 68k"
-			  : : "m" (*sc->sc_fpstate) : "memory");
-
-	if (CPU_IS_060 ? sc->sc_fpstate[2] : sc->sc_fpstate[0]) {
-		fpu_version = sc->sc_fpstate[0];
-		if (CPU_IS_020_OR_030 &&
-		    regs->vector >= (VEC_FPBRUC * 4) &&
-		    regs->vector <= (VEC_FPNAN * 4)) {
-			/* Clear pending exception in 68882 idle frame */
-			if (*(unsigned short *) sc->sc_fpstate == 0x1f38)
-				sc->sc_fpstate[0x38] |= 1 << 3;
-		}
-		__asm__ volatile (".chip 68k/68881\n\t"
-				  "fmovemx %%fp0-%%fp1,%0\n\t"
-				  "fmoveml %%fpcr/%%fpsr/%%fpiar,%1\n\t"
-				  ".chip 68k"
-				  : "=m" (*sc->sc_fpregs),
-				    "=m" (*sc->sc_fpcntl)
-				  : /* no inputs */
-				  : "memory");
-	}
-}
-
-static inline int rt_save_fpu_state(struct ucontext __user *uc, struct pt_regs *regs)
-{
-	unsigned char fpstate[FPCONTEXT_SIZE];
-	int context_size = CPU_IS_060 ? 8 : 0;
-	int err = 0;
-
-	if (FPU_IS_EMU) {
-		/* save fpu control register */
-		err |= copy_to_user(uc->uc_mcontext.fpregs.f_fpcntl,
-				current->thread.fpcntl, 12);
-		/* save all other fpu register */
-		err |= copy_to_user(uc->uc_mcontext.fpregs.f_fpregs,
-				current->thread.fp, 96);
-		return err;
-	}
-
-	__asm__ volatile (".chip 68k/68881\n\t"
-			  "fsave %0\n\t"
-			  ".chip 68k"
-			  : : "m" (*fpstate) : "memory");
-
-	err |= __put_user(*(long *)fpstate, (long __user *)&uc->uc_fpstate);
-	if (CPU_IS_060 ? fpstate[2] : fpstate[0]) {
-		fpregset_t fpregs;
-		if (!CPU_IS_060)
-			context_size = fpstate[1];
-		fpu_version = fpstate[0];
-		if (CPU_IS_020_OR_030 &&
-		    regs->vector >= (VEC_FPBRUC * 4) &&
-		    regs->vector <= (VEC_FPNAN * 4)) {
-			/* Clear pending exception in 68882 idle frame */
-			if (*(unsigned short *) fpstate == 0x1f38)
-				fpstate[0x38] |= 1 << 3;
-		}
-		__asm__ volatile (".chip 68k/68881\n\t"
-				  "fmovemx %%fp0-%%fp7,%0\n\t"
-				  "fmoveml %%fpcr/%%fpsr/%%fpiar,%1\n\t"
-				  ".chip 68k"
-				  : "=m" (*fpregs.f_fpregs),
-				    "=m" (*fpregs.f_fpcntl)
-				  : /* no inputs */
-				  : "memory");
-		err |= copy_to_user(&uc->uc_mcontext.fpregs, &fpregs,
-				    sizeof(fpregs));
-	}
-	if (context_size)
-		err |= copy_to_user((long __user *)&uc->uc_fpstate + 1, fpstate + 4,
-				    context_size);
-	return err;
-}
-
-static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
-			     unsigned long mask)
-{
-	sc->sc_mask = mask;
-	sc->sc_usp = rdusp();
-	sc->sc_d0 = regs->d0;
-	sc->sc_d1 = regs->d1;
-	sc->sc_a0 = regs->a0;
-	sc->sc_a1 = regs->a1;
-	sc->sc_sr = regs->sr;
-	sc->sc_pc = regs->pc;
-	sc->sc_formatvec = regs->format << 12 | regs->vector;
-	save_fpu_state(sc, regs);
-}
-
-static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *regs)
-{
-	struct switch_stack *sw = (struct switch_stack *)regs - 1;
-	greg_t __user *gregs = uc->uc_mcontext.gregs;
-	int err = 0;
-
-	err |= __put_user(MCONTEXT_VERSION, &uc->uc_mcontext.version);
-	err |= __put_user(regs->d0, &gregs[0]);
-	err |= __put_user(regs->d1, &gregs[1]);
-	err |= __put_user(regs->d2, &gregs[2]);
-	err |= __put_user(regs->d3, &gregs[3]);
-	err |= __put_user(regs->d4, &gregs[4]);
-	err |= __put_user(regs->d5, &gregs[5]);
-	err |= __put_user(sw->d6, &gregs[6]);
-	err |= __put_user(sw->d7, &gregs[7]);
-	err |= __put_user(regs->a0, &gregs[8]);
-	err |= __put_user(regs->a1, &gregs[9]);
-	err |= __put_user(regs->a2, &gregs[10]);
-	err |= __put_user(sw->a3, &gregs[11]);
-	err |= __put_user(sw->a4, &gregs[12]);
-	err |= __put_user(sw->a5, &gregs[13]);
-	err |= __put_user(sw->a6, &gregs[14]);
-	err |= __put_user(rdusp(), &gregs[15]);
-	err |= __put_user(regs->pc, &gregs[16]);
-	err |= __put_user(regs->sr, &gregs[17]);
-	err |= __put_user((regs->format << 12) | regs->vector, &uc->uc_formatvec);
-	err |= rt_save_fpu_state(uc, regs);
-	return err;
-}
-
-static inline void push_cache (unsigned long vaddr)
-{
-	/*
-	 * Using the old cache_push_v() was really a big waste.
-	 *
-	 * What we are trying to do is to flush 8 bytes to ram.
-	 * Flushing 2 cache lines of 16 bytes is much cheaper than
-	 * flushing 1 or 2 pages, as previously done in
-	 * cache_push_v().
-	 *                                                     Jes
-	 */
-	if (CPU_IS_040) {
-		unsigned long temp;
-
-		__asm__ __volatile__ (".chip 68040\n\t"
-				      "nop\n\t"
-				      "ptestr (%1)\n\t"
-				      "movec %%mmusr,%0\n\t"
-				      ".chip 68k"
-				      : "=r" (temp)
-				      : "a" (vaddr));
-
-		temp &= PAGE_MASK;
-		temp |= vaddr & ~PAGE_MASK;
-
-		__asm__ __volatile__ (".chip 68040\n\t"
-				      "nop\n\t"
-				      "cpushl %%bc,(%0)\n\t"
-				      ".chip 68k"
-				      : : "a" (temp));
-	}
-	else if (CPU_IS_060) {
-		unsigned long temp;
-		__asm__ __volatile__ (".chip 68060\n\t"
-				      "plpar (%0)\n\t"
-				      ".chip 68k"
-				      : "=a" (temp)
-				      : "0" (vaddr));
-		__asm__ __volatile__ (".chip 68060\n\t"
-				      "cpushl %%bc,(%0)\n\t"
-				      ".chip 68k"
-				      : : "a" (temp));
-	}
-	else {
-		/*
-		 * 68030/68020 have no writeback cache;
-		 * still need to clear icache.
-		 * Note that vaddr is guaranteed to be long word aligned.
-		 */
-		unsigned long temp;
-		asm volatile ("movec %%cacr,%0" : "=r" (temp));
-		temp += 4;
-		asm volatile ("movec %0,%%caar\n\t"
-			      "movec %1,%%cacr"
-			      : : "r" (vaddr), "r" (temp));
-		asm volatile ("movec %0,%%caar\n\t"
-			      "movec %1,%%cacr"
-			      : : "r" (vaddr + 4), "r" (temp));
-	}
-}
-
-static inline void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
-{
-	unsigned long usp;
-
-	/* Default to using normal stack.  */
-	usp = rdusp();
-
-	/* This is the X/Open sanctioned signal stack switching.  */
-	if (ka->sa.sa_flags & SA_ONSTACK) {
-		if (!sas_ss_flags(usp))
-			usp = current->sas_ss_sp + current->sas_ss_size;
-	}
-	return (void __user *)((usp - frame_size) & -8UL);
-}
-
-static int setup_frame (int sig, struct k_sigaction *ka,
-			 sigset_t *set, struct pt_regs *regs)
-{
-	struct sigframe __user *frame;
-	int fsize = frame_extra_sizes[regs->format];
-	struct sigcontext context;
-	int err = 0;
-
-	if (fsize < 0) {
-#ifdef DEBUG
-		printk ("setup_frame: Unknown frame format %#x\n",
-			regs->format);
-#endif
-		goto give_sigsegv;
-	}
-
-	frame = get_sigframe(ka, regs, sizeof(*frame) + fsize);
-
-	if (fsize)
-		err |= copy_to_user (frame + 1, regs + 1, fsize);
-
-	err |= __put_user((current_thread_info()->exec_domain
-			   && current_thread_info()->exec_domain->signal_invmap
-			   && sig < 32
-			   ? current_thread_info()->exec_domain->signal_invmap[sig]
-			   : sig),
-			  &frame->sig);
-
-	err |= __put_user(regs->vector, &frame->code);
-	err |= __put_user(&frame->sc, &frame->psc);
-
-	if (_NSIG_WORDS > 1)
-		err |= copy_to_user(frame->extramask, &set->sig[1],
-				    sizeof(frame->extramask));
-
-	setup_sigcontext(&context, regs, set->sig[0]);
-	err |= copy_to_user (&frame->sc, &context, sizeof(context));
-
-	/* Set up to return from userspace.  */
-	err |= __put_user(frame->retcode, &frame->pretcode);
-	/* moveq #,d0; trap #0 */
-	err |= __put_user(0x70004e40 + (__NR_sigreturn << 16),
-			  (long __user *)(frame->retcode));
-
-	if (err)
-		goto give_sigsegv;
-
-	push_cache ((unsigned long) &frame->retcode);
-
-	/*
-	 * Set up registers for signal handler.  All the state we are about
-	 * to destroy is successfully copied to sigframe.
-	 */
-	wrusp ((unsigned long) frame);
-	regs->pc = (unsigned long) ka->sa.sa_handler;
-
-	/*
-	 * This is subtle; if we build more than one sigframe, all but the
-	 * first one will see frame format 0 and have fsize == 0, so we won't
-	 * screw stkadj.
-	 */
-	if (fsize)
-		regs->stkadj = fsize;
-
-	/* Prepare to skip over the extra stuff in the exception frame.  */
-	if (regs->stkadj) {
-		struct pt_regs *tregs =
-			(struct pt_regs *)((ulong)regs + regs->stkadj);
-#ifdef DEBUG
-		printk("Performing stackadjust=%04x\n", regs->stkadj);
-#endif
-		/* This must be copied with decreasing addresses to
-                   handle overlaps.  */
-		tregs->vector = 0;
-		tregs->format = 0;
-		tregs->pc = regs->pc;
-		tregs->sr = regs->sr;
-	}
-	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return err;
-}
-
-static int setup_rt_frame (int sig, struct k_sigaction *ka, siginfo_t *info,
-			    sigset_t *set, struct pt_regs *regs)
-{
-	struct rt_sigframe __user *frame;
-	int fsize = frame_extra_sizes[regs->format];
-	int err = 0;
-
-	if (fsize < 0) {
-#ifdef DEBUG
-		printk ("setup_frame: Unknown frame format %#x\n",
-			regs->format);
-#endif
-		goto give_sigsegv;
-	}
-
-	frame = get_sigframe(ka, regs, sizeof(*frame));
-
-	if (fsize)
-		err |= copy_to_user (&frame->uc.uc_extra, regs + 1, fsize);
-
-	err |= __put_user((current_thread_info()->exec_domain
-			   && current_thread_info()->exec_domain->signal_invmap
-			   && sig < 32
-			   ? current_thread_info()->exec_domain->signal_invmap[sig]
-			   : sig),
-			  &frame->sig);
-	err |= __put_user(&frame->info, &frame->pinfo);
-	err |= __put_user(&frame->uc, &frame->puc);
-	err |= copy_siginfo_to_user(&frame->info, info);
-
-	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(NULL, &frame->uc.uc_link);
-	err |= __put_user((void __user *)current->sas_ss_sp,
-			  &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(rdusp()),
-			  &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= rt_setup_ucontext(&frame->uc, regs);
-	err |= copy_to_user (&frame->uc.uc_sigmask, set, sizeof(*set));
-
-	/* Set up to return from userspace.  */
-	err |= __put_user(frame->retcode, &frame->pretcode);
-#ifdef __mcoldfire__
-	/* movel #__NR_rt_sigreturn,d0; trap #0 */
-	err |= __put_user(0x203c0000, (long __user *)(frame->retcode + 0));
-	err |= __put_user(0x00004e40 + (__NR_rt_sigreturn << 16),
-			  (long __user *)(frame->retcode + 4));
+#ifdef CONFIG_MMU
+#include "signal_mm.c"
 #else
-	/* moveq #,d0; notb d0; trap #0 */
-	err |= __put_user(0x70004600 + ((__NR_rt_sigreturn ^ 0xff) << 16),
-			  (long __user *)(frame->retcode + 0));
-	err |= __put_user(0x4e40, (short __user *)(frame->retcode + 4));
+#include "signal_no.c"
 #endif
-
-	if (err)
-		goto give_sigsegv;
-
-	push_cache ((unsigned long) &frame->retcode);
-
-	/*
-	 * Set up registers for signal handler.  All the state we are about
-	 * to destroy is successfully copied to sigframe.
-	 */
-	wrusp ((unsigned long) frame);
-	regs->pc = (unsigned long) ka->sa.sa_handler;
-
-	/*
-	 * This is subtle; if we build more than one sigframe, all but the
-	 * first one will see frame format 0 and have fsize == 0, so we won't
-	 * screw stkadj.
-	 */
-	if (fsize)
-		regs->stkadj = fsize;
-
-	/* Prepare to skip over the extra stuff in the exception frame.  */
-	if (regs->stkadj) {
-		struct pt_regs *tregs =
-			(struct pt_regs *)((ulong)regs + regs->stkadj);
-#ifdef DEBUG
-		printk("Performing stackadjust=%04x\n", regs->stkadj);
-#endif
-		/* This must be copied with decreasing addresses to
-                   handle overlaps.  */
-		tregs->vector = 0;
-		tregs->format = 0;
-		tregs->pc = regs->pc;
-		tregs->sr = regs->sr;
-	}
-	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return err;
-}
-
-static inline void
-handle_restart(struct pt_regs *regs, struct k_sigaction *ka, int has_handler)
-{
-	switch (regs->d0) {
-	case -ERESTARTNOHAND:
-		if (!has_handler)
-			goto do_restart;
-		regs->d0 = -EINTR;
-		break;
-
-	case -ERESTART_RESTARTBLOCK:
-		if (!has_handler) {
-			regs->d0 = __NR_restart_syscall;
-			regs->pc -= 2;
-			break;
-		}
-		regs->d0 = -EINTR;
-		break;
-
-	case -ERESTARTSYS:
-		if (has_handler && !(ka->sa.sa_flags & SA_RESTART)) {
-			regs->d0 = -EINTR;
-			break;
-		}
-	/* fallthrough */
-	case -ERESTARTNOINTR:
-	do_restart:
-		regs->d0 = regs->orig_d0;
-		regs->pc -= 2;
-		break;
-	}
-}
-
-void ptrace_signal_deliver(struct pt_regs *regs, void *cookie)
-{
-	if (regs->orig_d0 < 0)
-		return;
-	switch (regs->d0) {
-	case -ERESTARTNOHAND:
-	case -ERESTARTSYS:
-	case -ERESTARTNOINTR:
-		regs->d0 = regs->orig_d0;
-		regs->orig_d0 = -1;
-		regs->pc -= 2;
-		break;
-	}
-}
-
-/*
- * OK, we're invoking a handler
- */
-static void
-handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info,
-	      sigset_t *oldset, struct pt_regs *regs)
-{
-	int err;
-	/* are we from a system call? */
-	if (regs->orig_d0 >= 0)
-		/* If so, check system call restarting.. */
-		handle_restart(regs, ka, 1);
-
-	/* set up the stack frame */
-	if (ka->sa.sa_flags & SA_SIGINFO)
-		err = setup_rt_frame(sig, ka, info, oldset, regs);
-	else
-		err = setup_frame(sig, ka, oldset, regs);
-
-	if (err)
-		return;
-
-	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
-	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&current->blocked,sig);
-	recalc_sigpending();
-
-	if (test_thread_flag(TIF_DELAYED_TRACE)) {
-		regs->sr &= ~0x8000;
-		send_sig(SIGTRAP, current, 1);
-	}
-
-	clear_thread_flag(TIF_RESTORE_SIGMASK);
-}
-
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- */
-asmlinkage void do_signal(struct pt_regs *regs)
-{
-	siginfo_t info;
-	struct k_sigaction ka;
-	int signr;
-	sigset_t *oldset;
-
-	current->thread.esp0 = (unsigned long) regs;
-
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
-	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
-	if (signr > 0) {
-		/* Whee!  Actually deliver the signal.  */
-		handle_signal(signr, &ka, &info, oldset, regs);
-		return;
-	}
-
-	/* Did we come from a system call? */
-	if (regs->orig_d0 >= 0)
-		/* Restart the system call - no handlers present */
-		handle_restart(regs, NULL, 0);
-
-	/* If there's no signal to deliver, we just restore the saved mask.  */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
-}

diff --git a/arch/m68k/kernel/signal_mm.c b/arch/m68k/kernel/signal_mm.c
new file mode 100644
index 0000000..a0afc23
--- /dev/null
+++ b/arch/m68k/kernel/signal_mm.c

@@ -0,0 +1,1017 @@
+/*
+ *  linux/arch/m68k/kernel/signal.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+/*
+ * Linux/m68k support by Hamish Macdonald
+ *
+ * 68060 fixes by Jesper Skov
+ *
+ * 1997-12-01  Modified for POSIX.1b signals by Andreas Schwab
+ *
+ * mathemu support by Roman Zippel
+ *  (Note: fpstate in the signal context is completely ignored for the emulator
+ *         and the internal floating point format is put on stack)
+ */
+
+/*
+ * ++roman (07/09/96): implemented signal stacks (specially for tosemu on
+ * Atari :-) Current limitation: Only one sigstack can be active at one time.
+ * If a second signal with SA_ONSTACK set arrives while working on a sigstack,
+ * SA_ONSTACK is ignored. This behaviour avoids lots of trouble with nested
+ * signal handlers!
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/syscalls.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/highuid.h>
+#include <linux/personality.h>
+#include <linux/tty.h>
+#include <linux/binfmts.h>
+#include <linux/module.h>
+
+#include <asm/setup.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/traps.h>
+#include <asm/ucontext.h>
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+static const int frame_extra_sizes[16] = {
+  [1]	= -1, /* sizeof(((struct frame *)0)->un.fmt1), */
+  [2]	= sizeof(((struct frame *)0)->un.fmt2),
+  [3]	= sizeof(((struct frame *)0)->un.fmt3),
+  [4]	= sizeof(((struct frame *)0)->un.fmt4),
+  [5]	= -1, /* sizeof(((struct frame *)0)->un.fmt5), */
+  [6]	= -1, /* sizeof(((struct frame *)0)->un.fmt6), */
+  [7]	= sizeof(((struct frame *)0)->un.fmt7),
+  [8]	= -1, /* sizeof(((struct frame *)0)->un.fmt8), */
+  [9]	= sizeof(((struct frame *)0)->un.fmt9),
+  [10]	= sizeof(((struct frame *)0)->un.fmta),
+  [11]	= sizeof(((struct frame *)0)->un.fmtb),
+  [12]	= -1, /* sizeof(((struct frame *)0)->un.fmtc), */
+  [13]	= -1, /* sizeof(((struct frame *)0)->un.fmtd), */
+  [14]	= -1, /* sizeof(((struct frame *)0)->un.fmte), */
+  [15]	= -1, /* sizeof(((struct frame *)0)->un.fmtf), */
+};
+
+int handle_kernel_fault(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+	struct pt_regs *tregs;
+
+	/* Are we prepared to handle this kernel fault? */
+	fixup = search_exception_tables(regs->pc);
+	if (!fixup)
+		return 0;
+
+	/* Create a new four word stack frame, discarding the old one. */
+	regs->stkadj = frame_extra_sizes[regs->format];
+	tregs =	(struct pt_regs *)((long)regs + regs->stkadj);
+	tregs->vector = regs->vector;
+	tregs->format = 0;
+	tregs->pc = fixup->fixup;
+	tregs->sr = regs->sr;
+
+	return 1;
+}
+
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+asmlinkage int
+sys_sigsuspend(int unused0, int unused1, old_sigset_t mask)
+{
+	mask &= _BLOCKABLE;
+	spin_lock_irq(&current->sighand->siglock);
+	current->saved_sigmask = current->blocked;
+	siginitset(&current->blocked, mask);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	current->state = TASK_INTERRUPTIBLE;
+	schedule();
+	set_restore_sigmask();
+
+	return -ERESTARTNOHAND;
+}
+
+asmlinkage int
+sys_sigaction(int sig, const struct old_sigaction __user *act,
+	      struct old_sigaction __user *oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	if (act) {
+		old_sigset_t mask;
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
+		    __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+		    __get_user(mask, &act->sa_mask))
+			return -EFAULT;
+		siginitset(&new_ka.sa.sa_mask, mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
+		    __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+		    __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
+			return -EFAULT;
+	}
+
+	return ret;
+}
+
+asmlinkage int
+sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss)
+{
+	return do_sigaltstack(uss, uoss, rdusp());
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
+ *
+ * Keep the return code on the stack quadword aligned!
+ * That makes the cache flush below easier.
+ */
+
+struct sigframe
+{
+	char __user *pretcode;
+	int sig;
+	int code;
+	struct sigcontext __user *psc;
+	char retcode[8];
+	unsigned long extramask[_NSIG_WORDS-1];
+	struct sigcontext sc;
+};
+
+struct rt_sigframe
+{
+	char __user *pretcode;
+	int sig;
+	struct siginfo __user *pinfo;
+	void __user *puc;
+	char retcode[8];
+	struct siginfo info;
+	struct ucontext uc;
+};
+
+
+static unsigned char fpu_version;	/* version number of fpu, set by setup_frame */
+
+static inline int restore_fpu_state(struct sigcontext *sc)
+{
+	int err = 1;
+
+	if (FPU_IS_EMU) {
+	    /* restore registers */
+	    memcpy(current->thread.fpcntl, sc->sc_fpcntl, 12);
+	    memcpy(current->thread.fp, sc->sc_fpregs, 24);
+	    return 0;
+	}
+
+	if (CPU_IS_060 ? sc->sc_fpstate[2] : sc->sc_fpstate[0]) {
+	    /* Verify the frame format.  */
+	    if (!CPU_IS_060 && (sc->sc_fpstate[0] != fpu_version))
+		goto out;
+	    if (CPU_IS_020_OR_030) {
+		if (m68k_fputype & FPU_68881 &&
+		    !(sc->sc_fpstate[1] == 0x18 || sc->sc_fpstate[1] == 0xb4))
+		    goto out;
+		if (m68k_fputype & FPU_68882 &&
+		    !(sc->sc_fpstate[1] == 0x38 || sc->sc_fpstate[1] == 0xd4))
+		    goto out;
+	    } else if (CPU_IS_040) {
+		if (!(sc->sc_fpstate[1] == 0x00 ||
+                      sc->sc_fpstate[1] == 0x28 ||
+                      sc->sc_fpstate[1] == 0x60))
+		    goto out;
+	    } else if (CPU_IS_060) {
+		if (!(sc->sc_fpstate[3] == 0x00 ||
+                      sc->sc_fpstate[3] == 0x60 ||
+		      sc->sc_fpstate[3] == 0xe0))
+		    goto out;
+	    } else
+		goto out;
+
+	    __asm__ volatile (".chip 68k/68881\n\t"
+			      "fmovemx %0,%%fp0-%%fp1\n\t"
+			      "fmoveml %1,%%fpcr/%%fpsr/%%fpiar\n\t"
+			      ".chip 68k"
+			      : /* no outputs */
+			      : "m" (*sc->sc_fpregs), "m" (*sc->sc_fpcntl));
+	}
+	__asm__ volatile (".chip 68k/68881\n\t"
+			  "frestore %0\n\t"
+			  ".chip 68k" : : "m" (*sc->sc_fpstate));
+	err = 0;
+
+out:
+	return err;
+}
+
+#define FPCONTEXT_SIZE	216
+#define uc_fpstate	uc_filler[0]
+#define uc_formatvec	uc_filler[FPCONTEXT_SIZE/4]
+#define uc_extra	uc_filler[FPCONTEXT_SIZE/4+1]
+
+static inline int rt_restore_fpu_state(struct ucontext __user *uc)
+{
+	unsigned char fpstate[FPCONTEXT_SIZE];
+	int context_size = CPU_IS_060 ? 8 : 0;
+	fpregset_t fpregs;
+	int err = 1;
+
+	if (FPU_IS_EMU) {
+		/* restore fpu control register */
+		if (__copy_from_user(current->thread.fpcntl,
+				uc->uc_mcontext.fpregs.f_fpcntl, 12))
+			goto out;
+		/* restore all other fpu register */
+		if (__copy_from_user(current->thread.fp,
+				uc->uc_mcontext.fpregs.f_fpregs, 96))
+			goto out;
+		return 0;
+	}
+
+	if (__get_user(*(long *)fpstate, (long __user *)&uc->uc_fpstate))
+		goto out;
+	if (CPU_IS_060 ? fpstate[2] : fpstate[0]) {
+		if (!CPU_IS_060)
+			context_size = fpstate[1];
+		/* Verify the frame format.  */
+		if (!CPU_IS_060 && (fpstate[0] != fpu_version))
+			goto out;
+		if (CPU_IS_020_OR_030) {
+			if (m68k_fputype & FPU_68881 &&
+			    !(context_size == 0x18 || context_size == 0xb4))
+				goto out;
+			if (m68k_fputype & FPU_68882 &&
+			    !(context_size == 0x38 || context_size == 0xd4))
+				goto out;
+		} else if (CPU_IS_040) {
+			if (!(context_size == 0x00 ||
+			      context_size == 0x28 ||
+			      context_size == 0x60))
+				goto out;
+		} else if (CPU_IS_060) {
+			if (!(fpstate[3] == 0x00 ||
+			      fpstate[3] == 0x60 ||
+			      fpstate[3] == 0xe0))
+				goto out;
+		} else
+			goto out;
+		if (__copy_from_user(&fpregs, &uc->uc_mcontext.fpregs,
+				     sizeof(fpregs)))
+			goto out;
+		__asm__ volatile (".chip 68k/68881\n\t"
+				  "fmovemx %0,%%fp0-%%fp7\n\t"
+				  "fmoveml %1,%%fpcr/%%fpsr/%%fpiar\n\t"
+				  ".chip 68k"
+				  : /* no outputs */
+				  : "m" (*fpregs.f_fpregs),
+				    "m" (*fpregs.f_fpcntl));
+	}
+	if (context_size &&
+	    __copy_from_user(fpstate + 4, (long __user *)&uc->uc_fpstate + 1,
+			     context_size))
+		goto out;
+	__asm__ volatile (".chip 68k/68881\n\t"
+			  "frestore %0\n\t"
+			  ".chip 68k" : : "m" (*fpstate));
+	err = 0;
+
+out:
+	return err;
+}
+
+static int mangle_kernel_stack(struct pt_regs *regs, int formatvec,
+			       void __user *fp)
+{
+	int fsize = frame_extra_sizes[formatvec >> 12];
+	if (fsize < 0) {
+		/*
+		 * user process trying to return with weird frame format
+		 */
+#ifdef DEBUG
+		printk("user process returning with weird frame format\n");
+#endif
+		return 1;
+	}
+	if (!fsize) {
+		regs->format = formatvec >> 12;
+		regs->vector = formatvec & 0xfff;
+	} else {
+		struct switch_stack *sw = (struct switch_stack *)regs - 1;
+		unsigned long buf[fsize / 2]; /* yes, twice as much */
+
+		/* that'll make sure that expansion won't crap over data */
+		if (copy_from_user(buf + fsize / 4, fp, fsize))
+			return 1;
+
+		/* point of no return */
+		regs->format = formatvec >> 12;
+		regs->vector = formatvec & 0xfff;
+#define frame_offset (sizeof(struct pt_regs)+sizeof(struct switch_stack))
+		__asm__ __volatile__
+			("   movel %0,%/a0\n\t"
+			 "   subl %1,%/a0\n\t"     /* make room on stack */
+			 "   movel %/a0,%/sp\n\t"  /* set stack pointer */
+			 /* move switch_stack and pt_regs */
+			 "1: movel %0@+,%/a0@+\n\t"
+			 "   dbra %2,1b\n\t"
+			 "   lea %/sp@(%c3),%/a0\n\t" /* add offset of fmt */
+			 "   lsrl  #2,%1\n\t"
+			 "   subql #1,%1\n\t"
+			 /* copy to the gap we'd made */
+			 "2: movel %4@+,%/a0@+\n\t"
+			 "   dbra %1,2b\n\t"
+			 "   bral ret_from_signal\n"
+			 : /* no outputs, it doesn't ever return */
+			 : "a" (sw), "d" (fsize), "d" (frame_offset/4-1),
+			   "n" (frame_offset), "a" (buf + fsize/4)
+			 : "a0");
+#undef frame_offset
+	}
+	return 0;
+}
+
+static inline int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __user *fp)
+{
+	int formatvec;
+	struct sigcontext context;
+	int err;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	/* get previous context */
+	if (copy_from_user(&context, usc, sizeof(context)))
+		goto badframe;
+
+	/* restore passed registers */
+	regs->d0 = context.sc_d0;
+	regs->d1 = context.sc_d1;
+	regs->a0 = context.sc_a0;
+	regs->a1 = context.sc_a1;
+	regs->sr = (regs->sr & 0xff00) | (context.sc_sr & 0xff);
+	regs->pc = context.sc_pc;
+	regs->orig_d0 = -1;		/* disable syscall checks */
+	wrusp(context.sc_usp);
+	formatvec = context.sc_formatvec;
+
+	err = restore_fpu_state(&context);
+
+	if (err || mangle_kernel_stack(regs, formatvec, fp))
+		goto badframe;
+
+	return 0;
+
+badframe:
+	return 1;
+}
+
+static inline int
+rt_restore_ucontext(struct pt_regs *regs, struct switch_stack *sw,
+		    struct ucontext __user *uc)
+{
+	int temp;
+	greg_t __user *gregs = uc->uc_mcontext.gregs;
+	unsigned long usp;
+	int err;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	err = __get_user(temp, &uc->uc_mcontext.version);
+	if (temp != MCONTEXT_VERSION)
+		goto badframe;
+	/* restore passed registers */
+	err |= __get_user(regs->d0, &gregs[0]);
+	err |= __get_user(regs->d1, &gregs[1]);
+	err |= __get_user(regs->d2, &gregs[2]);
+	err |= __get_user(regs->d3, &gregs[3]);
+	err |= __get_user(regs->d4, &gregs[4]);
+	err |= __get_user(regs->d5, &gregs[5]);
+	err |= __get_user(sw->d6, &gregs[6]);
+	err |= __get_user(sw->d7, &gregs[7]);
+	err |= __get_user(regs->a0, &gregs[8]);
+	err |= __get_user(regs->a1, &gregs[9]);
+	err |= __get_user(regs->a2, &gregs[10]);
+	err |= __get_user(sw->a3, &gregs[11]);
+	err |= __get_user(sw->a4, &gregs[12]);
+	err |= __get_user(sw->a5, &gregs[13]);
+	err |= __get_user(sw->a6, &gregs[14]);
+	err |= __get_user(usp, &gregs[15]);
+	wrusp(usp);
+	err |= __get_user(regs->pc, &gregs[16]);
+	err |= __get_user(temp, &gregs[17]);
+	regs->sr = (regs->sr & 0xff00) | (temp & 0xff);
+	regs->orig_d0 = -1;		/* disable syscall checks */
+	err |= __get_user(temp, &uc->uc_formatvec);
+
+	err |= rt_restore_fpu_state(uc);
+
+	if (err || do_sigaltstack(&uc->uc_stack, NULL, usp) == -EFAULT)
+		goto badframe;
+
+	if (mangle_kernel_stack(regs, temp, &uc->uc_extra))
+		goto badframe;
+
+	return 0;
+
+badframe:
+	return 1;
+}
+
+asmlinkage int do_sigreturn(unsigned long __unused)
+{
+	struct switch_stack *sw = (struct switch_stack *) &__unused;
+	struct pt_regs *regs = (struct pt_regs *) (sw + 1);
+	unsigned long usp = rdusp();
+	struct sigframe __user *frame = (struct sigframe __user *)(usp - 4);
+	sigset_t set;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__get_user(set.sig[0], &frame->sc.sc_mask) ||
+	    (_NSIG_WORDS > 1 &&
+	     __copy_from_user(&set.sig[1], &frame->extramask,
+			      sizeof(frame->extramask))))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	current->blocked = set;
+	recalc_sigpending();
+
+	if (restore_sigcontext(regs, &frame->sc, frame + 1))
+		goto badframe;
+	return regs->d0;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+asmlinkage int do_rt_sigreturn(unsigned long __unused)
+{
+	struct switch_stack *sw = (struct switch_stack *) &__unused;
+	struct pt_regs *regs = (struct pt_regs *) (sw + 1);
+	unsigned long usp = rdusp();
+	struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(usp - 4);
+	sigset_t set;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	current->blocked = set;
+	recalc_sigpending();
+
+	if (rt_restore_ucontext(regs, sw, &frame->uc))
+		goto badframe;
+	return regs->d0;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+/*
+ * Set up a signal frame.
+ */
+
+static inline void save_fpu_state(struct sigcontext *sc, struct pt_regs *regs)
+{
+	if (FPU_IS_EMU) {
+		/* save registers */
+		memcpy(sc->sc_fpcntl, current->thread.fpcntl, 12);
+		memcpy(sc->sc_fpregs, current->thread.fp, 24);
+		return;
+	}
+
+	__asm__ volatile (".chip 68k/68881\n\t"
+			  "fsave %0\n\t"
+			  ".chip 68k"
+			  : : "m" (*sc->sc_fpstate) : "memory");
+
+	if (CPU_IS_060 ? sc->sc_fpstate[2] : sc->sc_fpstate[0]) {
+		fpu_version = sc->sc_fpstate[0];
+		if (CPU_IS_020_OR_030 &&
+		    regs->vector >= (VEC_FPBRUC * 4) &&
+		    regs->vector <= (VEC_FPNAN * 4)) {
+			/* Clear pending exception in 68882 idle frame */
+			if (*(unsigned short *) sc->sc_fpstate == 0x1f38)
+				sc->sc_fpstate[0x38] |= 1 << 3;
+		}
+		__asm__ volatile (".chip 68k/68881\n\t"
+				  "fmovemx %%fp0-%%fp1,%0\n\t"
+				  "fmoveml %%fpcr/%%fpsr/%%fpiar,%1\n\t"
+				  ".chip 68k"
+				  : "=m" (*sc->sc_fpregs),
+				    "=m" (*sc->sc_fpcntl)
+				  : /* no inputs */
+				  : "memory");
+	}
+}
+
+static inline int rt_save_fpu_state(struct ucontext __user *uc, struct pt_regs *regs)
+{
+	unsigned char fpstate[FPCONTEXT_SIZE];
+	int context_size = CPU_IS_060 ? 8 : 0;
+	int err = 0;
+
+	if (FPU_IS_EMU) {
+		/* save fpu control register */
+		err |= copy_to_user(uc->uc_mcontext.fpregs.f_fpcntl,
+				current->thread.fpcntl, 12);
+		/* save all other fpu register */
+		err |= copy_to_user(uc->uc_mcontext.fpregs.f_fpregs,
+				current->thread.fp, 96);
+		return err;
+	}
+
+	__asm__ volatile (".chip 68k/68881\n\t"
+			  "fsave %0\n\t"
+			  ".chip 68k"
+			  : : "m" (*fpstate) : "memory");
+
+	err |= __put_user(*(long *)fpstate, (long __user *)&uc->uc_fpstate);
+	if (CPU_IS_060 ? fpstate[2] : fpstate[0]) {
+		fpregset_t fpregs;
+		if (!CPU_IS_060)
+			context_size = fpstate[1];
+		fpu_version = fpstate[0];
+		if (CPU_IS_020_OR_030 &&
+		    regs->vector >= (VEC_FPBRUC * 4) &&
+		    regs->vector <= (VEC_FPNAN * 4)) {
+			/* Clear pending exception in 68882 idle frame */
+			if (*(unsigned short *) fpstate == 0x1f38)
+				fpstate[0x38] |= 1 << 3;
+		}
+		__asm__ volatile (".chip 68k/68881\n\t"
+				  "fmovemx %%fp0-%%fp7,%0\n\t"
+				  "fmoveml %%fpcr/%%fpsr/%%fpiar,%1\n\t"
+				  ".chip 68k"
+				  : "=m" (*fpregs.f_fpregs),
+				    "=m" (*fpregs.f_fpcntl)
+				  : /* no inputs */
+				  : "memory");
+		err |= copy_to_user(&uc->uc_mcontext.fpregs, &fpregs,
+				    sizeof(fpregs));
+	}
+	if (context_size)
+		err |= copy_to_user((long __user *)&uc->uc_fpstate + 1, fpstate + 4,
+				    context_size);
+	return err;
+}
+
+static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
+			     unsigned long mask)
+{
+	sc->sc_mask = mask;
+	sc->sc_usp = rdusp();
+	sc->sc_d0 = regs->d0;
+	sc->sc_d1 = regs->d1;
+	sc->sc_a0 = regs->a0;
+	sc->sc_a1 = regs->a1;
+	sc->sc_sr = regs->sr;
+	sc->sc_pc = regs->pc;
+	sc->sc_formatvec = regs->format << 12 | regs->vector;
+	save_fpu_state(sc, regs);
+}
+
+static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *regs)
+{
+	struct switch_stack *sw = (struct switch_stack *)regs - 1;
+	greg_t __user *gregs = uc->uc_mcontext.gregs;
+	int err = 0;
+
+	err |= __put_user(MCONTEXT_VERSION, &uc->uc_mcontext.version);
+	err |= __put_user(regs->d0, &gregs[0]);
+	err |= __put_user(regs->d1, &gregs[1]);
+	err |= __put_user(regs->d2, &gregs[2]);
+	err |= __put_user(regs->d3, &gregs[3]);
+	err |= __put_user(regs->d4, &gregs[4]);
+	err |= __put_user(regs->d5, &gregs[5]);
+	err |= __put_user(sw->d6, &gregs[6]);
+	err |= __put_user(sw->d7, &gregs[7]);
+	err |= __put_user(regs->a0, &gregs[8]);
+	err |= __put_user(regs->a1, &gregs[9]);
+	err |= __put_user(regs->a2, &gregs[10]);
+	err |= __put_user(sw->a3, &gregs[11]);
+	err |= __put_user(sw->a4, &gregs[12]);
+	err |= __put_user(sw->a5, &gregs[13]);
+	err |= __put_user(sw->a6, &gregs[14]);
+	err |= __put_user(rdusp(), &gregs[15]);
+	err |= __put_user(regs->pc, &gregs[16]);
+	err |= __put_user(regs->sr, &gregs[17]);
+	err |= __put_user((regs->format << 12) | regs->vector, &uc->uc_formatvec);
+	err |= rt_save_fpu_state(uc, regs);
+	return err;
+}
+
+static inline void push_cache (unsigned long vaddr)
+{
+	/*
+	 * Using the old cache_push_v() was really a big waste.
+	 *
+	 * What we are trying to do is to flush 8 bytes to ram.
+	 * Flushing 2 cache lines of 16 bytes is much cheaper than
+	 * flushing 1 or 2 pages, as previously done in
+	 * cache_push_v().
+	 *                                                     Jes
+	 */
+	if (CPU_IS_040) {
+		unsigned long temp;
+
+		__asm__ __volatile__ (".chip 68040\n\t"
+				      "nop\n\t"
+				      "ptestr (%1)\n\t"
+				      "movec %%mmusr,%0\n\t"
+				      ".chip 68k"
+				      : "=r" (temp)
+				      : "a" (vaddr));
+
+		temp &= PAGE_MASK;
+		temp |= vaddr & ~PAGE_MASK;
+
+		__asm__ __volatile__ (".chip 68040\n\t"
+				      "nop\n\t"
+				      "cpushl %%bc,(%0)\n\t"
+				      ".chip 68k"
+				      : : "a" (temp));
+	}
+	else if (CPU_IS_060) {
+		unsigned long temp;
+		__asm__ __volatile__ (".chip 68060\n\t"
+				      "plpar (%0)\n\t"
+				      ".chip 68k"
+				      : "=a" (temp)
+				      : "0" (vaddr));
+		__asm__ __volatile__ (".chip 68060\n\t"
+				      "cpushl %%bc,(%0)\n\t"
+				      ".chip 68k"
+				      : : "a" (temp));
+	}
+	else {
+		/*
+		 * 68030/68020 have no writeback cache;
+		 * still need to clear icache.
+		 * Note that vaddr is guaranteed to be long word aligned.
+		 */
+		unsigned long temp;
+		asm volatile ("movec %%cacr,%0" : "=r" (temp));
+		temp += 4;
+		asm volatile ("movec %0,%%caar\n\t"
+			      "movec %1,%%cacr"
+			      : : "r" (vaddr), "r" (temp));
+		asm volatile ("movec %0,%%caar\n\t"
+			      "movec %1,%%cacr"
+			      : : "r" (vaddr + 4), "r" (temp));
+	}
+}
+
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
+{
+	unsigned long usp;
+
+	/* Default to using normal stack.  */
+	usp = rdusp();
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (!sas_ss_flags(usp))
+			usp = current->sas_ss_sp + current->sas_ss_size;
+	}
+	return (void __user *)((usp - frame_size) & -8UL);
+}
+
+static int setup_frame (int sig, struct k_sigaction *ka,
+			 sigset_t *set, struct pt_regs *regs)
+{
+	struct sigframe __user *frame;
+	int fsize = frame_extra_sizes[regs->format];
+	struct sigcontext context;
+	int err = 0;
+
+	if (fsize < 0) {
+#ifdef DEBUG
+		printk ("setup_frame: Unknown frame format %#x\n",
+			regs->format);
+#endif
+		goto give_sigsegv;
+	}
+
+	frame = get_sigframe(ka, regs, sizeof(*frame) + fsize);
+
+	if (fsize)
+		err |= copy_to_user (frame + 1, regs + 1, fsize);
+
+	err |= __put_user((current_thread_info()->exec_domain
+			   && current_thread_info()->exec_domain->signal_invmap
+			   && sig < 32
+			   ? current_thread_info()->exec_domain->signal_invmap[sig]
+			   : sig),
+			  &frame->sig);
+
+	err |= __put_user(regs->vector, &frame->code);
+	err |= __put_user(&frame->sc, &frame->psc);
+
+	if (_NSIG_WORDS > 1)
+		err |= copy_to_user(frame->extramask, &set->sig[1],
+				    sizeof(frame->extramask));
+
+	setup_sigcontext(&context, regs, set->sig[0]);
+	err |= copy_to_user (&frame->sc, &context, sizeof(context));
+
+	/* Set up to return from userspace.  */
+	err |= __put_user(frame->retcode, &frame->pretcode);
+	/* moveq #,d0; trap #0 */
+	err |= __put_user(0x70004e40 + (__NR_sigreturn << 16),
+			  (long __user *)(frame->retcode));
+
+	if (err)
+		goto give_sigsegv;
+
+	push_cache ((unsigned long) &frame->retcode);
+
+	/*
+	 * Set up registers for signal handler.  All the state we are about
+	 * to destroy is successfully copied to sigframe.
+	 */
+	wrusp ((unsigned long) frame);
+	regs->pc = (unsigned long) ka->sa.sa_handler;
+
+	/*
+	 * This is subtle; if we build more than one sigframe, all but the
+	 * first one will see frame format 0 and have fsize == 0, so we won't
+	 * screw stkadj.
+	 */
+	if (fsize)
+		regs->stkadj = fsize;
+
+	/* Prepare to skip over the extra stuff in the exception frame.  */
+	if (regs->stkadj) {
+		struct pt_regs *tregs =
+			(struct pt_regs *)((ulong)regs + regs->stkadj);
+#ifdef DEBUG
+		printk("Performing stackadjust=%04x\n", regs->stkadj);
+#endif
+		/* This must be copied with decreasing addresses to
+                   handle overlaps.  */
+		tregs->vector = 0;
+		tregs->format = 0;
+		tregs->pc = regs->pc;
+		tregs->sr = regs->sr;
+	}
+	return 0;
+
+give_sigsegv:
+	force_sigsegv(sig, current);
+	return err;
+}
+
+static int setup_rt_frame (int sig, struct k_sigaction *ka, siginfo_t *info,
+			    sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	int fsize = frame_extra_sizes[regs->format];
+	int err = 0;
+
+	if (fsize < 0) {
+#ifdef DEBUG
+		printk ("setup_frame: Unknown frame format %#x\n",
+			regs->format);
+#endif
+		goto give_sigsegv;
+	}
+
+	frame = get_sigframe(ka, regs, sizeof(*frame));
+
+	if (fsize)
+		err |= copy_to_user (&frame->uc.uc_extra, regs + 1, fsize);
+
+	err |= __put_user((current_thread_info()->exec_domain
+			   && current_thread_info()->exec_domain->signal_invmap
+			   && sig < 32
+			   ? current_thread_info()->exec_domain->signal_invmap[sig]
+			   : sig),
+			  &frame->sig);
+	err |= __put_user(&frame->info, &frame->pinfo);
+	err |= __put_user(&frame->uc, &frame->puc);
+	err |= copy_siginfo_to_user(&frame->info, info);
+
+	/* Create the ucontext.  */
+	err |= __put_user(0, &frame->uc.uc_flags);
+	err |= __put_user(NULL, &frame->uc.uc_link);
+	err |= __put_user((void __user *)current->sas_ss_sp,
+			  &frame->uc.uc_stack.ss_sp);
+	err |= __put_user(sas_ss_flags(rdusp()),
+			  &frame->uc.uc_stack.ss_flags);
+	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= rt_setup_ucontext(&frame->uc, regs);
+	err |= copy_to_user (&frame->uc.uc_sigmask, set, sizeof(*set));
+
+	/* Set up to return from userspace.  */
+	err |= __put_user(frame->retcode, &frame->pretcode);
+#ifdef __mcoldfire__
+	/* movel #__NR_rt_sigreturn,d0; trap #0 */
+	err |= __put_user(0x203c0000, (long __user *)(frame->retcode + 0));
+	err |= __put_user(0x00004e40 + (__NR_rt_sigreturn << 16),
+			  (long __user *)(frame->retcode + 4));
+#else
+	/* moveq #,d0; notb d0; trap #0 */
+	err |= __put_user(0x70004600 + ((__NR_rt_sigreturn ^ 0xff) << 16),
+			  (long __user *)(frame->retcode + 0));
+	err |= __put_user(0x4e40, (short __user *)(frame->retcode + 4));
+#endif
+
+	if (err)
+		goto give_sigsegv;
+
+	push_cache ((unsigned long) &frame->retcode);
+
+	/*
+	 * Set up registers for signal handler.  All the state we are about
+	 * to destroy is successfully copied to sigframe.
+	 */
+	wrusp ((unsigned long) frame);
+	regs->pc = (unsigned long) ka->sa.sa_handler;
+
+	/*
+	 * This is subtle; if we build more than one sigframe, all but the
+	 * first one will see frame format 0 and have fsize == 0, so we won't
+	 * screw stkadj.
+	 */
+	if (fsize)
+		regs->stkadj = fsize;
+
+	/* Prepare to skip over the extra stuff in the exception frame.  */
+	if (regs->stkadj) {
+		struct pt_regs *tregs =
+			(struct pt_regs *)((ulong)regs + regs->stkadj);
+#ifdef DEBUG
+		printk("Performing stackadjust=%04x\n", regs->stkadj);
+#endif
+		/* This must be copied with decreasing addresses to
+                   handle overlaps.  */
+		tregs->vector = 0;
+		tregs->format = 0;
+		tregs->pc = regs->pc;
+		tregs->sr = regs->sr;
+	}
+	return 0;
+
+give_sigsegv:
+	force_sigsegv(sig, current);
+	return err;
+}
+
+static inline void
+handle_restart(struct pt_regs *regs, struct k_sigaction *ka, int has_handler)
+{
+	switch (regs->d0) {
+	case -ERESTARTNOHAND:
+		if (!has_handler)
+			goto do_restart;
+		regs->d0 = -EINTR;
+		break;
+
+	case -ERESTART_RESTARTBLOCK:
+		if (!has_handler) {
+			regs->d0 = __NR_restart_syscall;
+			regs->pc -= 2;
+			break;
+		}
+		regs->d0 = -EINTR;
+		break;
+
+	case -ERESTARTSYS:
+		if (has_handler && !(ka->sa.sa_flags & SA_RESTART)) {
+			regs->d0 = -EINTR;
+			break;
+		}
+	/* fallthrough */
+	case -ERESTARTNOINTR:
+	do_restart:
+		regs->d0 = regs->orig_d0;
+		regs->pc -= 2;
+		break;
+	}
+}
+
+void ptrace_signal_deliver(struct pt_regs *regs, void *cookie)
+{
+	if (regs->orig_d0 < 0)
+		return;
+	switch (regs->d0) {
+	case -ERESTARTNOHAND:
+	case -ERESTARTSYS:
+	case -ERESTARTNOINTR:
+		regs->d0 = regs->orig_d0;
+		regs->orig_d0 = -1;
+		regs->pc -= 2;
+		break;
+	}
+}
+
+/*
+ * OK, we're invoking a handler
+ */
+static void
+handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info,
+	      sigset_t *oldset, struct pt_regs *regs)
+{
+	int err;
+	/* are we from a system call? */
+	if (regs->orig_d0 >= 0)
+		/* If so, check system call restarting.. */
+		handle_restart(regs, ka, 1);
+
+	/* set up the stack frame */
+	if (ka->sa.sa_flags & SA_SIGINFO)
+		err = setup_rt_frame(sig, ka, info, oldset, regs);
+	else
+		err = setup_frame(sig, ka, oldset, regs);
+
+	if (err)
+		return;
+
+	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
+		sigaddset(&current->blocked,sig);
+	recalc_sigpending();
+
+	if (test_thread_flag(TIF_DELAYED_TRACE)) {
+		regs->sr &= ~0x8000;
+		send_sig(SIGTRAP, current, 1);
+	}
+
+	clear_thread_flag(TIF_RESTORE_SIGMASK);
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+asmlinkage void do_signal(struct pt_regs *regs)
+{
+	siginfo_t info;
+	struct k_sigaction ka;
+	int signr;
+	sigset_t *oldset;
+
+	current->thread.esp0 = (unsigned long) regs;
+
+	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+		oldset = &current->saved_sigmask;
+	else
+		oldset = &current->blocked;
+
+	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+	if (signr > 0) {
+		/* Whee!  Actually deliver the signal.  */
+		handle_signal(signr, &ka, &info, oldset, regs);
+		return;
+	}
+
+	/* Did we come from a system call? */
+	if (regs->orig_d0 >= 0)
+		/* Restart the system call - no handlers present */
+		handle_restart(regs, NULL, 0);
+
+	/* If there's no signal to deliver, we just restore the saved mask.  */
+	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+		clear_thread_flag(TIF_RESTORE_SIGMASK);
+		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+	}
+}

diff --git a/arch/m68knommu/kernel/signal.c b/arch/m68k/kernel/signal_no.c
similarity index 100%
rename from arch/m68knommu/kernel/signal.c
rename to arch/m68k/kernel/signal_no.c


diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 3db2e7f..63013df 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c

@@ -1,546 +1,5 @@
-/*
- * linux/arch/m68k/kernel/sys_m68k.c
- *
- * This file contains various random system calls that
- * have a non-standard calling sequence on the Linux/m68k
- * platform.
- */
-
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/ipc.h>
-
-#include <asm/setup.h>
-#include <asm/uaccess.h>
-#include <asm/cachectl.h>
-#include <asm/traps.h>
-#include <asm/page.h>
-#include <asm/unistd.h>
-#include <linux/elf.h>
-#include <asm/tlb.h>
-
-asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address,
-			     unsigned long error_code);
-
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long pgoff)
-{
-	/*
-	 * This is wrong for sun3 - there PAGE_SIZE is 8Kb,
-	 * so we need to shift the argument down by 1; m68k mmap64(3)
-	 * (in libc) expects the last argument of mmap2 in 4Kb units.
-	 */
-	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
-}
-
-/* Convert virtual (user) address VADDR to physical address PADDR */
-#define virt_to_phys_040(vaddr)						\
-({									\
-  unsigned long _mmusr, _paddr;						\
-									\
-  __asm__ __volatile__ (".chip 68040\n\t"				\
-			"ptestr (%1)\n\t"				\
-			"movec %%mmusr,%0\n\t"				\
-			".chip 68k"					\
-			: "=r" (_mmusr)					\
-			: "a" (vaddr));					\
-  _paddr = (_mmusr & MMU_R_040) ? (_mmusr & PAGE_MASK) : 0;		\
-  _paddr;								\
-})
-
-static inline int
-cache_flush_040 (unsigned long addr, int scope, int cache, unsigned long len)
-{
-  unsigned long paddr, i;
-
-  switch (scope)
-    {
-    case FLUSH_SCOPE_ALL:
-      switch (cache)
-	{
-	case FLUSH_CACHE_DATA:
-	  /* This nop is needed for some broken versions of the 68040.  */
-	  __asm__ __volatile__ ("nop\n\t"
-				".chip 68040\n\t"
-				"cpusha %dc\n\t"
-				".chip 68k");
-	  break;
-	case FLUSH_CACHE_INSN:
-	  __asm__ __volatile__ ("nop\n\t"
-				".chip 68040\n\t"
-				"cpusha %ic\n\t"
-				".chip 68k");
-	  break;
-	default:
-	case FLUSH_CACHE_BOTH:
-	  __asm__ __volatile__ ("nop\n\t"
-				".chip 68040\n\t"
-				"cpusha %bc\n\t"
-				".chip 68k");
-	  break;
-	}
-      break;
-
-    case FLUSH_SCOPE_LINE:
-      /* Find the physical address of the first mapped page in the
-	 address range.  */
-      if ((paddr = virt_to_phys_040(addr))) {
-        paddr += addr & ~(PAGE_MASK | 15);
-        len = (len + (addr & 15) + 15) >> 4;
-      } else {
-	unsigned long tmp = PAGE_SIZE - (addr & ~PAGE_MASK);
-
-	if (len <= tmp)
-	  return 0;
-	addr += tmp;
-	len -= tmp;
-	tmp = PAGE_SIZE;
-	for (;;)
-	  {
-	    if ((paddr = virt_to_phys_040(addr)))
-	      break;
-	    if (len <= tmp)
-	      return 0;
-	    addr += tmp;
-	    len -= tmp;
-	  }
-	len = (len + 15) >> 4;
-      }
-      i = (PAGE_SIZE - (paddr & ~PAGE_MASK)) >> 4;
-      while (len--)
-	{
-	  switch (cache)
-	    {
-	    case FLUSH_CACHE_DATA:
-	      __asm__ __volatile__ ("nop\n\t"
-				    ".chip 68040\n\t"
-				    "cpushl %%dc,(%0)\n\t"
-				    ".chip 68k"
-				    : : "a" (paddr));
-	      break;
-	    case FLUSH_CACHE_INSN:
-	      __asm__ __volatile__ ("nop\n\t"
-				    ".chip 68040\n\t"
-				    "cpushl %%ic,(%0)\n\t"
-				    ".chip 68k"
-				    : : "a" (paddr));
-	      break;
-	    default:
-	    case FLUSH_CACHE_BOTH:
-	      __asm__ __volatile__ ("nop\n\t"
-				    ".chip 68040\n\t"
-				    "cpushl %%bc,(%0)\n\t"
-				    ".chip 68k"
-				    : : "a" (paddr));
-	      break;
-	    }
-	  if (!--i && len)
-	    {
-	      /*
-	       * No need to page align here since it is done by
-	       * virt_to_phys_040().
-	       */
-	      addr += PAGE_SIZE;
-	      i = PAGE_SIZE / 16;
-	      /* Recompute physical address when crossing a page
-	         boundary. */
-	      for (;;)
-		{
-		  if ((paddr = virt_to_phys_040(addr)))
-		    break;
-		  if (len <= i)
-		    return 0;
-		  len -= i;
-		  addr += PAGE_SIZE;
-		}
-	    }
-	  else
-	    paddr += 16;
-	}
-      break;
-
-    default:
-    case FLUSH_SCOPE_PAGE:
-      len += (addr & ~PAGE_MASK) + (PAGE_SIZE - 1);
-      for (len >>= PAGE_SHIFT; len--; addr += PAGE_SIZE)
-	{
-	  if (!(paddr = virt_to_phys_040(addr)))
-	    continue;
-	  switch (cache)
-	    {
-	    case FLUSH_CACHE_DATA:
-	      __asm__ __volatile__ ("nop\n\t"
-				    ".chip 68040\n\t"
-				    "cpushp %%dc,(%0)\n\t"
-				    ".chip 68k"
-				    : : "a" (paddr));
-	      break;
-	    case FLUSH_CACHE_INSN:
-	      __asm__ __volatile__ ("nop\n\t"
-				    ".chip 68040\n\t"
-				    "cpushp %%ic,(%0)\n\t"
-				    ".chip 68k"
-				    : : "a" (paddr));
-	      break;
-	    default:
-	    case FLUSH_CACHE_BOTH:
-	      __asm__ __volatile__ ("nop\n\t"
-				    ".chip 68040\n\t"
-				    "cpushp %%bc,(%0)\n\t"
-				    ".chip 68k"
-				    : : "a" (paddr));
-	      break;
-	    }
-	}
-      break;
-    }
-  return 0;
-}
-
-#define virt_to_phys_060(vaddr)				\
-({							\
-  unsigned long paddr;					\
-  __asm__ __volatile__ (".chip 68060\n\t"		\
-			"plpar (%0)\n\t"		\
-			".chip 68k"			\
-			: "=a" (paddr)			\
-			: "0" (vaddr));			\
-  (paddr); /* XXX */					\
-})
-
-static inline int
-cache_flush_060 (unsigned long addr, int scope, int cache, unsigned long len)
-{
-  unsigned long paddr, i;
-
-  /*
-   * 68060 manual says:
-   *  cpush %dc : flush DC, remains valid (with our %cacr setup)
-   *  cpush %ic : invalidate IC
-   *  cpush %bc : flush DC + invalidate IC
-   */
-  switch (scope)
-    {
-    case FLUSH_SCOPE_ALL:
-      switch (cache)
-	{
-	case FLUSH_CACHE_DATA:
-	  __asm__ __volatile__ (".chip 68060\n\t"
-				"cpusha %dc\n\t"
-				".chip 68k");
-	  break;
-	case FLUSH_CACHE_INSN:
-	  __asm__ __volatile__ (".chip 68060\n\t"
-				"cpusha %ic\n\t"
-				".chip 68k");
-	  break;
-	default:
-	case FLUSH_CACHE_BOTH:
-	  __asm__ __volatile__ (".chip 68060\n\t"
-				"cpusha %bc\n\t"
-				".chip 68k");
-	  break;
-	}
-      break;
-
-    case FLUSH_SCOPE_LINE:
-      /* Find the physical address of the first mapped page in the
-	 address range.  */
-      len += addr & 15;
-      addr &= -16;
-      if (!(paddr = virt_to_phys_060(addr))) {
-	unsigned long tmp = PAGE_SIZE - (addr & ~PAGE_MASK);
-
-	if (len <= tmp)
-	  return 0;
-	addr += tmp;
-	len -= tmp;
-	tmp = PAGE_SIZE;
-	for (;;)
-	  {
-	    if ((paddr = virt_to_phys_060(addr)))
-	      break;
-	    if (len <= tmp)
-	      return 0;
-	    addr += tmp;
-	    len -= tmp;
-	  }
-      }
-      len = (len + 15) >> 4;
-      i = (PAGE_SIZE - (paddr & ~PAGE_MASK)) >> 4;
-      while (len--)
-	{
-	  switch (cache)
-	    {
-	    case FLUSH_CACHE_DATA:
-	      __asm__ __volatile__ (".chip 68060\n\t"
-				    "cpushl %%dc,(%0)\n\t"
-				    ".chip 68k"
-				    : : "a" (paddr));
-	      break;
-	    case FLUSH_CACHE_INSN:
-	      __asm__ __volatile__ (".chip 68060\n\t"
-				    "cpushl %%ic,(%0)\n\t"
-				    ".chip 68k"
-				    : : "a" (paddr));
-	      break;
-	    default:
-	    case FLUSH_CACHE_BOTH:
-	      __asm__ __volatile__ (".chip 68060\n\t"
-				    "cpushl %%bc,(%0)\n\t"
-				    ".chip 68k"
-				    : : "a" (paddr));
-	      break;
-	    }
-	  if (!--i && len)
-	    {
-
-	      /*
-	       * We just want to jump to the first cache line
-	       * in the next page.
-	       */
-	      addr += PAGE_SIZE;
-	      addr &= PAGE_MASK;
-
-	      i = PAGE_SIZE / 16;
-	      /* Recompute physical address when crossing a page
-	         boundary. */
-	      for (;;)
-	        {
-	          if ((paddr = virt_to_phys_060(addr)))
-	            break;
-	          if (len <= i)
-	            return 0;
-	          len -= i;
-	          addr += PAGE_SIZE;
-	        }
-	    }
-	  else
-	    paddr += 16;
-	}
-      break;
-
-    default:
-    case FLUSH_SCOPE_PAGE:
-      len += (addr & ~PAGE_MASK) + (PAGE_SIZE - 1);
-      addr &= PAGE_MASK;	/* Workaround for bug in some
-				   revisions of the 68060 */
-      for (len >>= PAGE_SHIFT; len--; addr += PAGE_SIZE)
-	{
-	  if (!(paddr = virt_to_phys_060(addr)))
-	    continue;
-	  switch (cache)
-	    {
-	    case FLUSH_CACHE_DATA:
-	      __asm__ __volatile__ (".chip 68060\n\t"
-				    "cpushp %%dc,(%0)\n\t"
-				    ".chip 68k"
-				    : : "a" (paddr));
-	      break;
-	    case FLUSH_CACHE_INSN:
-	      __asm__ __volatile__ (".chip 68060\n\t"
-				    "cpushp %%ic,(%0)\n\t"
-				    ".chip 68k"
-				    : : "a" (paddr));
-	      break;
-	    default:
-	    case FLUSH_CACHE_BOTH:
-	      __asm__ __volatile__ (".chip 68060\n\t"
-				    "cpushp %%bc,(%0)\n\t"
-				    ".chip 68k"
-				    : : "a" (paddr));
-	      break;
-	    }
-	}
-      break;
-    }
-  return 0;
-}
-
-/* sys_cacheflush -- flush (part of) the processor cache.  */
-asmlinkage int
-sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
-{
-	struct vm_area_struct *vma;
-	int ret = -EINVAL;
-
-	if (scope < FLUSH_SCOPE_LINE || scope > FLUSH_SCOPE_ALL ||
-	    cache & ~FLUSH_CACHE_BOTH)
-		goto out;
-
-	if (scope == FLUSH_SCOPE_ALL) {
-		/* Only the superuser may explicitly flush the whole cache. */
-		ret = -EPERM;
-		if (!capable(CAP_SYS_ADMIN))
-			goto out;
-	} else {
-		/*
-		 * Verify that the specified address region actually belongs
-		 * to this process.
-		 */
-		vma = find_vma (current->mm, addr);
-		ret = -EINVAL;
-		/* Check for overflow.  */
-		if (addr + len < addr)
-			goto out;
-		if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end)
-			goto out;
-	}
-
-	if (CPU_IS_020_OR_030) {
-		if (scope == FLUSH_SCOPE_LINE && len < 256) {
-			unsigned long cacr;
-			__asm__ ("movec %%cacr, %0" : "=r" (cacr));
-			if (cache & FLUSH_CACHE_INSN)
-				cacr |= 4;
-			if (cache & FLUSH_CACHE_DATA)
-				cacr |= 0x400;
-			len >>= 2;
-			while (len--) {
-				__asm__ __volatile__ ("movec %1, %%caar\n\t"
-						      "movec %0, %%cacr"
-						      : /* no outputs */
-						      : "r" (cacr), "r" (addr));
-				addr += 4;
-			}
-		} else {
-			/* Flush the whole cache, even if page granularity requested. */
-			unsigned long cacr;
-			__asm__ ("movec %%cacr, %0" : "=r" (cacr));
-			if (cache & FLUSH_CACHE_INSN)
-				cacr |= 8;
-			if (cache & FLUSH_CACHE_DATA)
-				cacr |= 0x800;
-			__asm__ __volatile__ ("movec %0, %%cacr" : : "r" (cacr));
-		}
-		ret = 0;
-		goto out;
-	} else {
-	    /*
-	     * 040 or 060: don't blindly trust 'scope', someone could
-	     * try to flush a few megs of memory.
-	     */
-
-	    if (len>=3*PAGE_SIZE && scope<FLUSH_SCOPE_PAGE)
-	        scope=FLUSH_SCOPE_PAGE;
-	    if (len>=10*PAGE_SIZE && scope<FLUSH_SCOPE_ALL)
-	        scope=FLUSH_SCOPE_ALL;
-	    if (CPU_IS_040) {
-		ret = cache_flush_040 (addr, scope, cache, len);
-	    } else if (CPU_IS_060) {
-		ret = cache_flush_060 (addr, scope, cache, len);
-	    }
-	}
-out:
-	return ret;
-}
-
-asmlinkage int sys_getpagesize(void)
-{
-	return PAGE_SIZE;
-}
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	register long __res asm ("%d0") = __NR_execve;
-	register long __a asm ("%d1") = (long)(filename);
-	register long __b asm ("%d2") = (long)(argv);
-	register long __c asm ("%d3") = (long)(envp);
-	asm volatile ("trap  #0" : "+d" (__res)
-			: "d" (__a), "d" (__b), "d" (__c));
-	return __res;
-}
-
-asmlinkage unsigned long sys_get_thread_area(void)
-{
-	return current_thread_info()->tp_value;
-}
-
-asmlinkage int sys_set_thread_area(unsigned long tp)
-{
-	current_thread_info()->tp_value = tp;
-	return 0;
-}
-
-/* This syscall gets its arguments in A0 (mem), D2 (oldval) and
-   D1 (newval).  */
-asmlinkage int
-sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5,
-		      unsigned long __user * mem)
-{
-	/* This was borrowed from ARM's implementation.  */
-	for (;;) {
-		struct mm_struct *mm = current->mm;
-		pgd_t *pgd;
-		pmd_t *pmd;
-		pte_t *pte;
-		spinlock_t *ptl;
-		unsigned long mem_value;
-
-		down_read(&mm->mmap_sem);
-		pgd = pgd_offset(mm, (unsigned long)mem);
-		if (!pgd_present(*pgd))
-			goto bad_access;
-		pmd = pmd_offset(pgd, (unsigned long)mem);
-		if (!pmd_present(*pmd))
-			goto bad_access;
-		pte = pte_offset_map_lock(mm, pmd, (unsigned long)mem, &ptl);
-		if (!pte_present(*pte) || !pte_dirty(*pte)
-		    || !pte_write(*pte)) {
-			pte_unmap_unlock(pte, ptl);
-			goto bad_access;
-		}
-
-		mem_value = *mem;
-		if (mem_value == oldval)
-			*mem = newval;
-
-		pte_unmap_unlock(pte, ptl);
-		up_read(&mm->mmap_sem);
-		return mem_value;
-
-	      bad_access:
-		up_read(&mm->mmap_sem);
-		/* This is not necessarily a bad access, we can get here if
-		   a memory we're trying to write to should be copied-on-write.
-		   Make the kernel do the necessary page stuff, then re-iterate.
-		   Simulate a write access fault to do that.  */
-		{
-			/* The first argument of the function corresponds to
-			   D1, which is the first field of struct pt_regs.  */
-			struct pt_regs *fp = (struct pt_regs *)&newval;
-
-			/* '3' is an RMW flag.  */
-			if (do_page_fault(fp, (unsigned long)mem, 3))
-				/* If the do_page_fault() failed, we don't
-				   have anything meaningful to return.
-				   There should be a SIGSEGV pending for
-				   the process.  */
-				return 0xdeadbeef;
-		}
-	}
-}
-
-asmlinkage int sys_atomic_barrier(void)
-{
-	/* no code needed for uniprocs */
-	return 0;
-}
+#ifdef CONFIG_MMU
+#include "sys_m68k_mm.c"
+#else
+#include "sys_m68k_no.c"
+#endif

diff --git a/arch/m68k/kernel/sys_m68k_mm.c b/arch/m68k/kernel/sys_m68k_mm.c
new file mode 100644
index 0000000..3db2e7f
--- /dev/null
+++ b/arch/m68k/kernel/sys_m68k_mm.c

@@ -0,0 +1,546 @@
+/*
+ * linux/arch/m68k/kernel/sys_m68k.c
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/m68k
+ * platform.
+ */
+
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/ipc.h>
+
+#include <asm/setup.h>
+#include <asm/uaccess.h>
+#include <asm/cachectl.h>
+#include <asm/traps.h>
+#include <asm/page.h>
+#include <asm/unistd.h>
+#include <linux/elf.h>
+#include <asm/tlb.h>
+
+asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address,
+			     unsigned long error_code);
+
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+	unsigned long prot, unsigned long flags,
+	unsigned long fd, unsigned long pgoff)
+{
+	/*
+	 * This is wrong for sun3 - there PAGE_SIZE is 8Kb,
+	 * so we need to shift the argument down by 1; m68k mmap64(3)
+	 * (in libc) expects the last argument of mmap2 in 4Kb units.
+	 */
+	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
+}
+
+/* Convert virtual (user) address VADDR to physical address PADDR */
+#define virt_to_phys_040(vaddr)						\
+({									\
+  unsigned long _mmusr, _paddr;						\
+									\
+  __asm__ __volatile__ (".chip 68040\n\t"				\
+			"ptestr (%1)\n\t"				\
+			"movec %%mmusr,%0\n\t"				\
+			".chip 68k"					\
+			: "=r" (_mmusr)					\
+			: "a" (vaddr));					\
+  _paddr = (_mmusr & MMU_R_040) ? (_mmusr & PAGE_MASK) : 0;		\
+  _paddr;								\
+})
+
+static inline int
+cache_flush_040 (unsigned long addr, int scope, int cache, unsigned long len)
+{
+  unsigned long paddr, i;
+
+  switch (scope)
+    {
+    case FLUSH_SCOPE_ALL:
+      switch (cache)
+	{
+	case FLUSH_CACHE_DATA:
+	  /* This nop is needed for some broken versions of the 68040.  */
+	  __asm__ __volatile__ ("nop\n\t"
+				".chip 68040\n\t"
+				"cpusha %dc\n\t"
+				".chip 68k");
+	  break;
+	case FLUSH_CACHE_INSN:
+	  __asm__ __volatile__ ("nop\n\t"
+				".chip 68040\n\t"
+				"cpusha %ic\n\t"
+				".chip 68k");
+	  break;
+	default:
+	case FLUSH_CACHE_BOTH:
+	  __asm__ __volatile__ ("nop\n\t"
+				".chip 68040\n\t"
+				"cpusha %bc\n\t"
+				".chip 68k");
+	  break;
+	}
+      break;
+
+    case FLUSH_SCOPE_LINE:
+      /* Find the physical address of the first mapped page in the
+	 address range.  */
+      if ((paddr = virt_to_phys_040(addr))) {
+        paddr += addr & ~(PAGE_MASK | 15);
+        len = (len + (addr & 15) + 15) >> 4;
+      } else {
+	unsigned long tmp = PAGE_SIZE - (addr & ~PAGE_MASK);
+
+	if (len <= tmp)
+	  return 0;
+	addr += tmp;
+	len -= tmp;
+	tmp = PAGE_SIZE;
+	for (;;)
+	  {
+	    if ((paddr = virt_to_phys_040(addr)))
+	      break;
+	    if (len <= tmp)
+	      return 0;
+	    addr += tmp;
+	    len -= tmp;
+	  }
+	len = (len + 15) >> 4;
+      }
+      i = (PAGE_SIZE - (paddr & ~PAGE_MASK)) >> 4;
+      while (len--)
+	{
+	  switch (cache)
+	    {
+	    case FLUSH_CACHE_DATA:
+	      __asm__ __volatile__ ("nop\n\t"
+				    ".chip 68040\n\t"
+				    "cpushl %%dc,(%0)\n\t"
+				    ".chip 68k"
+				    : : "a" (paddr));
+	      break;
+	    case FLUSH_CACHE_INSN:
+	      __asm__ __volatile__ ("nop\n\t"
+				    ".chip 68040\n\t"
+				    "cpushl %%ic,(%0)\n\t"
+				    ".chip 68k"
+				    : : "a" (paddr));
+	      break;
+	    default:
+	    case FLUSH_CACHE_BOTH:
+	      __asm__ __volatile__ ("nop\n\t"
+				    ".chip 68040\n\t"
+				    "cpushl %%bc,(%0)\n\t"
+				    ".chip 68k"
+				    : : "a" (paddr));
+	      break;
+	    }
+	  if (!--i && len)
+	    {
+	      /*
+	       * No need to page align here since it is done by
+	       * virt_to_phys_040().
+	       */
+	      addr += PAGE_SIZE;
+	      i = PAGE_SIZE / 16;
+	      /* Recompute physical address when crossing a page
+	         boundary. */
+	      for (;;)
+		{
+		  if ((paddr = virt_to_phys_040(addr)))
+		    break;
+		  if (len <= i)
+		    return 0;
+		  len -= i;
+		  addr += PAGE_SIZE;
+		}
+	    }
+	  else
+	    paddr += 16;
+	}
+      break;
+
+    default:
+    case FLUSH_SCOPE_PAGE:
+      len += (addr & ~PAGE_MASK) + (PAGE_SIZE - 1);
+      for (len >>= PAGE_SHIFT; len--; addr += PAGE_SIZE)
+	{
+	  if (!(paddr = virt_to_phys_040(addr)))
+	    continue;
+	  switch (cache)
+	    {
+	    case FLUSH_CACHE_DATA:
+	      __asm__ __volatile__ ("nop\n\t"
+				    ".chip 68040\n\t"
+				    "cpushp %%dc,(%0)\n\t"
+				    ".chip 68k"
+				    : : "a" (paddr));
+	      break;
+	    case FLUSH_CACHE_INSN:
+	      __asm__ __volatile__ ("nop\n\t"
+				    ".chip 68040\n\t"
+				    "cpushp %%ic,(%0)\n\t"
+				    ".chip 68k"
+				    : : "a" (paddr));
+	      break;
+	    default:
+	    case FLUSH_CACHE_BOTH:
+	      __asm__ __volatile__ ("nop\n\t"
+				    ".chip 68040\n\t"
+				    "cpushp %%bc,(%0)\n\t"
+				    ".chip 68k"
+				    : : "a" (paddr));
+	      break;
+	    }
+	}
+      break;
+    }
+  return 0;
+}
+
+#define virt_to_phys_060(vaddr)				\
+({							\
+  unsigned long paddr;					\
+  __asm__ __volatile__ (".chip 68060\n\t"		\
+			"plpar (%0)\n\t"		\
+			".chip 68k"			\
+			: "=a" (paddr)			\
+			: "0" (vaddr));			\
+  (paddr); /* XXX */					\
+})
+
+static inline int
+cache_flush_060 (unsigned long addr, int scope, int cache, unsigned long len)
+{
+  unsigned long paddr, i;
+
+  /*
+   * 68060 manual says:
+   *  cpush %dc : flush DC, remains valid (with our %cacr setup)
+   *  cpush %ic : invalidate IC
+   *  cpush %bc : flush DC + invalidate IC
+   */
+  switch (scope)
+    {
+    case FLUSH_SCOPE_ALL:
+      switch (cache)
+	{
+	case FLUSH_CACHE_DATA:
+	  __asm__ __volatile__ (".chip 68060\n\t"
+				"cpusha %dc\n\t"
+				".chip 68k");
+	  break;
+	case FLUSH_CACHE_INSN:
+	  __asm__ __volatile__ (".chip 68060\n\t"
+				"cpusha %ic\n\t"
+				".chip 68k");
+	  break;
+	default:
+	case FLUSH_CACHE_BOTH:
+	  __asm__ __volatile__ (".chip 68060\n\t"
+				"cpusha %bc\n\t"
+				".chip 68k");
+	  break;
+	}
+      break;
+
+    case FLUSH_SCOPE_LINE:
+      /* Find the physical address of the first mapped page in the
+	 address range.  */
+      len += addr & 15;
+      addr &= -16;
+      if (!(paddr = virt_to_phys_060(addr))) {
+	unsigned long tmp = PAGE_SIZE - (addr & ~PAGE_MASK);
+
+	if (len <= tmp)
+	  return 0;
+	addr += tmp;
+	len -= tmp;
+	tmp = PAGE_SIZE;
+	for (;;)
+	  {
+	    if ((paddr = virt_to_phys_060(addr)))
+	      break;
+	    if (len <= tmp)
+	      return 0;
+	    addr += tmp;
+	    len -= tmp;
+	  }
+      }
+      len = (len + 15) >> 4;
+      i = (PAGE_SIZE - (paddr & ~PAGE_MASK)) >> 4;
+      while (len--)
+	{
+	  switch (cache)
+	    {
+	    case FLUSH_CACHE_DATA:
+	      __asm__ __volatile__ (".chip 68060\n\t"
+				    "cpushl %%dc,(%0)\n\t"
+				    ".chip 68k"
+				    : : "a" (paddr));
+	      break;
+	    case FLUSH_CACHE_INSN:
+	      __asm__ __volatile__ (".chip 68060\n\t"
+				    "cpushl %%ic,(%0)\n\t"
+				    ".chip 68k"
+				    : : "a" (paddr));
+	      break;
+	    default:
+	    case FLUSH_CACHE_BOTH:
+	      __asm__ __volatile__ (".chip 68060\n\t"
+				    "cpushl %%bc,(%0)\n\t"
+				    ".chip 68k"
+				    : : "a" (paddr));
+	      break;
+	    }
+	  if (!--i && len)
+	    {
+
+	      /*
+	       * We just want to jump to the first cache line
+	       * in the next page.
+	       */
+	      addr += PAGE_SIZE;
+	      addr &= PAGE_MASK;
+
+	      i = PAGE_SIZE / 16;
+	      /* Recompute physical address when crossing a page
+	         boundary. */
+	      for (;;)
+	        {
+	          if ((paddr = virt_to_phys_060(addr)))
+	            break;
+	          if (len <= i)
+	            return 0;
+	          len -= i;
+	          addr += PAGE_SIZE;
+	        }
+	    }
+	  else
+	    paddr += 16;
+	}
+      break;
+
+    default:
+    case FLUSH_SCOPE_PAGE:
+      len += (addr & ~PAGE_MASK) + (PAGE_SIZE - 1);
+      addr &= PAGE_MASK;	/* Workaround for bug in some
+				   revisions of the 68060 */
+      for (len >>= PAGE_SHIFT; len--; addr += PAGE_SIZE)
+	{
+	  if (!(paddr = virt_to_phys_060(addr)))
+	    continue;
+	  switch (cache)
+	    {
+	    case FLUSH_CACHE_DATA:
+	      __asm__ __volatile__ (".chip 68060\n\t"
+				    "cpushp %%dc,(%0)\n\t"
+				    ".chip 68k"
+				    : : "a" (paddr));
+	      break;
+	    case FLUSH_CACHE_INSN:
+	      __asm__ __volatile__ (".chip 68060\n\t"
+				    "cpushp %%ic,(%0)\n\t"
+				    ".chip 68k"
+				    : : "a" (paddr));
+	      break;
+	    default:
+	    case FLUSH_CACHE_BOTH:
+	      __asm__ __volatile__ (".chip 68060\n\t"
+				    "cpushp %%bc,(%0)\n\t"
+				    ".chip 68k"
+				    : : "a" (paddr));
+	      break;
+	    }
+	}
+      break;
+    }
+  return 0;
+}
+
+/* sys_cacheflush -- flush (part of) the processor cache.  */
+asmlinkage int
+sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len)
+{
+	struct vm_area_struct *vma;
+	int ret = -EINVAL;
+
+	if (scope < FLUSH_SCOPE_LINE || scope > FLUSH_SCOPE_ALL ||
+	    cache & ~FLUSH_CACHE_BOTH)
+		goto out;
+
+	if (scope == FLUSH_SCOPE_ALL) {
+		/* Only the superuser may explicitly flush the whole cache. */
+		ret = -EPERM;
+		if (!capable(CAP_SYS_ADMIN))
+			goto out;
+	} else {
+		/*
+		 * Verify that the specified address region actually belongs
+		 * to this process.
+		 */
+		vma = find_vma (current->mm, addr);
+		ret = -EINVAL;
+		/* Check for overflow.  */
+		if (addr + len < addr)
+			goto out;
+		if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end)
+			goto out;
+	}
+
+	if (CPU_IS_020_OR_030) {
+		if (scope == FLUSH_SCOPE_LINE && len < 256) {
+			unsigned long cacr;
+			__asm__ ("movec %%cacr, %0" : "=r" (cacr));
+			if (cache & FLUSH_CACHE_INSN)
+				cacr |= 4;
+			if (cache & FLUSH_CACHE_DATA)
+				cacr |= 0x400;
+			len >>= 2;
+			while (len--) {
+				__asm__ __volatile__ ("movec %1, %%caar\n\t"
+						      "movec %0, %%cacr"
+						      : /* no outputs */
+						      : "r" (cacr), "r" (addr));
+				addr += 4;
+			}
+		} else {
+			/* Flush the whole cache, even if page granularity requested. */
+			unsigned long cacr;
+			__asm__ ("movec %%cacr, %0" : "=r" (cacr));
+			if (cache & FLUSH_CACHE_INSN)
+				cacr |= 8;
+			if (cache & FLUSH_CACHE_DATA)
+				cacr |= 0x800;
+			__asm__ __volatile__ ("movec %0, %%cacr" : : "r" (cacr));
+		}
+		ret = 0;
+		goto out;
+	} else {
+	    /*
+	     * 040 or 060: don't blindly trust 'scope', someone could
+	     * try to flush a few megs of memory.
+	     */
+
+	    if (len>=3*PAGE_SIZE && scope<FLUSH_SCOPE_PAGE)
+	        scope=FLUSH_SCOPE_PAGE;
+	    if (len>=10*PAGE_SIZE && scope<FLUSH_SCOPE_ALL)
+	        scope=FLUSH_SCOPE_ALL;
+	    if (CPU_IS_040) {
+		ret = cache_flush_040 (addr, scope, cache, len);
+	    } else if (CPU_IS_060) {
+		ret = cache_flush_060 (addr, scope, cache, len);
+	    }
+	}
+out:
+	return ret;
+}
+
+asmlinkage int sys_getpagesize(void)
+{
+	return PAGE_SIZE;
+}
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
+{
+	register long __res asm ("%d0") = __NR_execve;
+	register long __a asm ("%d1") = (long)(filename);
+	register long __b asm ("%d2") = (long)(argv);
+	register long __c asm ("%d3") = (long)(envp);
+	asm volatile ("trap  #0" : "+d" (__res)
+			: "d" (__a), "d" (__b), "d" (__c));
+	return __res;
+}
+
+asmlinkage unsigned long sys_get_thread_area(void)
+{
+	return current_thread_info()->tp_value;
+}
+
+asmlinkage int sys_set_thread_area(unsigned long tp)
+{
+	current_thread_info()->tp_value = tp;
+	return 0;
+}
+
+/* This syscall gets its arguments in A0 (mem), D2 (oldval) and
+   D1 (newval).  */
+asmlinkage int
+sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5,
+		      unsigned long __user * mem)
+{
+	/* This was borrowed from ARM's implementation.  */
+	for (;;) {
+		struct mm_struct *mm = current->mm;
+		pgd_t *pgd;
+		pmd_t *pmd;
+		pte_t *pte;
+		spinlock_t *ptl;
+		unsigned long mem_value;
+
+		down_read(&mm->mmap_sem);
+		pgd = pgd_offset(mm, (unsigned long)mem);
+		if (!pgd_present(*pgd))
+			goto bad_access;
+		pmd = pmd_offset(pgd, (unsigned long)mem);
+		if (!pmd_present(*pmd))
+			goto bad_access;
+		pte = pte_offset_map_lock(mm, pmd, (unsigned long)mem, &ptl);
+		if (!pte_present(*pte) || !pte_dirty(*pte)
+		    || !pte_write(*pte)) {
+			pte_unmap_unlock(pte, ptl);
+			goto bad_access;
+		}
+
+		mem_value = *mem;
+		if (mem_value == oldval)
+			*mem = newval;
+
+		pte_unmap_unlock(pte, ptl);
+		up_read(&mm->mmap_sem);
+		return mem_value;
+
+	      bad_access:
+		up_read(&mm->mmap_sem);
+		/* This is not necessarily a bad access, we can get here if
+		   a memory we're trying to write to should be copied-on-write.
+		   Make the kernel do the necessary page stuff, then re-iterate.
+		   Simulate a write access fault to do that.  */
+		{
+			/* The first argument of the function corresponds to
+			   D1, which is the first field of struct pt_regs.  */
+			struct pt_regs *fp = (struct pt_regs *)&newval;
+
+			/* '3' is an RMW flag.  */
+			if (do_page_fault(fp, (unsigned long)mem, 3))
+				/* If the do_page_fault() failed, we don't
+				   have anything meaningful to return.
+				   There should be a SIGSEGV pending for
+				   the process.  */
+				return 0xdeadbeef;
+		}
+	}
+}
+
+asmlinkage int sys_atomic_barrier(void)
+{
+	/* no code needed for uniprocs */
+	return 0;
+}

diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k_no.c
similarity index 100%
rename from arch/m68knommu/kernel/sys_m68k.c
rename to arch/m68k/kernel/sys_m68k_no.c


diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
similarity index 100%
rename from arch/m68knommu/kernel/syscalltable.S
rename to arch/m68k/kernel/syscalltable.S


diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 18b34ee..a5cf40c 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c

@@ -1,114 +1,5 @@
-/*
- *  linux/arch/m68k/kernel/time.c
- *
- *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
- *
- * This file contains the m68k-specific time handling details.
- * Most of the stuff is located in the machine specific files.
- *
- * 1997-09-10	Updated NTP code according to technical memorandum Jan '96
- *		"A Kernel Model for Precision Timekeeping" by Dave Mills
- */
-
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/rtc.h>
-#include <linux/platform_device.h>
-
-#include <asm/machdep.h>
-#include <asm/io.h>
-#include <asm/irq_regs.h>
-
-#include <linux/time.h>
-#include <linux/timex.h>
-#include <linux/profile.h>
-
-static inline int set_rtc_mmss(unsigned long nowtime)
-{
-  if (mach_set_clock_mmss)
-    return mach_set_clock_mmss (nowtime);
-  return -1;
-}
-
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "xtime_update()" routine every clocktick
- */
-static irqreturn_t timer_interrupt(int irq, void *dummy)
-{
-	xtime_update(1);
-	update_process_times(user_mode(get_irq_regs()));
-	profile_tick(CPU_PROFILING);
-
-#ifdef CONFIG_HEARTBEAT
-	/* use power LED as a heartbeat instead -- much more useful
-	   for debugging -- based on the version for PReP by Cort */
-	/* acts like an actual heart beat -- ie thump-thump-pause... */
-	if (mach_heartbeat) {
-	    static unsigned cnt = 0, period = 0, dist = 0;
-
-	    if (cnt == 0 || cnt == dist)
-		mach_heartbeat( 1 );
-	    else if (cnt == 7 || cnt == dist+7)
-		mach_heartbeat( 0 );
-
-	    if (++cnt > period) {
-		cnt = 0;
-		/* The hyperbolic function below modifies the heartbeat period
-		 * length in dependency of the current (5min) load. It goes
-		 * through the points f(0)=126, f(1)=86, f(5)=51,
-		 * f(inf)->30. */
-		period = ((672<<FSHIFT)/(5*avenrun[0]+(7<<FSHIFT))) + 30;
-		dist = period / 4;
-	    }
-	}
-#endif /* CONFIG_HEARTBEAT */
-	return IRQ_HANDLED;
-}
-
-void read_persistent_clock(struct timespec *ts)
-{
-	struct rtc_time time;
-	ts->tv_sec = 0;
-	ts->tv_nsec = 0;
-
-	if (mach_hwclk) {
-		mach_hwclk(0, &time);
-
-		if ((time.tm_year += 1900) < 1970)
-			time.tm_year += 100;
-		ts->tv_sec = mktime(time.tm_year, time.tm_mon, time.tm_mday,
-				      time.tm_hour, time.tm_min, time.tm_sec);
-	}
-}
-
-void __init time_init(void)
-{
-	mach_sched_init(timer_interrupt);
-}
-
-u32 arch_gettimeoffset(void)
-{
-	return mach_gettimeoffset() * 1000;
-}
-
-static int __init rtc_init(void)
-{
-	struct platform_device *pdev;
-
-	if (!mach_hwclk)
-		return -ENODEV;
-
-	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
-	if (IS_ERR(pdev))
-		return PTR_ERR(pdev);
-
-	return 0;
-}
-
-module_init(rtc_init);
+#ifdef CONFIG_MMU
+#include "time_mm.c"
+#else
+#include "time_no.c"
+#endif

diff --git a/arch/m68k/kernel/time_mm.c b/arch/m68k/kernel/time_mm.c
new file mode 100644
index 0000000..18b34ee
--- /dev/null
+++ b/arch/m68k/kernel/time_mm.c

@@ -0,0 +1,114 @@
+/*
+ *  linux/arch/m68k/kernel/time.c
+ *
+ *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *
+ * This file contains the m68k-specific time handling details.
+ * Most of the stuff is located in the machine specific files.
+ *
+ * 1997-09-10	Updated NTP code according to technical memorandum Jan '96
+ *		"A Kernel Model for Precision Timekeeping" by Dave Mills
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/rtc.h>
+#include <linux/platform_device.h>
+
+#include <asm/machdep.h>
+#include <asm/io.h>
+#include <asm/irq_regs.h>
+
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/profile.h>
+
+static inline int set_rtc_mmss(unsigned long nowtime)
+{
+  if (mach_set_clock_mmss)
+    return mach_set_clock_mmss (nowtime);
+  return -1;
+}
+
+/*
+ * timer_interrupt() needs to keep up the real-time clock,
+ * as well as call the "xtime_update()" routine every clocktick
+ */
+static irqreturn_t timer_interrupt(int irq, void *dummy)
+{
+	xtime_update(1);
+	update_process_times(user_mode(get_irq_regs()));
+	profile_tick(CPU_PROFILING);
+
+#ifdef CONFIG_HEARTBEAT
+	/* use power LED as a heartbeat instead -- much more useful
+	   for debugging -- based on the version for PReP by Cort */
+	/* acts like an actual heart beat -- ie thump-thump-pause... */
+	if (mach_heartbeat) {
+	    static unsigned cnt = 0, period = 0, dist = 0;
+
+	    if (cnt == 0 || cnt == dist)
+		mach_heartbeat( 1 );
+	    else if (cnt == 7 || cnt == dist+7)
+		mach_heartbeat( 0 );
+
+	    if (++cnt > period) {
+		cnt = 0;
+		/* The hyperbolic function below modifies the heartbeat period
+		 * length in dependency of the current (5min) load. It goes
+		 * through the points f(0)=126, f(1)=86, f(5)=51,
+		 * f(inf)->30. */
+		period = ((672<<FSHIFT)/(5*avenrun[0]+(7<<FSHIFT))) + 30;
+		dist = period / 4;
+	    }
+	}
+#endif /* CONFIG_HEARTBEAT */
+	return IRQ_HANDLED;
+}
+
+void read_persistent_clock(struct timespec *ts)
+{
+	struct rtc_time time;
+	ts->tv_sec = 0;
+	ts->tv_nsec = 0;
+
+	if (mach_hwclk) {
+		mach_hwclk(0, &time);
+
+		if ((time.tm_year += 1900) < 1970)
+			time.tm_year += 100;
+		ts->tv_sec = mktime(time.tm_year, time.tm_mon, time.tm_mday,
+				      time.tm_hour, time.tm_min, time.tm_sec);
+	}
+}
+
+void __init time_init(void)
+{
+	mach_sched_init(timer_interrupt);
+}
+
+u32 arch_gettimeoffset(void)
+{
+	return mach_gettimeoffset() * 1000;
+}
+
+static int __init rtc_init(void)
+{
+	struct platform_device *pdev;
+
+	if (!mach_hwclk)
+		return -ENODEV;
+
+	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	return 0;
+}
+
+module_init(rtc_init);

diff --git a/arch/m68knommu/kernel/time.c b/arch/m68k/kernel/time_no.c
similarity index 100%
rename from arch/m68knommu/kernel/time.c
rename to arch/m68k/kernel/time_no.c


diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 4022bbc..c98add3 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c

@@ -1,1207 +1,5 @@
-/*
- *  linux/arch/m68k/kernel/traps.c
- *
- *  Copyright (C) 1993, 1994 by Hamish Macdonald
- *
- *  68040 fixes by Michael Rausch
- *  68040 fixes by Martin Apel
- *  68040 fixes and writeback by Richard Zidlicky
- *  68060 fixes by Roman Hodek
- *  68060 fixes by Jesper Skov
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-/*
- * Sets up all exception vectors
- */
-
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/user.h>
-#include <linux/string.h>
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <linux/ptrace.h>
-#include <linux/kallsyms.h>
-
-#include <asm/setup.h>
-#include <asm/fpu.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/traps.h>
-#include <asm/pgalloc.h>
-#include <asm/machdep.h>
-#include <asm/siginfo.h>
-
-/* assembler routines */
-asmlinkage void system_call(void);
-asmlinkage void buserr(void);
-asmlinkage void trap(void);
-asmlinkage void nmihandler(void);
-#ifdef CONFIG_M68KFPU_EMU
-asmlinkage void fpu_emu(void);
-#endif
-
-e_vector vectors[256];
-
-/* nmi handler for the Amiga */
-asm(".text\n"
-    __ALIGN_STR "\n"
-    "nmihandler: rte");
-
-/*
- * this must be called very early as the kernel might
- * use some instruction that are emulated on the 060
- * and so we're prepared for early probe attempts (e.g. nf_init).
- */
-void __init base_trap_init(void)
-{
-	if (MACH_IS_SUN3X) {
-		extern e_vector *sun3x_prom_vbr;
-
-		__asm__ volatile ("movec %%vbr, %0" : "=r" (sun3x_prom_vbr));
-	}
-
-	/* setup the exception vector table */
-	__asm__ volatile ("movec %0,%%vbr" : : "r" ((void*)vectors));
-
-	if (CPU_IS_060) {
-		/* set up ISP entry points */
-		asmlinkage void unimp_vec(void) asm ("_060_isp_unimp");
-
-		vectors[VEC_UNIMPII] = unimp_vec;
-	}
-
-	vectors[VEC_BUSERR] = buserr;
-	vectors[VEC_ILLEGAL] = trap;
-	vectors[VEC_SYS] = system_call;
-}
-
-void __init trap_init (void)
-{
-	int i;
-
-	for (i = VEC_SPUR; i <= VEC_INT7; i++)
-		vectors[i] = bad_inthandler;
-
-	for (i = 0; i < VEC_USER; i++)
-		if (!vectors[i])
-			vectors[i] = trap;
-
-	for (i = VEC_USER; i < 256; i++)
-		vectors[i] = bad_inthandler;
-
-#ifdef CONFIG_M68KFPU_EMU
-	if (FPU_IS_EMU)
-		vectors[VEC_LINE11] = fpu_emu;
-#endif
-
-	if (CPU_IS_040 && !FPU_IS_EMU) {
-		/* set up FPSP entry points */
-		asmlinkage void dz_vec(void) asm ("dz");
-		asmlinkage void inex_vec(void) asm ("inex");
-		asmlinkage void ovfl_vec(void) asm ("ovfl");
-		asmlinkage void unfl_vec(void) asm ("unfl");
-		asmlinkage void snan_vec(void) asm ("snan");
-		asmlinkage void operr_vec(void) asm ("operr");
-		asmlinkage void bsun_vec(void) asm ("bsun");
-		asmlinkage void fline_vec(void) asm ("fline");
-		asmlinkage void unsupp_vec(void) asm ("unsupp");
-
-		vectors[VEC_FPDIVZ] = dz_vec;
-		vectors[VEC_FPIR] = inex_vec;
-		vectors[VEC_FPOVER] = ovfl_vec;
-		vectors[VEC_FPUNDER] = unfl_vec;
-		vectors[VEC_FPNAN] = snan_vec;
-		vectors[VEC_FPOE] = operr_vec;
-		vectors[VEC_FPBRUC] = bsun_vec;
-		vectors[VEC_LINE11] = fline_vec;
-		vectors[VEC_FPUNSUP] = unsupp_vec;
-	}
-
-	if (CPU_IS_060 && !FPU_IS_EMU) {
-		/* set up IFPSP entry points */
-		asmlinkage void snan_vec6(void) asm ("_060_fpsp_snan");
-		asmlinkage void operr_vec6(void) asm ("_060_fpsp_operr");
-		asmlinkage void ovfl_vec6(void) asm ("_060_fpsp_ovfl");
-		asmlinkage void unfl_vec6(void) asm ("_060_fpsp_unfl");
-		asmlinkage void dz_vec6(void) asm ("_060_fpsp_dz");
-		asmlinkage void inex_vec6(void) asm ("_060_fpsp_inex");
-		asmlinkage void fline_vec6(void) asm ("_060_fpsp_fline");
-		asmlinkage void unsupp_vec6(void) asm ("_060_fpsp_unsupp");
-		asmlinkage void effadd_vec6(void) asm ("_060_fpsp_effadd");
-
-		vectors[VEC_FPNAN] = snan_vec6;
-		vectors[VEC_FPOE] = operr_vec6;
-		vectors[VEC_FPOVER] = ovfl_vec6;
-		vectors[VEC_FPUNDER] = unfl_vec6;
-		vectors[VEC_FPDIVZ] = dz_vec6;
-		vectors[VEC_FPIR] = inex_vec6;
-		vectors[VEC_LINE11] = fline_vec6;
-		vectors[VEC_FPUNSUP] = unsupp_vec6;
-		vectors[VEC_UNIMPEA] = effadd_vec6;
-	}
-
-        /* if running on an amiga, make the NMI interrupt do nothing */
-	if (MACH_IS_AMIGA) {
-		vectors[VEC_INT7] = nmihandler;
-	}
-}
-
-
-static const char *vec_names[] = {
-	[VEC_RESETSP]	= "RESET SP",
-	[VEC_RESETPC]	= "RESET PC",
-	[VEC_BUSERR]	= "BUS ERROR",
-	[VEC_ADDRERR]	= "ADDRESS ERROR",
-	[VEC_ILLEGAL]	= "ILLEGAL INSTRUCTION",
-	[VEC_ZERODIV]	= "ZERO DIVIDE",
-	[VEC_CHK]	= "CHK",
-	[VEC_TRAP]	= "TRAPcc",
-	[VEC_PRIV]	= "PRIVILEGE VIOLATION",
-	[VEC_TRACE]	= "TRACE",
-	[VEC_LINE10]	= "LINE 1010",
-	[VEC_LINE11]	= "LINE 1111",
-	[VEC_RESV12]	= "UNASSIGNED RESERVED 12",
-	[VEC_COPROC]	= "COPROCESSOR PROTOCOL VIOLATION",
-	[VEC_FORMAT]	= "FORMAT ERROR",
-	[VEC_UNINT]	= "UNINITIALIZED INTERRUPT",
-	[VEC_RESV16]	= "UNASSIGNED RESERVED 16",
-	[VEC_RESV17]	= "UNASSIGNED RESERVED 17",
-	[VEC_RESV18]	= "UNASSIGNED RESERVED 18",
-	[VEC_RESV19]	= "UNASSIGNED RESERVED 19",
-	[VEC_RESV20]	= "UNASSIGNED RESERVED 20",
-	[VEC_RESV21]	= "UNASSIGNED RESERVED 21",
-	[VEC_RESV22]	= "UNASSIGNED RESERVED 22",
-	[VEC_RESV23]	= "UNASSIGNED RESERVED 23",
-	[VEC_SPUR]	= "SPURIOUS INTERRUPT",
-	[VEC_INT1]	= "LEVEL 1 INT",
-	[VEC_INT2]	= "LEVEL 2 INT",
-	[VEC_INT3]	= "LEVEL 3 INT",
-	[VEC_INT4]	= "LEVEL 4 INT",
-	[VEC_INT5]	= "LEVEL 5 INT",
-	[VEC_INT6]	= "LEVEL 6 INT",
-	[VEC_INT7]	= "LEVEL 7 INT",
-	[VEC_SYS]	= "SYSCALL",
-	[VEC_TRAP1]	= "TRAP #1",
-	[VEC_TRAP2]	= "TRAP #2",
-	[VEC_TRAP3]	= "TRAP #3",
-	[VEC_TRAP4]	= "TRAP #4",
-	[VEC_TRAP5]	= "TRAP #5",
-	[VEC_TRAP6]	= "TRAP #6",
-	[VEC_TRAP7]	= "TRAP #7",
-	[VEC_TRAP8]	= "TRAP #8",
-	[VEC_TRAP9]	= "TRAP #9",
-	[VEC_TRAP10]	= "TRAP #10",
-	[VEC_TRAP11]	= "TRAP #11",
-	[VEC_TRAP12]	= "TRAP #12",
-	[VEC_TRAP13]	= "TRAP #13",
-	[VEC_TRAP14]	= "TRAP #14",
-	[VEC_TRAP15]	= "TRAP #15",
-	[VEC_FPBRUC]	= "FPCP BSUN",
-	[VEC_FPIR]	= "FPCP INEXACT",
-	[VEC_FPDIVZ]	= "FPCP DIV BY 0",
-	[VEC_FPUNDER]	= "FPCP UNDERFLOW",
-	[VEC_FPOE]	= "FPCP OPERAND ERROR",
-	[VEC_FPOVER]	= "FPCP OVERFLOW",
-	[VEC_FPNAN]	= "FPCP SNAN",
-	[VEC_FPUNSUP]	= "FPCP UNSUPPORTED OPERATION",
-	[VEC_MMUCFG]	= "MMU CONFIGURATION ERROR",
-	[VEC_MMUILL]	= "MMU ILLEGAL OPERATION ERROR",
-	[VEC_MMUACC]	= "MMU ACCESS LEVEL VIOLATION ERROR",
-	[VEC_RESV59]	= "UNASSIGNED RESERVED 59",
-	[VEC_UNIMPEA]	= "UNASSIGNED RESERVED 60",
-	[VEC_UNIMPII]	= "UNASSIGNED RESERVED 61",
-	[VEC_RESV62]	= "UNASSIGNED RESERVED 62",
-	[VEC_RESV63]	= "UNASSIGNED RESERVED 63",
-};
-
-static const char *space_names[] = {
-	[0]		= "Space 0",
-	[USER_DATA]	= "User Data",
-	[USER_PROGRAM]	= "User Program",
-#ifndef CONFIG_SUN3
-	[3]		= "Space 3",
+#ifdef CONFIG_MMU
+#include "traps_mm.c"
 #else
-	[FC_CONTROL]	= "Control",
-#endif
-	[4]		= "Space 4",
-	[SUPER_DATA]	= "Super Data",
-	[SUPER_PROGRAM]	= "Super Program",
-	[CPU_SPACE]	= "CPU"
-};
-
-void die_if_kernel(char *,struct pt_regs *,int);
-asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address,
-                             unsigned long error_code);
-int send_fault_sig(struct pt_regs *regs);
-
-asmlinkage void trap_c(struct frame *fp);
-
-#if defined (CONFIG_M68060)
-static inline void access_error060 (struct frame *fp)
-{
-	unsigned long fslw = fp->un.fmt4.pc; /* is really FSLW for access error */
-
-#ifdef DEBUG
-	printk("fslw=%#lx, fa=%#lx\n", fslw, fp->un.fmt4.effaddr);
-#endif
-
-	if (fslw & MMU060_BPE) {
-		/* branch prediction error -> clear branch cache */
-		__asm__ __volatile__ ("movec %/cacr,%/d0\n\t"
-				      "orl   #0x00400000,%/d0\n\t"
-				      "movec %/d0,%/cacr"
-				      : : : "d0" );
-		/* return if there's no other error */
-		if (!(fslw & MMU060_ERR_BITS) && !(fslw & MMU060_SEE))
-			return;
-	}
-
-	if (fslw & (MMU060_DESC_ERR | MMU060_WP | MMU060_SP)) {
-		unsigned long errorcode;
-		unsigned long addr = fp->un.fmt4.effaddr;
-
-		if (fslw & MMU060_MA)
-			addr = (addr + PAGE_SIZE - 1) & PAGE_MASK;
-
-		errorcode = 1;
-		if (fslw & MMU060_DESC_ERR) {
-			__flush_tlb040_one(addr);
-			errorcode = 0;
-		}
-		if (fslw & MMU060_W)
-			errorcode |= 2;
-#ifdef DEBUG
-		printk("errorcode = %d\n", errorcode );
-#endif
-		do_page_fault(&fp->ptregs, addr, errorcode);
-	} else if (fslw & (MMU060_SEE)){
-		/* Software Emulation Error.
-		 * fault during mem_read/mem_write in ifpsp060/os.S
-		 */
-		send_fault_sig(&fp->ptregs);
-	} else if (!(fslw & (MMU060_RE|MMU060_WE)) ||
-		   send_fault_sig(&fp->ptregs) > 0) {
-		printk("pc=%#lx, fa=%#lx\n", fp->ptregs.pc, fp->un.fmt4.effaddr);
-		printk( "68060 access error, fslw=%lx\n", fslw );
-		trap_c( fp );
-	}
-}
-#endif /* CONFIG_M68060 */
-
-#if defined (CONFIG_M68040)
-static inline unsigned long probe040(int iswrite, unsigned long addr, int wbs)
-{
-	unsigned long mmusr;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(MAKE_MM_SEG(wbs));
-
-	if (iswrite)
-		asm volatile (".chip 68040; ptestw (%0); .chip 68k" : : "a" (addr));
-	else
-		asm volatile (".chip 68040; ptestr (%0); .chip 68k" : : "a" (addr));
-
-	asm volatile (".chip 68040; movec %%mmusr,%0; .chip 68k" : "=r" (mmusr));
-
-	set_fs(old_fs);
-
-	return mmusr;
-}
-
-static inline int do_040writeback1(unsigned short wbs, unsigned long wba,
-				   unsigned long wbd)
-{
-	int res = 0;
-	mm_segment_t old_fs = get_fs();
-
-	/* set_fs can not be moved, otherwise put_user() may oops */
-	set_fs(MAKE_MM_SEG(wbs));
-
-	switch (wbs & WBSIZ_040) {
-	case BA_SIZE_BYTE:
-		res = put_user(wbd & 0xff, (char __user *)wba);
-		break;
-	case BA_SIZE_WORD:
-		res = put_user(wbd & 0xffff, (short __user *)wba);
-		break;
-	case BA_SIZE_LONG:
-		res = put_user(wbd, (int __user *)wba);
-		break;
-	}
-
-	/* set_fs can not be moved, otherwise put_user() may oops */
-	set_fs(old_fs);
-
-
-#ifdef DEBUG
-	printk("do_040writeback1, res=%d\n",res);
-#endif
-
-	return res;
-}
-
-/* after an exception in a writeback the stack frame corresponding
- * to that exception is discarded, set a few bits in the old frame
- * to simulate what it should look like
- */
-static inline void fix_xframe040(struct frame *fp, unsigned long wba, unsigned short wbs)
-{
-	fp->un.fmt7.faddr = wba;
-	fp->un.fmt7.ssw = wbs & 0xff;
-	if (wba != current->thread.faddr)
-	    fp->un.fmt7.ssw |= MA_040;
-}
-
-static inline void do_040writebacks(struct frame *fp)
-{
-	int res = 0;
-#if 0
-	if (fp->un.fmt7.wb1s & WBV_040)
-		printk("access_error040: cannot handle 1st writeback. oops.\n");
-#endif
-
-	if ((fp->un.fmt7.wb2s & WBV_040) &&
-	    !(fp->un.fmt7.wb2s & WBTT_040)) {
-		res = do_040writeback1(fp->un.fmt7.wb2s, fp->un.fmt7.wb2a,
-				       fp->un.fmt7.wb2d);
-		if (res)
-			fix_xframe040(fp, fp->un.fmt7.wb2a, fp->un.fmt7.wb2s);
-		else
-			fp->un.fmt7.wb2s = 0;
-	}
-
-	/* do the 2nd wb only if the first one was successful (except for a kernel wb) */
-	if (fp->un.fmt7.wb3s & WBV_040 && (!res || fp->un.fmt7.wb3s & 4)) {
-		res = do_040writeback1(fp->un.fmt7.wb3s, fp->un.fmt7.wb3a,
-				       fp->un.fmt7.wb3d);
-		if (res)
-		    {
-			fix_xframe040(fp, fp->un.fmt7.wb3a, fp->un.fmt7.wb3s);
-
-			fp->un.fmt7.wb2s = fp->un.fmt7.wb3s;
-			fp->un.fmt7.wb3s &= (~WBV_040);
-			fp->un.fmt7.wb2a = fp->un.fmt7.wb3a;
-			fp->un.fmt7.wb2d = fp->un.fmt7.wb3d;
-		    }
-		else
-			fp->un.fmt7.wb3s = 0;
-	}
-
-	if (res)
-		send_fault_sig(&fp->ptregs);
-}
-
-/*
- * called from sigreturn(), must ensure userspace code didn't
- * manipulate exception frame to circumvent protection, then complete
- * pending writebacks
- * we just clear TM2 to turn it into a userspace access
- */
-asmlinkage void berr_040cleanup(struct frame *fp)
-{
-	fp->un.fmt7.wb2s &= ~4;
-	fp->un.fmt7.wb3s &= ~4;
-
-	do_040writebacks(fp);
-}
-
-static inline void access_error040(struct frame *fp)
-{
-	unsigned short ssw = fp->un.fmt7.ssw;
-	unsigned long mmusr;
-
-#ifdef DEBUG
-	printk("ssw=%#x, fa=%#lx\n", ssw, fp->un.fmt7.faddr);
-        printk("wb1s=%#x, wb2s=%#x, wb3s=%#x\n", fp->un.fmt7.wb1s,
-		fp->un.fmt7.wb2s, fp->un.fmt7.wb3s);
-	printk ("wb2a=%lx, wb3a=%lx, wb2d=%lx, wb3d=%lx\n",
-		fp->un.fmt7.wb2a, fp->un.fmt7.wb3a,
-		fp->un.fmt7.wb2d, fp->un.fmt7.wb3d);
-#endif
-
-	if (ssw & ATC_040) {
-		unsigned long addr = fp->un.fmt7.faddr;
-		unsigned long errorcode;
-
-		/*
-		 * The MMU status has to be determined AFTER the address
-		 * has been corrected if there was a misaligned access (MA).
-		 */
-		if (ssw & MA_040)
-			addr = (addr + 7) & -8;
-
-		/* MMU error, get the MMUSR info for this access */
-		mmusr = probe040(!(ssw & RW_040), addr, ssw);
-#ifdef DEBUG
-		printk("mmusr = %lx\n", mmusr);
-#endif
-		errorcode = 1;
-		if (!(mmusr & MMU_R_040)) {
-			/* clear the invalid atc entry */
-			__flush_tlb040_one(addr);
-			errorcode = 0;
-		}
-
-		/* despite what documentation seems to say, RMW
-		 * accesses have always both the LK and RW bits set */
-		if (!(ssw & RW_040) || (ssw & LK_040))
-			errorcode |= 2;
-
-		if (do_page_fault(&fp->ptregs, addr, errorcode)) {
-#ifdef DEBUG
-			printk("do_page_fault() !=0\n");
-#endif
-			if (user_mode(&fp->ptregs)){
-				/* delay writebacks after signal delivery */
-#ifdef DEBUG
-			        printk(".. was usermode - return\n");
-#endif
-				return;
-			}
-			/* disable writeback into user space from kernel
-			 * (if do_page_fault didn't fix the mapping,
-                         * the writeback won't do good)
-			 */
-disable_wb:
-#ifdef DEBUG
-			printk(".. disabling wb2\n");
-#endif
-			if (fp->un.fmt7.wb2a == fp->un.fmt7.faddr)
-				fp->un.fmt7.wb2s &= ~WBV_040;
-			if (fp->un.fmt7.wb3a == fp->un.fmt7.faddr)
-				fp->un.fmt7.wb3s &= ~WBV_040;
-		}
-	} else {
-		/* In case of a bus error we either kill the process or expect
-		 * the kernel to catch the fault, which then is also responsible
-		 * for cleaning up the mess.
-		 */
-		current->thread.signo = SIGBUS;
-		current->thread.faddr = fp->un.fmt7.faddr;
-		if (send_fault_sig(&fp->ptregs) >= 0)
-			printk("68040 bus error (ssw=%x, faddr=%lx)\n", ssw,
-			       fp->un.fmt7.faddr);
-		goto disable_wb;
-	}
-
-	do_040writebacks(fp);
-}
-#endif /* CONFIG_M68040 */
-
-#if defined(CONFIG_SUN3)
-#include <asm/sun3mmu.h>
-
-extern int mmu_emu_handle_fault (unsigned long, int, int);
-
-/* sun3 version of bus_error030 */
-
-static inline void bus_error030 (struct frame *fp)
-{
-	unsigned char buserr_type = sun3_get_buserr ();
-	unsigned long addr, errorcode;
-	unsigned short ssw = fp->un.fmtb.ssw;
-	extern unsigned long _sun3_map_test_start, _sun3_map_test_end;
-
-#ifdef DEBUG
-	if (ssw & (FC | FB))
-		printk ("Instruction fault at %#010lx\n",
-			ssw & FC ?
-			fp->ptregs.format == 0xa ? fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2
-			:
-			fp->ptregs.format == 0xa ? fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
-	if (ssw & DF)
-		printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
-			ssw & RW ? "read" : "write",
-			fp->un.fmtb.daddr,
-			space_names[ssw & DFC], fp->ptregs.pc);
-#endif
-
-	/*
-	 * Check if this page should be demand-mapped. This needs to go before
-	 * the testing for a bad kernel-space access (demand-mapping applies
-	 * to kernel accesses too).
-	 */
-
-	if ((ssw & DF)
-	    && (buserr_type & (SUN3_BUSERR_PROTERR | SUN3_BUSERR_INVALID))) {
-		if (mmu_emu_handle_fault (fp->un.fmtb.daddr, ssw & RW, 0))
-			return;
-	}
-
-	/* Check for kernel-space pagefault (BAD). */
-	if (fp->ptregs.sr & PS_S) {
-		/* kernel fault must be a data fault to user space */
-		if (! ((ssw & DF) && ((ssw & DFC) == USER_DATA))) {
-		     // try checking the kernel mappings before surrender
-		     if (mmu_emu_handle_fault (fp->un.fmtb.daddr, ssw & RW, 1))
-			  return;
-			/* instruction fault or kernel data fault! */
-			if (ssw & (FC | FB))
-				printk ("Instruction fault at %#010lx\n",
-					fp->ptregs.pc);
-			if (ssw & DF) {
-				/* was this fault incurred testing bus mappings? */
-				if((fp->ptregs.pc >= (unsigned long)&_sun3_map_test_start) &&
-				   (fp->ptregs.pc <= (unsigned long)&_sun3_map_test_end)) {
-					send_fault_sig(&fp->ptregs);
-					return;
-				}
-
-				printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
-					ssw & RW ? "read" : "write",
-					fp->un.fmtb.daddr,
-					space_names[ssw & DFC], fp->ptregs.pc);
-			}
-			printk ("BAD KERNEL BUSERR\n");
-
-			die_if_kernel("Oops", &fp->ptregs,0);
-			force_sig(SIGKILL, current);
-			return;
-		}
-	} else {
-		/* user fault */
-		if (!(ssw & (FC | FB)) && !(ssw & DF))
-			/* not an instruction fault or data fault! BAD */
-			panic ("USER BUSERR w/o instruction or data fault");
-	}
-
-
-	/* First handle the data fault, if any.  */
-	if (ssw & DF) {
-		addr = fp->un.fmtb.daddr;
-
-// errorcode bit 0:	0 -> no page		1 -> protection fault
-// errorcode bit 1:	0 -> read fault		1 -> write fault
-
-// (buserr_type & SUN3_BUSERR_PROTERR)	-> protection fault
-// (buserr_type & SUN3_BUSERR_INVALID)	-> invalid page fault
-
-		if (buserr_type & SUN3_BUSERR_PROTERR)
-			errorcode = 0x01;
-		else if (buserr_type & SUN3_BUSERR_INVALID)
-			errorcode = 0x00;
-		else {
-#ifdef DEBUG
-			printk ("*** unexpected busfault type=%#04x\n", buserr_type);
-			printk ("invalid %s access at %#lx from pc %#lx\n",
-				!(ssw & RW) ? "write" : "read", addr,
-				fp->ptregs.pc);
-#endif
-			die_if_kernel ("Oops", &fp->ptregs, buserr_type);
-			force_sig (SIGBUS, current);
-			return;
-		}
-
-//todo: wtf is RM bit? --m
-		if (!(ssw & RW) || ssw & RM)
-			errorcode |= 0x02;
-
-		/* Handle page fault. */
-		do_page_fault (&fp->ptregs, addr, errorcode);
-
-		/* Retry the data fault now. */
-		return;
-	}
-
-	/* Now handle the instruction fault. */
-
-	/* Get the fault address. */
-	if (fp->ptregs.format == 0xA)
-		addr = fp->ptregs.pc + 4;
-	else
-		addr = fp->un.fmtb.baddr;
-	if (ssw & FC)
-		addr -= 2;
-
-	if (buserr_type & SUN3_BUSERR_INVALID) {
-		if (!mmu_emu_handle_fault (fp->un.fmtb.daddr, 1, 0))
-			do_page_fault (&fp->ptregs, addr, 0);
-       } else {
-#ifdef DEBUG
-		printk ("protection fault on insn access (segv).\n");
-#endif
-		force_sig (SIGSEGV, current);
-       }
-}
-#else
-#if defined(CPU_M68020_OR_M68030)
-static inline void bus_error030 (struct frame *fp)
-{
-	volatile unsigned short temp;
-	unsigned short mmusr;
-	unsigned long addr, errorcode;
-	unsigned short ssw = fp->un.fmtb.ssw;
-#ifdef DEBUG
-	unsigned long desc;
-
-	printk ("pid = %x  ", current->pid);
-	printk ("SSW=%#06x  ", ssw);
-
-	if (ssw & (FC | FB))
-		printk ("Instruction fault at %#010lx\n",
-			ssw & FC ?
-			fp->ptregs.format == 0xa ? fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2
-			:
-			fp->ptregs.format == 0xa ? fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
-	if (ssw & DF)
-		printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
-			ssw & RW ? "read" : "write",
-			fp->un.fmtb.daddr,
-			space_names[ssw & DFC], fp->ptregs.pc);
-#endif
-
-	/* ++andreas: If a data fault and an instruction fault happen
-	   at the same time map in both pages.  */
-
-	/* First handle the data fault, if any.  */
-	if (ssw & DF) {
-		addr = fp->un.fmtb.daddr;
-
-#ifdef DEBUG
-		asm volatile ("ptestr %3,%2@,#7,%0\n\t"
-			      "pmove %%psr,%1@"
-			      : "=a&" (desc)
-			      : "a" (&temp), "a" (addr), "d" (ssw));
-#else
-		asm volatile ("ptestr %2,%1@,#7\n\t"
-			      "pmove %%psr,%0@"
-			      : : "a" (&temp), "a" (addr), "d" (ssw));
-#endif
-		mmusr = temp;
-
-#ifdef DEBUG
-		printk("mmusr is %#x for addr %#lx in task %p\n",
-		       mmusr, addr, current);
-		printk("descriptor address is %#lx, contents %#lx\n",
-		       __va(desc), *(unsigned long *)__va(desc));
-#endif
-
-		errorcode = (mmusr & MMU_I) ? 0 : 1;
-		if (!(ssw & RW) || (ssw & RM))
-			errorcode |= 2;
-
-		if (mmusr & (MMU_I | MMU_WP)) {
-			if (ssw & 4) {
-				printk("Data %s fault at %#010lx in %s (pc=%#lx)\n",
-				       ssw & RW ? "read" : "write",
-				       fp->un.fmtb.daddr,
-				       space_names[ssw & DFC], fp->ptregs.pc);
-				goto buserr;
-			}
-			/* Don't try to do anything further if an exception was
-			   handled. */
-			if (do_page_fault (&fp->ptregs, addr, errorcode) < 0)
-				return;
-		} else if (!(mmusr & MMU_I)) {
-			/* probably a 020 cas fault */
-			if (!(ssw & RM) && send_fault_sig(&fp->ptregs) > 0)
-				printk("unexpected bus error (%#x,%#x)\n", ssw, mmusr);
-		} else if (mmusr & (MMU_B|MMU_L|MMU_S)) {
-			printk("invalid %s access at %#lx from pc %#lx\n",
-			       !(ssw & RW) ? "write" : "read", addr,
-			       fp->ptregs.pc);
-			die_if_kernel("Oops",&fp->ptregs,mmusr);
-			force_sig(SIGSEGV, current);
-			return;
-		} else {
-#if 0
-			static volatile long tlong;
-#endif
-
-			printk("weird %s access at %#lx from pc %#lx (ssw is %#x)\n",
-			       !(ssw & RW) ? "write" : "read", addr,
-			       fp->ptregs.pc, ssw);
-			asm volatile ("ptestr #1,%1@,#0\n\t"
-				      "pmove %%psr,%0@"
-				      : /* no outputs */
-				      : "a" (&temp), "a" (addr));
-			mmusr = temp;
-
-			printk ("level 0 mmusr is %#x\n", mmusr);
-#if 0
-			asm volatile ("pmove %%tt0,%0@"
-				      : /* no outputs */
-				      : "a" (&tlong));
-			printk("tt0 is %#lx, ", tlong);
-			asm volatile ("pmove %%tt1,%0@"
-				      : /* no outputs */
-				      : "a" (&tlong));
-			printk("tt1 is %#lx\n", tlong);
-#endif
-#ifdef DEBUG
-			printk("Unknown SIGSEGV - 1\n");
-#endif
-			die_if_kernel("Oops",&fp->ptregs,mmusr);
-			force_sig(SIGSEGV, current);
-			return;
-		}
-
-		/* setup an ATC entry for the access about to be retried */
-		if (!(ssw & RW) || (ssw & RM))
-			asm volatile ("ploadw %1,%0@" : /* no outputs */
-				      : "a" (addr), "d" (ssw));
-		else
-			asm volatile ("ploadr %1,%0@" : /* no outputs */
-				      : "a" (addr), "d" (ssw));
-	}
-
-	/* Now handle the instruction fault. */
-
-	if (!(ssw & (FC|FB)))
-		return;
-
-	if (fp->ptregs.sr & PS_S) {
-		printk("Instruction fault at %#010lx\n",
-			fp->ptregs.pc);
-	buserr:
-		printk ("BAD KERNEL BUSERR\n");
-		die_if_kernel("Oops",&fp->ptregs,0);
-		force_sig(SIGKILL, current);
-		return;
-	}
-
-	/* get the fault address */
-	if (fp->ptregs.format == 10)
-		addr = fp->ptregs.pc + 4;
-	else
-		addr = fp->un.fmtb.baddr;
-	if (ssw & FC)
-		addr -= 2;
-
-	if ((ssw & DF) && ((addr ^ fp->un.fmtb.daddr) & PAGE_MASK) == 0)
-		/* Insn fault on same page as data fault.  But we
-		   should still create the ATC entry.  */
-		goto create_atc_entry;
-
-#ifdef DEBUG
-	asm volatile ("ptestr #1,%2@,#7,%0\n\t"
-		      "pmove %%psr,%1@"
-		      : "=a&" (desc)
-		      : "a" (&temp), "a" (addr));
-#else
-	asm volatile ("ptestr #1,%1@,#7\n\t"
-		      "pmove %%psr,%0@"
-		      : : "a" (&temp), "a" (addr));
-#endif
-	mmusr = temp;
-
-#ifdef DEBUG
-	printk ("mmusr is %#x for addr %#lx in task %p\n",
-		mmusr, addr, current);
-	printk ("descriptor address is %#lx, contents %#lx\n",
-		__va(desc), *(unsigned long *)__va(desc));
-#endif
-
-	if (mmusr & MMU_I)
-		do_page_fault (&fp->ptregs, addr, 0);
-	else if (mmusr & (MMU_B|MMU_L|MMU_S)) {
-		printk ("invalid insn access at %#lx from pc %#lx\n",
-			addr, fp->ptregs.pc);
-#ifdef DEBUG
-		printk("Unknown SIGSEGV - 2\n");
-#endif
-		die_if_kernel("Oops",&fp->ptregs,mmusr);
-		force_sig(SIGSEGV, current);
-		return;
-	}
-
-create_atc_entry:
-	/* setup an ATC entry for the access about to be retried */
-	asm volatile ("ploadr #2,%0@" : /* no outputs */
-		      : "a" (addr));
-}
-#endif /* CPU_M68020_OR_M68030 */
-#endif /* !CONFIG_SUN3 */
-
-asmlinkage void buserr_c(struct frame *fp)
-{
-	/* Only set esp0 if coming from user mode */
-	if (user_mode(&fp->ptregs))
-		current->thread.esp0 = (unsigned long) fp;
-
-#ifdef DEBUG
-	printk ("*** Bus Error *** Format is %x\n", fp->ptregs.format);
-#endif
-
-	switch (fp->ptregs.format) {
-#if defined (CONFIG_M68060)
-	case 4:				/* 68060 access error */
-	  access_error060 (fp);
-	  break;
-#endif
-#if defined (CONFIG_M68040)
-	case 0x7:			/* 68040 access error */
-	  access_error040 (fp);
-	  break;
-#endif
-#if defined (CPU_M68020_OR_M68030)
-	case 0xa:
-	case 0xb:
-	  bus_error030 (fp);
-	  break;
-#endif
-	default:
-	  die_if_kernel("bad frame format",&fp->ptregs,0);
-#ifdef DEBUG
-	  printk("Unknown SIGSEGV - 4\n");
-#endif
-	  force_sig(SIGSEGV, current);
-	}
-}
-
-
-static int kstack_depth_to_print = 48;
-
-void show_trace(unsigned long *stack)
-{
-	unsigned long *endstack;
-	unsigned long addr;
-	int i;
-
-	printk("Call Trace:");
-	addr = (unsigned long)stack + THREAD_SIZE - 1;
-	endstack = (unsigned long *)(addr & -THREAD_SIZE);
-	i = 0;
-	while (stack + 1 <= endstack) {
-		addr = *stack++;
-		/*
-		 * If the address is either in the text segment of the
-		 * kernel, or in the region which contains vmalloc'ed
-		 * memory, it *may* be the address of a calling
-		 * routine; if so, print it so that someone tracing
-		 * down the cause of the crash will be able to figure
-		 * out the call path that was taken.
-		 */
-		if (__kernel_text_address(addr)) {
-#ifndef CONFIG_KALLSYMS
-			if (i % 5 == 0)
-				printk("\n       ");
-#endif
-			printk(" [<%08lx>] %pS\n", addr, (void *)addr);
-			i++;
-		}
-	}
-	printk("\n");
-}
-
-void show_registers(struct pt_regs *regs)
-{
-	struct frame *fp = (struct frame *)regs;
-	mm_segment_t old_fs = get_fs();
-	u16 c, *cp;
-	unsigned long addr;
-	int i;
-
-	print_modules();
-	printk("PC: [<%08lx>] %pS\n", regs->pc, (void *)regs->pc);
-	printk("SR: %04x  SP: %p  a2: %08lx\n", regs->sr, regs, regs->a2);
-	printk("d0: %08lx    d1: %08lx    d2: %08lx    d3: %08lx\n",
-	       regs->d0, regs->d1, regs->d2, regs->d3);
-	printk("d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
-	       regs->d4, regs->d5, regs->a0, regs->a1);
-
-	printk("Process %s (pid: %d, task=%p)\n",
-		current->comm, task_pid_nr(current), current);
-	addr = (unsigned long)&fp->un;
-	printk("Frame format=%X ", regs->format);
-	switch (regs->format) {
-	case 0x2:
-		printk("instr addr=%08lx\n", fp->un.fmt2.iaddr);
-		addr += sizeof(fp->un.fmt2);
-		break;
-	case 0x3:
-		printk("eff addr=%08lx\n", fp->un.fmt3.effaddr);
-		addr += sizeof(fp->un.fmt3);
-		break;
-	case 0x4:
-		printk((CPU_IS_060 ? "fault addr=%08lx fslw=%08lx\n"
-			: "eff addr=%08lx pc=%08lx\n"),
-			fp->un.fmt4.effaddr, fp->un.fmt4.pc);
-		addr += sizeof(fp->un.fmt4);
-		break;
-	case 0x7:
-		printk("eff addr=%08lx ssw=%04x faddr=%08lx\n",
-			fp->un.fmt7.effaddr, fp->un.fmt7.ssw, fp->un.fmt7.faddr);
-		printk("wb 1 stat/addr/data: %04x %08lx %08lx\n",
-			fp->un.fmt7.wb1s, fp->un.fmt7.wb1a, fp->un.fmt7.wb1dpd0);
-		printk("wb 2 stat/addr/data: %04x %08lx %08lx\n",
-			fp->un.fmt7.wb2s, fp->un.fmt7.wb2a, fp->un.fmt7.wb2d);
-		printk("wb 3 stat/addr/data: %04x %08lx %08lx\n",
-			fp->un.fmt7.wb3s, fp->un.fmt7.wb3a, fp->un.fmt7.wb3d);
-		printk("push data: %08lx %08lx %08lx %08lx\n",
-			fp->un.fmt7.wb1dpd0, fp->un.fmt7.pd1, fp->un.fmt7.pd2,
-			fp->un.fmt7.pd3);
-		addr += sizeof(fp->un.fmt7);
-		break;
-	case 0x9:
-		printk("instr addr=%08lx\n", fp->un.fmt9.iaddr);
-		addr += sizeof(fp->un.fmt9);
-		break;
-	case 0xa:
-		printk("ssw=%04x isc=%04x isb=%04x daddr=%08lx dobuf=%08lx\n",
-			fp->un.fmta.ssw, fp->un.fmta.isc, fp->un.fmta.isb,
-			fp->un.fmta.daddr, fp->un.fmta.dobuf);
-		addr += sizeof(fp->un.fmta);
-		break;
-	case 0xb:
-		printk("ssw=%04x isc=%04x isb=%04x daddr=%08lx dobuf=%08lx\n",
-			fp->un.fmtb.ssw, fp->un.fmtb.isc, fp->un.fmtb.isb,
-			fp->un.fmtb.daddr, fp->un.fmtb.dobuf);
-		printk("baddr=%08lx dibuf=%08lx ver=%x\n",
-			fp->un.fmtb.baddr, fp->un.fmtb.dibuf, fp->un.fmtb.ver);
-		addr += sizeof(fp->un.fmtb);
-		break;
-	default:
-		printk("\n");
-	}
-	show_stack(NULL, (unsigned long *)addr);
-
-	printk("Code:");
-	set_fs(KERNEL_DS);
-	cp = (u16 *)regs->pc;
-	for (i = -8; i < 16; i++) {
-		if (get_user(c, cp + i) && i >= 0) {
-			printk(" Bad PC value.");
-			break;
-		}
-		printk(i ? " %04x" : " <%04x>", c);
-	}
-	set_fs(old_fs);
-	printk ("\n");
-}
-
-void show_stack(struct task_struct *task, unsigned long *stack)
-{
-	unsigned long *p;
-	unsigned long *endstack;
-	int i;
-
-	if (!stack) {
-		if (task)
-			stack = (unsigned long *)task->thread.esp0;
-		else
-			stack = (unsigned long *)&stack;
-	}
-	endstack = (unsigned long *)(((unsigned long)stack + THREAD_SIZE - 1) & -THREAD_SIZE);
-
-	printk("Stack from %08lx:", (unsigned long)stack);
-	p = stack;
-	for (i = 0; i < kstack_depth_to_print; i++) {
-		if (p + 1 > endstack)
-			break;
-		if (i % 8 == 0)
-			printk("\n       ");
-		printk(" %08lx", *p++);
-	}
-	printk("\n");
-	show_trace(stack);
-}
-
-/*
- * The architecture-independent backtrace generator
- */
-void dump_stack(void)
-{
-	unsigned long stack;
-
-	show_trace(&stack);
-}
-
-EXPORT_SYMBOL(dump_stack);
-
-void bad_super_trap (struct frame *fp)
-{
-	console_verbose();
-	if (fp->ptregs.vector < 4 * ARRAY_SIZE(vec_names))
-		printk ("*** %s ***   FORMAT=%X\n",
-			vec_names[(fp->ptregs.vector) >> 2],
-			fp->ptregs.format);
-	else
-		printk ("*** Exception %d ***   FORMAT=%X\n",
-			(fp->ptregs.vector) >> 2,
-			fp->ptregs.format);
-	if (fp->ptregs.vector >> 2 == VEC_ADDRERR && CPU_IS_020_OR_030) {
-		unsigned short ssw = fp->un.fmtb.ssw;
-
-		printk ("SSW=%#06x  ", ssw);
-
-		if (ssw & RC)
-			printk ("Pipe stage C instruction fault at %#010lx\n",
-				(fp->ptregs.format) == 0xA ?
-				fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2);
-		if (ssw & RB)
-			printk ("Pipe stage B instruction fault at %#010lx\n",
-				(fp->ptregs.format) == 0xA ?
-				fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
-		if (ssw & DF)
-			printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
-				ssw & RW ? "read" : "write",
-				fp->un.fmtb.daddr, space_names[ssw & DFC],
-				fp->ptregs.pc);
-	}
-	printk ("Current process id is %d\n", task_pid_nr(current));
-	die_if_kernel("BAD KERNEL TRAP", &fp->ptregs, 0);
-}
-
-asmlinkage void trap_c(struct frame *fp)
-{
-	int sig;
-	siginfo_t info;
-
-	if (fp->ptregs.sr & PS_S) {
-		if (fp->ptregs.vector == VEC_TRACE << 2) {
-			/* traced a trapping instruction on a 68020/30,
-			 * real exception will be executed afterwards.
-			 */
-		} else if (!handle_kernel_fault(&fp->ptregs))
-			bad_super_trap(fp);
-		return;
-	}
-
-	/* send the appropriate signal to the user program */
-	switch ((fp->ptregs.vector) >> 2) {
-	    case VEC_ADDRERR:
-		info.si_code = BUS_ADRALN;
-		sig = SIGBUS;
-		break;
-	    case VEC_ILLEGAL:
-	    case VEC_LINE10:
-	    case VEC_LINE11:
-		info.si_code = ILL_ILLOPC;
-		sig = SIGILL;
-		break;
-	    case VEC_PRIV:
-		info.si_code = ILL_PRVOPC;
-		sig = SIGILL;
-		break;
-	    case VEC_COPROC:
-		info.si_code = ILL_COPROC;
-		sig = SIGILL;
-		break;
-	    case VEC_TRAP1:
-	    case VEC_TRAP2:
-	    case VEC_TRAP3:
-	    case VEC_TRAP4:
-	    case VEC_TRAP5:
-	    case VEC_TRAP6:
-	    case VEC_TRAP7:
-	    case VEC_TRAP8:
-	    case VEC_TRAP9:
-	    case VEC_TRAP10:
-	    case VEC_TRAP11:
-	    case VEC_TRAP12:
-	    case VEC_TRAP13:
-	    case VEC_TRAP14:
-		info.si_code = ILL_ILLTRP;
-		sig = SIGILL;
-		break;
-	    case VEC_FPBRUC:
-	    case VEC_FPOE:
-	    case VEC_FPNAN:
-		info.si_code = FPE_FLTINV;
-		sig = SIGFPE;
-		break;
-	    case VEC_FPIR:
-		info.si_code = FPE_FLTRES;
-		sig = SIGFPE;
-		break;
-	    case VEC_FPDIVZ:
-		info.si_code = FPE_FLTDIV;
-		sig = SIGFPE;
-		break;
-	    case VEC_FPUNDER:
-		info.si_code = FPE_FLTUND;
-		sig = SIGFPE;
-		break;
-	    case VEC_FPOVER:
-		info.si_code = FPE_FLTOVF;
-		sig = SIGFPE;
-		break;
-	    case VEC_ZERODIV:
-		info.si_code = FPE_INTDIV;
-		sig = SIGFPE;
-		break;
-	    case VEC_CHK:
-	    case VEC_TRAP:
-		info.si_code = FPE_INTOVF;
-		sig = SIGFPE;
-		break;
-	    case VEC_TRACE:		/* ptrace single step */
-		info.si_code = TRAP_TRACE;
-		sig = SIGTRAP;
-		break;
-	    case VEC_TRAP15:		/* breakpoint */
-		info.si_code = TRAP_BRKPT;
-		sig = SIGTRAP;
-		break;
-	    default:
-		info.si_code = ILL_ILLOPC;
-		sig = SIGILL;
-		break;
-	}
-	info.si_signo = sig;
-	info.si_errno = 0;
-	switch (fp->ptregs.format) {
-	    default:
-		info.si_addr = (void *) fp->ptregs.pc;
-		break;
-	    case 2:
-		info.si_addr = (void *) fp->un.fmt2.iaddr;
-		break;
-	    case 7:
-		info.si_addr = (void *) fp->un.fmt7.effaddr;
-		break;
-	    case 9:
-		info.si_addr = (void *) fp->un.fmt9.iaddr;
-		break;
-	    case 10:
-		info.si_addr = (void *) fp->un.fmta.daddr;
-		break;
-	    case 11:
-		info.si_addr = (void *) fp->un.fmtb.daddr;
-		break;
-	}
-	force_sig_info (sig, &info, current);
-}
-
-void die_if_kernel (char *str, struct pt_regs *fp, int nr)
-{
-	if (!(fp->sr & PS_S))
-		return;
-
-	console_verbose();
-	printk("%s: %08x\n",str,nr);
-	show_registers(fp);
-	add_taint(TAINT_DIE);
-	do_exit(SIGSEGV);
-}
-
-/*
- * This function is called if an error occur while accessing
- * user-space from the fpsp040 code.
- */
-asmlinkage void fpsp040_die(void)
-{
-	do_exit(SIGSEGV);
-}
-
-#ifdef CONFIG_M68KFPU_EMU
-asmlinkage void fpemu_signal(int signal, int code, void *addr)
-{
-	siginfo_t info;
-
-	info.si_signo = signal;
-	info.si_errno = 0;
-	info.si_code = code;
-	info.si_addr = addr;
-	force_sig_info(signal, &info, current);
-}
+#include "traps_no.c"
 #endif

diff --git a/arch/m68k/kernel/traps_mm.c b/arch/m68k/kernel/traps_mm.c
new file mode 100644
index 0000000..4022bbc
--- /dev/null
+++ b/arch/m68k/kernel/traps_mm.c

@@ -0,0 +1,1207 @@
+/*
+ *  linux/arch/m68k/kernel/traps.c
+ *
+ *  Copyright (C) 1993, 1994 by Hamish Macdonald
+ *
+ *  68040 fixes by Michael Rausch
+ *  68040 fixes by Martin Apel
+ *  68040 fixes and writeback by Richard Zidlicky
+ *  68060 fixes by Roman Hodek
+ *  68060 fixes by Jesper Skov
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+/*
+ * Sets up all exception vectors
+ */
+
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/user.h>
+#include <linux/string.h>
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <linux/ptrace.h>
+#include <linux/kallsyms.h>
+
+#include <asm/setup.h>
+#include <asm/fpu.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/traps.h>
+#include <asm/pgalloc.h>
+#include <asm/machdep.h>
+#include <asm/siginfo.h>
+
+/* assembler routines */
+asmlinkage void system_call(void);
+asmlinkage void buserr(void);
+asmlinkage void trap(void);
+asmlinkage void nmihandler(void);
+#ifdef CONFIG_M68KFPU_EMU
+asmlinkage void fpu_emu(void);
+#endif
+
+e_vector vectors[256];
+
+/* nmi handler for the Amiga */
+asm(".text\n"
+    __ALIGN_STR "\n"
+    "nmihandler: rte");
+
+/*
+ * this must be called very early as the kernel might
+ * use some instruction that are emulated on the 060
+ * and so we're prepared for early probe attempts (e.g. nf_init).
+ */
+void __init base_trap_init(void)
+{
+	if (MACH_IS_SUN3X) {
+		extern e_vector *sun3x_prom_vbr;
+
+		__asm__ volatile ("movec %%vbr, %0" : "=r" (sun3x_prom_vbr));
+	}
+
+	/* setup the exception vector table */
+	__asm__ volatile ("movec %0,%%vbr" : : "r" ((void*)vectors));
+
+	if (CPU_IS_060) {
+		/* set up ISP entry points */
+		asmlinkage void unimp_vec(void) asm ("_060_isp_unimp");
+
+		vectors[VEC_UNIMPII] = unimp_vec;
+	}
+
+	vectors[VEC_BUSERR] = buserr;
+	vectors[VEC_ILLEGAL] = trap;
+	vectors[VEC_SYS] = system_call;
+}
+
+void __init trap_init (void)
+{
+	int i;
+
+	for (i = VEC_SPUR; i <= VEC_INT7; i++)
+		vectors[i] = bad_inthandler;
+
+	for (i = 0; i < VEC_USER; i++)
+		if (!vectors[i])
+			vectors[i] = trap;
+
+	for (i = VEC_USER; i < 256; i++)
+		vectors[i] = bad_inthandler;
+
+#ifdef CONFIG_M68KFPU_EMU
+	if (FPU_IS_EMU)
+		vectors[VEC_LINE11] = fpu_emu;
+#endif
+
+	if (CPU_IS_040 && !FPU_IS_EMU) {
+		/* set up FPSP entry points */
+		asmlinkage void dz_vec(void) asm ("dz");
+		asmlinkage void inex_vec(void) asm ("inex");
+		asmlinkage void ovfl_vec(void) asm ("ovfl");
+		asmlinkage void unfl_vec(void) asm ("unfl");
+		asmlinkage void snan_vec(void) asm ("snan");
+		asmlinkage void operr_vec(void) asm ("operr");
+		asmlinkage void bsun_vec(void) asm ("bsun");
+		asmlinkage void fline_vec(void) asm ("fline");
+		asmlinkage void unsupp_vec(void) asm ("unsupp");
+
+		vectors[VEC_FPDIVZ] = dz_vec;
+		vectors[VEC_FPIR] = inex_vec;
+		vectors[VEC_FPOVER] = ovfl_vec;
+		vectors[VEC_FPUNDER] = unfl_vec;
+		vectors[VEC_FPNAN] = snan_vec;
+		vectors[VEC_FPOE] = operr_vec;
+		vectors[VEC_FPBRUC] = bsun_vec;
+		vectors[VEC_LINE11] = fline_vec;
+		vectors[VEC_FPUNSUP] = unsupp_vec;
+	}
+
+	if (CPU_IS_060 && !FPU_IS_EMU) {
+		/* set up IFPSP entry points */
+		asmlinkage void snan_vec6(void) asm ("_060_fpsp_snan");
+		asmlinkage void operr_vec6(void) asm ("_060_fpsp_operr");
+		asmlinkage void ovfl_vec6(void) asm ("_060_fpsp_ovfl");
+		asmlinkage void unfl_vec6(void) asm ("_060_fpsp_unfl");
+		asmlinkage void dz_vec6(void) asm ("_060_fpsp_dz");
+		asmlinkage void inex_vec6(void) asm ("_060_fpsp_inex");
+		asmlinkage void fline_vec6(void) asm ("_060_fpsp_fline");
+		asmlinkage void unsupp_vec6(void) asm ("_060_fpsp_unsupp");
+		asmlinkage void effadd_vec6(void) asm ("_060_fpsp_effadd");
+
+		vectors[VEC_FPNAN] = snan_vec6;
+		vectors[VEC_FPOE] = operr_vec6;
+		vectors[VEC_FPOVER] = ovfl_vec6;
+		vectors[VEC_FPUNDER] = unfl_vec6;
+		vectors[VEC_FPDIVZ] = dz_vec6;
+		vectors[VEC_FPIR] = inex_vec6;
+		vectors[VEC_LINE11] = fline_vec6;
+		vectors[VEC_FPUNSUP] = unsupp_vec6;
+		vectors[VEC_UNIMPEA] = effadd_vec6;
+	}
+
+        /* if running on an amiga, make the NMI interrupt do nothing */
+	if (MACH_IS_AMIGA) {
+		vectors[VEC_INT7] = nmihandler;
+	}
+}
+
+
+static const char *vec_names[] = {
+	[VEC_RESETSP]	= "RESET SP",
+	[VEC_RESETPC]	= "RESET PC",
+	[VEC_BUSERR]	= "BUS ERROR",
+	[VEC_ADDRERR]	= "ADDRESS ERROR",
+	[VEC_ILLEGAL]	= "ILLEGAL INSTRUCTION",
+	[VEC_ZERODIV]	= "ZERO DIVIDE",
+	[VEC_CHK]	= "CHK",
+	[VEC_TRAP]	= "TRAPcc",
+	[VEC_PRIV]	= "PRIVILEGE VIOLATION",
+	[VEC_TRACE]	= "TRACE",
+	[VEC_LINE10]	= "LINE 1010",
+	[VEC_LINE11]	= "LINE 1111",
+	[VEC_RESV12]	= "UNASSIGNED RESERVED 12",
+	[VEC_COPROC]	= "COPROCESSOR PROTOCOL VIOLATION",
+	[VEC_FORMAT]	= "FORMAT ERROR",
+	[VEC_UNINT]	= "UNINITIALIZED INTERRUPT",
+	[VEC_RESV16]	= "UNASSIGNED RESERVED 16",
+	[VEC_RESV17]	= "UNASSIGNED RESERVED 17",
+	[VEC_RESV18]	= "UNASSIGNED RESERVED 18",
+	[VEC_RESV19]	= "UNASSIGNED RESERVED 19",
+	[VEC_RESV20]	= "UNASSIGNED RESERVED 20",
+	[VEC_RESV21]	= "UNASSIGNED RESERVED 21",
+	[VEC_RESV22]	= "UNASSIGNED RESERVED 22",
+	[VEC_RESV23]	= "UNASSIGNED RESERVED 23",
+	[VEC_SPUR]	= "SPURIOUS INTERRUPT",
+	[VEC_INT1]	= "LEVEL 1 INT",
+	[VEC_INT2]	= "LEVEL 2 INT",
+	[VEC_INT3]	= "LEVEL 3 INT",
+	[VEC_INT4]	= "LEVEL 4 INT",
+	[VEC_INT5]	= "LEVEL 5 INT",
+	[VEC_INT6]	= "LEVEL 6 INT",
+	[VEC_INT7]	= "LEVEL 7 INT",
+	[VEC_SYS]	= "SYSCALL",
+	[VEC_TRAP1]	= "TRAP #1",
+	[VEC_TRAP2]	= "TRAP #2",
+	[VEC_TRAP3]	= "TRAP #3",
+	[VEC_TRAP4]	= "TRAP #4",
+	[VEC_TRAP5]	= "TRAP #5",
+	[VEC_TRAP6]	= "TRAP #6",
+	[VEC_TRAP7]	= "TRAP #7",
+	[VEC_TRAP8]	= "TRAP #8",
+	[VEC_TRAP9]	= "TRAP #9",
+	[VEC_TRAP10]	= "TRAP #10",
+	[VEC_TRAP11]	= "TRAP #11",
+	[VEC_TRAP12]	= "TRAP #12",
+	[VEC_TRAP13]	= "TRAP #13",
+	[VEC_TRAP14]	= "TRAP #14",
+	[VEC_TRAP15]	= "TRAP #15",
+	[VEC_FPBRUC]	= "FPCP BSUN",
+	[VEC_FPIR]	= "FPCP INEXACT",
+	[VEC_FPDIVZ]	= "FPCP DIV BY 0",
+	[VEC_FPUNDER]	= "FPCP UNDERFLOW",
+	[VEC_FPOE]	= "FPCP OPERAND ERROR",
+	[VEC_FPOVER]	= "FPCP OVERFLOW",
+	[VEC_FPNAN]	= "FPCP SNAN",
+	[VEC_FPUNSUP]	= "FPCP UNSUPPORTED OPERATION",
+	[VEC_MMUCFG]	= "MMU CONFIGURATION ERROR",
+	[VEC_MMUILL]	= "MMU ILLEGAL OPERATION ERROR",
+	[VEC_MMUACC]	= "MMU ACCESS LEVEL VIOLATION ERROR",
+	[VEC_RESV59]	= "UNASSIGNED RESERVED 59",
+	[VEC_UNIMPEA]	= "UNASSIGNED RESERVED 60",
+	[VEC_UNIMPII]	= "UNASSIGNED RESERVED 61",
+	[VEC_RESV62]	= "UNASSIGNED RESERVED 62",
+	[VEC_RESV63]	= "UNASSIGNED RESERVED 63",
+};
+
+static const char *space_names[] = {
+	[0]		= "Space 0",
+	[USER_DATA]	= "User Data",
+	[USER_PROGRAM]	= "User Program",
+#ifndef CONFIG_SUN3
+	[3]		= "Space 3",
+#else
+	[FC_CONTROL]	= "Control",
+#endif
+	[4]		= "Space 4",
+	[SUPER_DATA]	= "Super Data",
+	[SUPER_PROGRAM]	= "Super Program",
+	[CPU_SPACE]	= "CPU"
+};
+
+void die_if_kernel(char *,struct pt_regs *,int);
+asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address,
+                             unsigned long error_code);
+int send_fault_sig(struct pt_regs *regs);
+
+asmlinkage void trap_c(struct frame *fp);
+
+#if defined (CONFIG_M68060)
+static inline void access_error060 (struct frame *fp)
+{
+	unsigned long fslw = fp->un.fmt4.pc; /* is really FSLW for access error */
+
+#ifdef DEBUG
+	printk("fslw=%#lx, fa=%#lx\n", fslw, fp->un.fmt4.effaddr);
+#endif
+
+	if (fslw & MMU060_BPE) {
+		/* branch prediction error -> clear branch cache */
+		__asm__ __volatile__ ("movec %/cacr,%/d0\n\t"
+				      "orl   #0x00400000,%/d0\n\t"
+				      "movec %/d0,%/cacr"
+				      : : : "d0" );
+		/* return if there's no other error */
+		if (!(fslw & MMU060_ERR_BITS) && !(fslw & MMU060_SEE))
+			return;
+	}
+
+	if (fslw & (MMU060_DESC_ERR | MMU060_WP | MMU060_SP)) {
+		unsigned long errorcode;
+		unsigned long addr = fp->un.fmt4.effaddr;
+
+		if (fslw & MMU060_MA)
+			addr = (addr + PAGE_SIZE - 1) & PAGE_MASK;
+
+		errorcode = 1;
+		if (fslw & MMU060_DESC_ERR) {
+			__flush_tlb040_one(addr);
+			errorcode = 0;
+		}
+		if (fslw & MMU060_W)
+			errorcode |= 2;
+#ifdef DEBUG
+		printk("errorcode = %d\n", errorcode );
+#endif
+		do_page_fault(&fp->ptregs, addr, errorcode);
+	} else if (fslw & (MMU060_SEE)){
+		/* Software Emulation Error.
+		 * fault during mem_read/mem_write in ifpsp060/os.S
+		 */
+		send_fault_sig(&fp->ptregs);
+	} else if (!(fslw & (MMU060_RE|MMU060_WE)) ||
+		   send_fault_sig(&fp->ptregs) > 0) {
+		printk("pc=%#lx, fa=%#lx\n", fp->ptregs.pc, fp->un.fmt4.effaddr);
+		printk( "68060 access error, fslw=%lx\n", fslw );
+		trap_c( fp );
+	}
+}
+#endif /* CONFIG_M68060 */
+
+#if defined (CONFIG_M68040)
+static inline unsigned long probe040(int iswrite, unsigned long addr, int wbs)
+{
+	unsigned long mmusr;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(MAKE_MM_SEG(wbs));
+
+	if (iswrite)
+		asm volatile (".chip 68040; ptestw (%0); .chip 68k" : : "a" (addr));
+	else
+		asm volatile (".chip 68040; ptestr (%0); .chip 68k" : : "a" (addr));
+
+	asm volatile (".chip 68040; movec %%mmusr,%0; .chip 68k" : "=r" (mmusr));
+
+	set_fs(old_fs);
+
+	return mmusr;
+}
+
+static inline int do_040writeback1(unsigned short wbs, unsigned long wba,
+				   unsigned long wbd)
+{
+	int res = 0;
+	mm_segment_t old_fs = get_fs();
+
+	/* set_fs can not be moved, otherwise put_user() may oops */
+	set_fs(MAKE_MM_SEG(wbs));
+
+	switch (wbs & WBSIZ_040) {
+	case BA_SIZE_BYTE:
+		res = put_user(wbd & 0xff, (char __user *)wba);
+		break;
+	case BA_SIZE_WORD:
+		res = put_user(wbd & 0xffff, (short __user *)wba);
+		break;
+	case BA_SIZE_LONG:
+		res = put_user(wbd, (int __user *)wba);
+		break;
+	}
+
+	/* set_fs can not be moved, otherwise put_user() may oops */
+	set_fs(old_fs);
+
+
+#ifdef DEBUG
+	printk("do_040writeback1, res=%d\n",res);
+#endif
+
+	return res;
+}
+
+/* after an exception in a writeback the stack frame corresponding
+ * to that exception is discarded, set a few bits in the old frame
+ * to simulate what it should look like
+ */
+static inline void fix_xframe040(struct frame *fp, unsigned long wba, unsigned short wbs)
+{
+	fp->un.fmt7.faddr = wba;
+	fp->un.fmt7.ssw = wbs & 0xff;
+	if (wba != current->thread.faddr)
+	    fp->un.fmt7.ssw |= MA_040;
+}
+
+static inline void do_040writebacks(struct frame *fp)
+{
+	int res = 0;
+#if 0
+	if (fp->un.fmt7.wb1s & WBV_040)
+		printk("access_error040: cannot handle 1st writeback. oops.\n");
+#endif
+
+	if ((fp->un.fmt7.wb2s & WBV_040) &&
+	    !(fp->un.fmt7.wb2s & WBTT_040)) {
+		res = do_040writeback1(fp->un.fmt7.wb2s, fp->un.fmt7.wb2a,
+				       fp->un.fmt7.wb2d);
+		if (res)
+			fix_xframe040(fp, fp->un.fmt7.wb2a, fp->un.fmt7.wb2s);
+		else
+			fp->un.fmt7.wb2s = 0;
+	}
+
+	/* do the 2nd wb only if the first one was successful (except for a kernel wb) */
+	if (fp->un.fmt7.wb3s & WBV_040 && (!res || fp->un.fmt7.wb3s & 4)) {
+		res = do_040writeback1(fp->un.fmt7.wb3s, fp->un.fmt7.wb3a,
+				       fp->un.fmt7.wb3d);
+		if (res)
+		    {
+			fix_xframe040(fp, fp->un.fmt7.wb3a, fp->un.fmt7.wb3s);
+
+			fp->un.fmt7.wb2s = fp->un.fmt7.wb3s;
+			fp->un.fmt7.wb3s &= (~WBV_040);
+			fp->un.fmt7.wb2a = fp->un.fmt7.wb3a;
+			fp->un.fmt7.wb2d = fp->un.fmt7.wb3d;
+		    }
+		else
+			fp->un.fmt7.wb3s = 0;
+	}
+
+	if (res)
+		send_fault_sig(&fp->ptregs);
+}
+
+/*
+ * called from sigreturn(), must ensure userspace code didn't
+ * manipulate exception frame to circumvent protection, then complete
+ * pending writebacks
+ * we just clear TM2 to turn it into a userspace access
+ */
+asmlinkage void berr_040cleanup(struct frame *fp)
+{
+	fp->un.fmt7.wb2s &= ~4;
+	fp->un.fmt7.wb3s &= ~4;
+
+	do_040writebacks(fp);
+}
+
+static inline void access_error040(struct frame *fp)
+{
+	unsigned short ssw = fp->un.fmt7.ssw;
+	unsigned long mmusr;
+
+#ifdef DEBUG
+	printk("ssw=%#x, fa=%#lx\n", ssw, fp->un.fmt7.faddr);
+        printk("wb1s=%#x, wb2s=%#x, wb3s=%#x\n", fp->un.fmt7.wb1s,
+		fp->un.fmt7.wb2s, fp->un.fmt7.wb3s);
+	printk ("wb2a=%lx, wb3a=%lx, wb2d=%lx, wb3d=%lx\n",
+		fp->un.fmt7.wb2a, fp->un.fmt7.wb3a,
+		fp->un.fmt7.wb2d, fp->un.fmt7.wb3d);
+#endif
+
+	if (ssw & ATC_040) {
+		unsigned long addr = fp->un.fmt7.faddr;
+		unsigned long errorcode;
+
+		/*
+		 * The MMU status has to be determined AFTER the address
+		 * has been corrected if there was a misaligned access (MA).
+		 */
+		if (ssw & MA_040)
+			addr = (addr + 7) & -8;
+
+		/* MMU error, get the MMUSR info for this access */
+		mmusr = probe040(!(ssw & RW_040), addr, ssw);
+#ifdef DEBUG
+		printk("mmusr = %lx\n", mmusr);
+#endif
+		errorcode = 1;
+		if (!(mmusr & MMU_R_040)) {
+			/* clear the invalid atc entry */
+			__flush_tlb040_one(addr);
+			errorcode = 0;
+		}
+
+		/* despite what documentation seems to say, RMW
+		 * accesses have always both the LK and RW bits set */
+		if (!(ssw & RW_040) || (ssw & LK_040))
+			errorcode |= 2;
+
+		if (do_page_fault(&fp->ptregs, addr, errorcode)) {
+#ifdef DEBUG
+			printk("do_page_fault() !=0\n");
+#endif
+			if (user_mode(&fp->ptregs)){
+				/* delay writebacks after signal delivery */
+#ifdef DEBUG
+			        printk(".. was usermode - return\n");
+#endif
+				return;
+			}
+			/* disable writeback into user space from kernel
+			 * (if do_page_fault didn't fix the mapping,
+                         * the writeback won't do good)
+			 */
+disable_wb:
+#ifdef DEBUG
+			printk(".. disabling wb2\n");
+#endif
+			if (fp->un.fmt7.wb2a == fp->un.fmt7.faddr)
+				fp->un.fmt7.wb2s &= ~WBV_040;
+			if (fp->un.fmt7.wb3a == fp->un.fmt7.faddr)
+				fp->un.fmt7.wb3s &= ~WBV_040;
+		}
+	} else {
+		/* In case of a bus error we either kill the process or expect
+		 * the kernel to catch the fault, which then is also responsible
+		 * for cleaning up the mess.
+		 */
+		current->thread.signo = SIGBUS;
+		current->thread.faddr = fp->un.fmt7.faddr;
+		if (send_fault_sig(&fp->ptregs) >= 0)
+			printk("68040 bus error (ssw=%x, faddr=%lx)\n", ssw,
+			       fp->un.fmt7.faddr);
+		goto disable_wb;
+	}
+
+	do_040writebacks(fp);
+}
+#endif /* CONFIG_M68040 */
+
+#if defined(CONFIG_SUN3)
+#include <asm/sun3mmu.h>
+
+extern int mmu_emu_handle_fault (unsigned long, int, int);
+
+/* sun3 version of bus_error030 */
+
+static inline void bus_error030 (struct frame *fp)
+{
+	unsigned char buserr_type = sun3_get_buserr ();
+	unsigned long addr, errorcode;
+	unsigned short ssw = fp->un.fmtb.ssw;
+	extern unsigned long _sun3_map_test_start, _sun3_map_test_end;
+
+#ifdef DEBUG
+	if (ssw & (FC | FB))
+		printk ("Instruction fault at %#010lx\n",
+			ssw & FC ?
+			fp->ptregs.format == 0xa ? fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2
+			:
+			fp->ptregs.format == 0xa ? fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
+	if (ssw & DF)
+		printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+			ssw & RW ? "read" : "write",
+			fp->un.fmtb.daddr,
+			space_names[ssw & DFC], fp->ptregs.pc);
+#endif
+
+	/*
+	 * Check if this page should be demand-mapped. This needs to go before
+	 * the testing for a bad kernel-space access (demand-mapping applies
+	 * to kernel accesses too).
+	 */
+
+	if ((ssw & DF)
+	    && (buserr_type & (SUN3_BUSERR_PROTERR | SUN3_BUSERR_INVALID))) {
+		if (mmu_emu_handle_fault (fp->un.fmtb.daddr, ssw & RW, 0))
+			return;
+	}
+
+	/* Check for kernel-space pagefault (BAD). */
+	if (fp->ptregs.sr & PS_S) {
+		/* kernel fault must be a data fault to user space */
+		if (! ((ssw & DF) && ((ssw & DFC) == USER_DATA))) {
+		     // try checking the kernel mappings before surrender
+		     if (mmu_emu_handle_fault (fp->un.fmtb.daddr, ssw & RW, 1))
+			  return;
+			/* instruction fault or kernel data fault! */
+			if (ssw & (FC | FB))
+				printk ("Instruction fault at %#010lx\n",
+					fp->ptregs.pc);
+			if (ssw & DF) {
+				/* was this fault incurred testing bus mappings? */
+				if((fp->ptregs.pc >= (unsigned long)&_sun3_map_test_start) &&
+				   (fp->ptregs.pc <= (unsigned long)&_sun3_map_test_end)) {
+					send_fault_sig(&fp->ptregs);
+					return;
+				}
+
+				printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+					ssw & RW ? "read" : "write",
+					fp->un.fmtb.daddr,
+					space_names[ssw & DFC], fp->ptregs.pc);
+			}
+			printk ("BAD KERNEL BUSERR\n");
+
+			die_if_kernel("Oops", &fp->ptregs,0);
+			force_sig(SIGKILL, current);
+			return;
+		}
+	} else {
+		/* user fault */
+		if (!(ssw & (FC | FB)) && !(ssw & DF))
+			/* not an instruction fault or data fault! BAD */
+			panic ("USER BUSERR w/o instruction or data fault");
+	}
+
+
+	/* First handle the data fault, if any.  */
+	if (ssw & DF) {
+		addr = fp->un.fmtb.daddr;
+
+// errorcode bit 0:	0 -> no page		1 -> protection fault
+// errorcode bit 1:	0 -> read fault		1 -> write fault
+
+// (buserr_type & SUN3_BUSERR_PROTERR)	-> protection fault
+// (buserr_type & SUN3_BUSERR_INVALID)	-> invalid page fault
+
+		if (buserr_type & SUN3_BUSERR_PROTERR)
+			errorcode = 0x01;
+		else if (buserr_type & SUN3_BUSERR_INVALID)
+			errorcode = 0x00;
+		else {
+#ifdef DEBUG
+			printk ("*** unexpected busfault type=%#04x\n", buserr_type);
+			printk ("invalid %s access at %#lx from pc %#lx\n",
+				!(ssw & RW) ? "write" : "read", addr,
+				fp->ptregs.pc);
+#endif
+			die_if_kernel ("Oops", &fp->ptregs, buserr_type);
+			force_sig (SIGBUS, current);
+			return;
+		}
+
+//todo: wtf is RM bit? --m
+		if (!(ssw & RW) || ssw & RM)
+			errorcode |= 0x02;
+
+		/* Handle page fault. */
+		do_page_fault (&fp->ptregs, addr, errorcode);
+
+		/* Retry the data fault now. */
+		return;
+	}
+
+	/* Now handle the instruction fault. */
+
+	/* Get the fault address. */
+	if (fp->ptregs.format == 0xA)
+		addr = fp->ptregs.pc + 4;
+	else
+		addr = fp->un.fmtb.baddr;
+	if (ssw & FC)
+		addr -= 2;
+
+	if (buserr_type & SUN3_BUSERR_INVALID) {
+		if (!mmu_emu_handle_fault (fp->un.fmtb.daddr, 1, 0))
+			do_page_fault (&fp->ptregs, addr, 0);
+       } else {
+#ifdef DEBUG
+		printk ("protection fault on insn access (segv).\n");
+#endif
+		force_sig (SIGSEGV, current);
+       }
+}
+#else
+#if defined(CPU_M68020_OR_M68030)
+static inline void bus_error030 (struct frame *fp)
+{
+	volatile unsigned short temp;
+	unsigned short mmusr;
+	unsigned long addr, errorcode;
+	unsigned short ssw = fp->un.fmtb.ssw;
+#ifdef DEBUG
+	unsigned long desc;
+
+	printk ("pid = %x  ", current->pid);
+	printk ("SSW=%#06x  ", ssw);
+
+	if (ssw & (FC | FB))
+		printk ("Instruction fault at %#010lx\n",
+			ssw & FC ?
+			fp->ptregs.format == 0xa ? fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2
+			:
+			fp->ptregs.format == 0xa ? fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
+	if (ssw & DF)
+		printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+			ssw & RW ? "read" : "write",
+			fp->un.fmtb.daddr,
+			space_names[ssw & DFC], fp->ptregs.pc);
+#endif
+
+	/* ++andreas: If a data fault and an instruction fault happen
+	   at the same time map in both pages.  */
+
+	/* First handle the data fault, if any.  */
+	if (ssw & DF) {
+		addr = fp->un.fmtb.daddr;
+
+#ifdef DEBUG
+		asm volatile ("ptestr %3,%2@,#7,%0\n\t"
+			      "pmove %%psr,%1@"
+			      : "=a&" (desc)
+			      : "a" (&temp), "a" (addr), "d" (ssw));
+#else
+		asm volatile ("ptestr %2,%1@,#7\n\t"
+			      "pmove %%psr,%0@"
+			      : : "a" (&temp), "a" (addr), "d" (ssw));
+#endif
+		mmusr = temp;
+
+#ifdef DEBUG
+		printk("mmusr is %#x for addr %#lx in task %p\n",
+		       mmusr, addr, current);
+		printk("descriptor address is %#lx, contents %#lx\n",
+		       __va(desc), *(unsigned long *)__va(desc));
+#endif
+
+		errorcode = (mmusr & MMU_I) ? 0 : 1;
+		if (!(ssw & RW) || (ssw & RM))
+			errorcode |= 2;
+
+		if (mmusr & (MMU_I | MMU_WP)) {
+			if (ssw & 4) {
+				printk("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+				       ssw & RW ? "read" : "write",
+				       fp->un.fmtb.daddr,
+				       space_names[ssw & DFC], fp->ptregs.pc);
+				goto buserr;
+			}
+			/* Don't try to do anything further if an exception was
+			   handled. */
+			if (do_page_fault (&fp->ptregs, addr, errorcode) < 0)
+				return;
+		} else if (!(mmusr & MMU_I)) {
+			/* probably a 020 cas fault */
+			if (!(ssw & RM) && send_fault_sig(&fp->ptregs) > 0)
+				printk("unexpected bus error (%#x,%#x)\n", ssw, mmusr);
+		} else if (mmusr & (MMU_B|MMU_L|MMU_S)) {
+			printk("invalid %s access at %#lx from pc %#lx\n",
+			       !(ssw & RW) ? "write" : "read", addr,
+			       fp->ptregs.pc);
+			die_if_kernel("Oops",&fp->ptregs,mmusr);
+			force_sig(SIGSEGV, current);
+			return;
+		} else {
+#if 0
+			static volatile long tlong;
+#endif
+
+			printk("weird %s access at %#lx from pc %#lx (ssw is %#x)\n",
+			       !(ssw & RW) ? "write" : "read", addr,
+			       fp->ptregs.pc, ssw);
+			asm volatile ("ptestr #1,%1@,#0\n\t"
+				      "pmove %%psr,%0@"
+				      : /* no outputs */
+				      : "a" (&temp), "a" (addr));
+			mmusr = temp;
+
+			printk ("level 0 mmusr is %#x\n", mmusr);
+#if 0
+			asm volatile ("pmove %%tt0,%0@"
+				      : /* no outputs */
+				      : "a" (&tlong));
+			printk("tt0 is %#lx, ", tlong);
+			asm volatile ("pmove %%tt1,%0@"
+				      : /* no outputs */
+				      : "a" (&tlong));
+			printk("tt1 is %#lx\n", tlong);
+#endif
+#ifdef DEBUG
+			printk("Unknown SIGSEGV - 1\n");
+#endif
+			die_if_kernel("Oops",&fp->ptregs,mmusr);
+			force_sig(SIGSEGV, current);
+			return;
+		}
+
+		/* setup an ATC entry for the access about to be retried */
+		if (!(ssw & RW) || (ssw & RM))
+			asm volatile ("ploadw %1,%0@" : /* no outputs */
+				      : "a" (addr), "d" (ssw));
+		else
+			asm volatile ("ploadr %1,%0@" : /* no outputs */
+				      : "a" (addr), "d" (ssw));
+	}
+
+	/* Now handle the instruction fault. */
+
+	if (!(ssw & (FC|FB)))
+		return;
+
+	if (fp->ptregs.sr & PS_S) {
+		printk("Instruction fault at %#010lx\n",
+			fp->ptregs.pc);
+	buserr:
+		printk ("BAD KERNEL BUSERR\n");
+		die_if_kernel("Oops",&fp->ptregs,0);
+		force_sig(SIGKILL, current);
+		return;
+	}
+
+	/* get the fault address */
+	if (fp->ptregs.format == 10)
+		addr = fp->ptregs.pc + 4;
+	else
+		addr = fp->un.fmtb.baddr;
+	if (ssw & FC)
+		addr -= 2;
+
+	if ((ssw & DF) && ((addr ^ fp->un.fmtb.daddr) & PAGE_MASK) == 0)
+		/* Insn fault on same page as data fault.  But we
+		   should still create the ATC entry.  */
+		goto create_atc_entry;
+
+#ifdef DEBUG
+	asm volatile ("ptestr #1,%2@,#7,%0\n\t"
+		      "pmove %%psr,%1@"
+		      : "=a&" (desc)
+		      : "a" (&temp), "a" (addr));
+#else
+	asm volatile ("ptestr #1,%1@,#7\n\t"
+		      "pmove %%psr,%0@"
+		      : : "a" (&temp), "a" (addr));
+#endif
+	mmusr = temp;
+
+#ifdef DEBUG
+	printk ("mmusr is %#x for addr %#lx in task %p\n",
+		mmusr, addr, current);
+	printk ("descriptor address is %#lx, contents %#lx\n",
+		__va(desc), *(unsigned long *)__va(desc));
+#endif
+
+	if (mmusr & MMU_I)
+		do_page_fault (&fp->ptregs, addr, 0);
+	else if (mmusr & (MMU_B|MMU_L|MMU_S)) {
+		printk ("invalid insn access at %#lx from pc %#lx\n",
+			addr, fp->ptregs.pc);
+#ifdef DEBUG
+		printk("Unknown SIGSEGV - 2\n");
+#endif
+		die_if_kernel("Oops",&fp->ptregs,mmusr);
+		force_sig(SIGSEGV, current);
+		return;
+	}
+
+create_atc_entry:
+	/* setup an ATC entry for the access about to be retried */
+	asm volatile ("ploadr #2,%0@" : /* no outputs */
+		      : "a" (addr));
+}
+#endif /* CPU_M68020_OR_M68030 */
+#endif /* !CONFIG_SUN3 */
+
+asmlinkage void buserr_c(struct frame *fp)
+{
+	/* Only set esp0 if coming from user mode */
+	if (user_mode(&fp->ptregs))
+		current->thread.esp0 = (unsigned long) fp;
+
+#ifdef DEBUG
+	printk ("*** Bus Error *** Format is %x\n", fp->ptregs.format);
+#endif
+
+	switch (fp->ptregs.format) {
+#if defined (CONFIG_M68060)
+	case 4:				/* 68060 access error */
+	  access_error060 (fp);
+	  break;
+#endif
+#if defined (CONFIG_M68040)
+	case 0x7:			/* 68040 access error */
+	  access_error040 (fp);
+	  break;
+#endif
+#if defined (CPU_M68020_OR_M68030)
+	case 0xa:
+	case 0xb:
+	  bus_error030 (fp);
+	  break;
+#endif
+	default:
+	  die_if_kernel("bad frame format",&fp->ptregs,0);
+#ifdef DEBUG
+	  printk("Unknown SIGSEGV - 4\n");
+#endif
+	  force_sig(SIGSEGV, current);
+	}
+}
+
+
+static int kstack_depth_to_print = 48;
+
+void show_trace(unsigned long *stack)
+{
+	unsigned long *endstack;
+	unsigned long addr;
+	int i;
+
+	printk("Call Trace:");
+	addr = (unsigned long)stack + THREAD_SIZE - 1;
+	endstack = (unsigned long *)(addr & -THREAD_SIZE);
+	i = 0;
+	while (stack + 1 <= endstack) {
+		addr = *stack++;
+		/*
+		 * If the address is either in the text segment of the
+		 * kernel, or in the region which contains vmalloc'ed
+		 * memory, it *may* be the address of a calling
+		 * routine; if so, print it so that someone tracing
+		 * down the cause of the crash will be able to figure
+		 * out the call path that was taken.
+		 */
+		if (__kernel_text_address(addr)) {
+#ifndef CONFIG_KALLSYMS
+			if (i % 5 == 0)
+				printk("\n       ");
+#endif
+			printk(" [<%08lx>] %pS\n", addr, (void *)addr);
+			i++;
+		}
+	}
+	printk("\n");
+}
+
+void show_registers(struct pt_regs *regs)
+{
+	struct frame *fp = (struct frame *)regs;
+	mm_segment_t old_fs = get_fs();
+	u16 c, *cp;
+	unsigned long addr;
+	int i;
+
+	print_modules();
+	printk("PC: [<%08lx>] %pS\n", regs->pc, (void *)regs->pc);
+	printk("SR: %04x  SP: %p  a2: %08lx\n", regs->sr, regs, regs->a2);
+	printk("d0: %08lx    d1: %08lx    d2: %08lx    d3: %08lx\n",
+	       regs->d0, regs->d1, regs->d2, regs->d3);
+	printk("d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
+	       regs->d4, regs->d5, regs->a0, regs->a1);
+
+	printk("Process %s (pid: %d, task=%p)\n",
+		current->comm, task_pid_nr(current), current);
+	addr = (unsigned long)&fp->un;
+	printk("Frame format=%X ", regs->format);
+	switch (regs->format) {
+	case 0x2:
+		printk("instr addr=%08lx\n", fp->un.fmt2.iaddr);
+		addr += sizeof(fp->un.fmt2);
+		break;
+	case 0x3:
+		printk("eff addr=%08lx\n", fp->un.fmt3.effaddr);
+		addr += sizeof(fp->un.fmt3);
+		break;
+	case 0x4:
+		printk((CPU_IS_060 ? "fault addr=%08lx fslw=%08lx\n"
+			: "eff addr=%08lx pc=%08lx\n"),
+			fp->un.fmt4.effaddr, fp->un.fmt4.pc);
+		addr += sizeof(fp->un.fmt4);
+		break;
+	case 0x7:
+		printk("eff addr=%08lx ssw=%04x faddr=%08lx\n",
+			fp->un.fmt7.effaddr, fp->un.fmt7.ssw, fp->un.fmt7.faddr);
+		printk("wb 1 stat/addr/data: %04x %08lx %08lx\n",
+			fp->un.fmt7.wb1s, fp->un.fmt7.wb1a, fp->un.fmt7.wb1dpd0);
+		printk("wb 2 stat/addr/data: %04x %08lx %08lx\n",
+			fp->un.fmt7.wb2s, fp->un.fmt7.wb2a, fp->un.fmt7.wb2d);
+		printk("wb 3 stat/addr/data: %04x %08lx %08lx\n",
+			fp->un.fmt7.wb3s, fp->un.fmt7.wb3a, fp->un.fmt7.wb3d);
+		printk("push data: %08lx %08lx %08lx %08lx\n",
+			fp->un.fmt7.wb1dpd0, fp->un.fmt7.pd1, fp->un.fmt7.pd2,
+			fp->un.fmt7.pd3);
+		addr += sizeof(fp->un.fmt7);
+		break;
+	case 0x9:
+		printk("instr addr=%08lx\n", fp->un.fmt9.iaddr);
+		addr += sizeof(fp->un.fmt9);
+		break;
+	case 0xa:
+		printk("ssw=%04x isc=%04x isb=%04x daddr=%08lx dobuf=%08lx\n",
+			fp->un.fmta.ssw, fp->un.fmta.isc, fp->un.fmta.isb,
+			fp->un.fmta.daddr, fp->un.fmta.dobuf);
+		addr += sizeof(fp->un.fmta);
+		break;
+	case 0xb:
+		printk("ssw=%04x isc=%04x isb=%04x daddr=%08lx dobuf=%08lx\n",
+			fp->un.fmtb.ssw, fp->un.fmtb.isc, fp->un.fmtb.isb,
+			fp->un.fmtb.daddr, fp->un.fmtb.dobuf);
+		printk("baddr=%08lx dibuf=%08lx ver=%x\n",
+			fp->un.fmtb.baddr, fp->un.fmtb.dibuf, fp->un.fmtb.ver);
+		addr += sizeof(fp->un.fmtb);
+		break;
+	default:
+		printk("\n");
+	}
+	show_stack(NULL, (unsigned long *)addr);
+
+	printk("Code:");
+	set_fs(KERNEL_DS);
+	cp = (u16 *)regs->pc;
+	for (i = -8; i < 16; i++) {
+		if (get_user(c, cp + i) && i >= 0) {
+			printk(" Bad PC value.");
+			break;
+		}
+		printk(i ? " %04x" : " <%04x>", c);
+	}
+	set_fs(old_fs);
+	printk ("\n");
+}
+
+void show_stack(struct task_struct *task, unsigned long *stack)
+{
+	unsigned long *p;
+	unsigned long *endstack;
+	int i;
+
+	if (!stack) {
+		if (task)
+			stack = (unsigned long *)task->thread.esp0;
+		else
+			stack = (unsigned long *)&stack;
+	}
+	endstack = (unsigned long *)(((unsigned long)stack + THREAD_SIZE - 1) & -THREAD_SIZE);
+
+	printk("Stack from %08lx:", (unsigned long)stack);
+	p = stack;
+	for (i = 0; i < kstack_depth_to_print; i++) {
+		if (p + 1 > endstack)
+			break;
+		if (i % 8 == 0)
+			printk("\n       ");
+		printk(" %08lx", *p++);
+	}
+	printk("\n");
+	show_trace(stack);
+}
+
+/*
+ * The architecture-independent backtrace generator
+ */
+void dump_stack(void)
+{
+	unsigned long stack;
+
+	show_trace(&stack);
+}
+
+EXPORT_SYMBOL(dump_stack);
+
+void bad_super_trap (struct frame *fp)
+{
+	console_verbose();
+	if (fp->ptregs.vector < 4 * ARRAY_SIZE(vec_names))
+		printk ("*** %s ***   FORMAT=%X\n",
+			vec_names[(fp->ptregs.vector) >> 2],
+			fp->ptregs.format);
+	else
+		printk ("*** Exception %d ***   FORMAT=%X\n",
+			(fp->ptregs.vector) >> 2,
+			fp->ptregs.format);
+	if (fp->ptregs.vector >> 2 == VEC_ADDRERR && CPU_IS_020_OR_030) {
+		unsigned short ssw = fp->un.fmtb.ssw;
+
+		printk ("SSW=%#06x  ", ssw);
+
+		if (ssw & RC)
+			printk ("Pipe stage C instruction fault at %#010lx\n",
+				(fp->ptregs.format) == 0xA ?
+				fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2);
+		if (ssw & RB)
+			printk ("Pipe stage B instruction fault at %#010lx\n",
+				(fp->ptregs.format) == 0xA ?
+				fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
+		if (ssw & DF)
+			printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+				ssw & RW ? "read" : "write",
+				fp->un.fmtb.daddr, space_names[ssw & DFC],
+				fp->ptregs.pc);
+	}
+	printk ("Current process id is %d\n", task_pid_nr(current));
+	die_if_kernel("BAD KERNEL TRAP", &fp->ptregs, 0);
+}
+
+asmlinkage void trap_c(struct frame *fp)
+{
+	int sig;
+	siginfo_t info;
+
+	if (fp->ptregs.sr & PS_S) {
+		if (fp->ptregs.vector == VEC_TRACE << 2) {
+			/* traced a trapping instruction on a 68020/30,
+			 * real exception will be executed afterwards.
+			 */
+		} else if (!handle_kernel_fault(&fp->ptregs))
+			bad_super_trap(fp);
+		return;
+	}
+
+	/* send the appropriate signal to the user program */
+	switch ((fp->ptregs.vector) >> 2) {
+	    case VEC_ADDRERR:
+		info.si_code = BUS_ADRALN;
+		sig = SIGBUS;
+		break;
+	    case VEC_ILLEGAL:
+	    case VEC_LINE10:
+	    case VEC_LINE11:
+		info.si_code = ILL_ILLOPC;
+		sig = SIGILL;
+		break;
+	    case VEC_PRIV:
+		info.si_code = ILL_PRVOPC;
+		sig = SIGILL;
+		break;
+	    case VEC_COPROC:
+		info.si_code = ILL_COPROC;
+		sig = SIGILL;
+		break;
+	    case VEC_TRAP1:
+	    case VEC_TRAP2:
+	    case VEC_TRAP3:
+	    case VEC_TRAP4:
+	    case VEC_TRAP5:
+	    case VEC_TRAP6:
+	    case VEC_TRAP7:
+	    case VEC_TRAP8:
+	    case VEC_TRAP9:
+	    case VEC_TRAP10:
+	    case VEC_TRAP11:
+	    case VEC_TRAP12:
+	    case VEC_TRAP13:
+	    case VEC_TRAP14:
+		info.si_code = ILL_ILLTRP;
+		sig = SIGILL;
+		break;
+	    case VEC_FPBRUC:
+	    case VEC_FPOE:
+	    case VEC_FPNAN:
+		info.si_code = FPE_FLTINV;
+		sig = SIGFPE;
+		break;
+	    case VEC_FPIR:
+		info.si_code = FPE_FLTRES;
+		sig = SIGFPE;
+		break;
+	    case VEC_FPDIVZ:
+		info.si_code = FPE_FLTDIV;
+		sig = SIGFPE;
+		break;
+	    case VEC_FPUNDER:
+		info.si_code = FPE_FLTUND;
+		sig = SIGFPE;
+		break;
+	    case VEC_FPOVER:
+		info.si_code = FPE_FLTOVF;
+		sig = SIGFPE;
+		break;
+	    case VEC_ZERODIV:
+		info.si_code = FPE_INTDIV;
+		sig = SIGFPE;
+		break;
+	    case VEC_CHK:
+	    case VEC_TRAP:
+		info.si_code = FPE_INTOVF;
+		sig = SIGFPE;
+		break;
+	    case VEC_TRACE:		/* ptrace single step */
+		info.si_code = TRAP_TRACE;
+		sig = SIGTRAP;
+		break;
+	    case VEC_TRAP15:		/* breakpoint */
+		info.si_code = TRAP_BRKPT;
+		sig = SIGTRAP;
+		break;
+	    default:
+		info.si_code = ILL_ILLOPC;
+		sig = SIGILL;
+		break;
+	}
+	info.si_signo = sig;
+	info.si_errno = 0;
+	switch (fp->ptregs.format) {
+	    default:
+		info.si_addr = (void *) fp->ptregs.pc;
+		break;
+	    case 2:
+		info.si_addr = (void *) fp->un.fmt2.iaddr;
+		break;
+	    case 7:
+		info.si_addr = (void *) fp->un.fmt7.effaddr;
+		break;
+	    case 9:
+		info.si_addr = (void *) fp->un.fmt9.iaddr;
+		break;
+	    case 10:
+		info.si_addr = (void *) fp->un.fmta.daddr;
+		break;
+	    case 11:
+		info.si_addr = (void *) fp->un.fmtb.daddr;
+		break;
+	}
+	force_sig_info (sig, &info, current);
+}
+
+void die_if_kernel (char *str, struct pt_regs *fp, int nr)
+{
+	if (!(fp->sr & PS_S))
+		return;
+
+	console_verbose();
+	printk("%s: %08x\n",str,nr);
+	show_registers(fp);
+	add_taint(TAINT_DIE);
+	do_exit(SIGSEGV);
+}
+
+/*
+ * This function is called if an error occur while accessing
+ * user-space from the fpsp040 code.
+ */
+asmlinkage void fpsp040_die(void)
+{
+	do_exit(SIGSEGV);
+}
+
+#ifdef CONFIG_M68KFPU_EMU
+asmlinkage void fpemu_signal(int signal, int code, void *addr)
+{
+	siginfo_t info;
+
+	info.si_signo = signal;
+	info.si_errno = 0;
+	info.si_code = code;
+	info.si_addr = addr;
+	force_sig_info(signal, &info, current);
+}
+#endif

diff --git a/arch/m68knommu/kernel/traps.c b/arch/m68k/kernel/traps_no.c
similarity index 100%
rename from arch/m68knommu/kernel/traps.c
rename to arch/m68k/kernel/traps_no.c


diff --git a/arch/m68k/kernel/vmlinux.lds.S b/arch/m68k/kernel/vmlinux.lds.S
index 99ba315..030dabf 100644
--- a/arch/m68k/kernel/vmlinux.lds.S
+++ b/arch/m68k/kernel/vmlinux.lds.S

@@ -1,10 +1,5 @@
-PHDRS
-{
-  text PT_LOAD FILEHDR PHDRS FLAGS (7);
-  data PT_LOAD FLAGS (7);
-}
-#ifdef CONFIG_SUN3
-#include "vmlinux-sun3.lds"
+#ifdef CONFIG_MMU
+#include "vmlinux.lds_mm.S"
 #else
-#include "vmlinux-std.lds"
+#include "vmlinux.lds_no.S"
 #endif

diff --git a/arch/m68k/kernel/vmlinux.lds_mm.S b/arch/m68k/kernel/vmlinux.lds_mm.S
new file mode 100644
index 0000000..99ba315
--- /dev/null
+++ b/arch/m68k/kernel/vmlinux.lds_mm.S

@@ -0,0 +1,10 @@
+PHDRS
+{
+  text PT_LOAD FILEHDR PHDRS FLAGS (7);
+  data PT_LOAD FLAGS (7);
+}
+#ifdef CONFIG_SUN3
+#include "vmlinux-sun3.lds"
+#else
+#include "vmlinux-std.lds"
+#endif

diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68k/kernel/vmlinux.lds_no.S
similarity index 100%
rename from arch/m68knommu/kernel/vmlinux.lds.S
rename to arch/m68k/kernel/vmlinux.lds_no.S


diff --git a/arch/m68k/lib/Makefile b/arch/m68k/lib/Makefile
index af9abf8..1f95881 100644
--- a/arch/m68k/lib/Makefile
+++ b/arch/m68k/lib/Makefile

@@ -1,6 +1,5 @@
-#
-# Makefile for m68k-specific library files..
-#
-
-lib-y	:= ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
-	   checksum.o string.o uaccess.o
+ifdef CONFIG_MMU
+include arch/m68k/lib/Makefile_mm
+else
+include arch/m68k/lib/Makefile_no
+endif

diff --git a/arch/m68k/lib/Makefile_mm b/arch/m68k/lib/Makefile_mm
new file mode 100644
index 0000000..af9abf8
--- /dev/null
+++ b/arch/m68k/lib/Makefile_mm

@@ -0,0 +1,6 @@
+#
+# Makefile for m68k-specific library files..
+#
+
+lib-y	:= ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
+	   checksum.o string.o uaccess.o

diff --git a/arch/m68knommu/lib/Makefile b/arch/m68k/lib/Makefile_no
similarity index 100%
rename from arch/m68knommu/lib/Makefile
rename to arch/m68k/lib/Makefile_no


diff --git a/arch/m68k/lib/checksum.c b/arch/m68k/lib/checksum.c
index 6216f12..1297536 100644
--- a/arch/m68k/lib/checksum.c
+++ b/arch/m68k/lib/checksum.c

@@ -1,425 +1,5 @@
-/*
- * INET		An implementation of the TCP/IP protocol suite for the LINUX
- *		operating system.  INET is implemented using the  BSD Socket
- *		interface as the means of communication with the user level.
- *
- *		IP/TCP/UDP checksumming routines
- *
- * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
- *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- *		Tom May, <ftom@netcom.com>
- *		Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
- *		Lots of code moved from tcp.c and ip.c; see those files
- *		for more names.
- *
- * 03/02/96	Jes Sorensen, Andreas Schwab, Roman Hodek:
- *		Fixed some nasty bugs, causing some horrible crashes.
- *		A: At some points, the sum (%0) was used as
- *		length-counter instead of the length counter
- *		(%1). Thanks to Roman Hodek for pointing this out.
- *		B: GCC seems to mess up if one uses too many
- *		data-registers to hold input values and one tries to
- *		specify d0 and d1 as scratch registers. Letting gcc
- *		choose these registers itself solves the problem.
- *
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
- * 1998/8/31	Andreas Schwab:
- *		Zero out rest of buffer on exception in
- *		csum_partial_copy_from_user.
- */
-
-#include <linux/module.h>
-#include <net/checksum.h>
-
-/*
- * computes a partial checksum, e.g. for TCP/UDP fragments
- */
-
-__wsum csum_partial(const void *buff, int len, __wsum sum)
-{
-	unsigned long tmp1, tmp2;
-	  /*
-	   * Experiments with ethernet and slip connections show that buff
-	   * is aligned on either a 2-byte or 4-byte boundary.
-	   */
-	__asm__("movel %2,%3\n\t"
-		"btst #1,%3\n\t"	/* Check alignment */
-		"jeq 2f\n\t"
-		"subql #2,%1\n\t"	/* buff%4==2: treat first word */
-		"jgt 1f\n\t"
-		"addql #2,%1\n\t"	/* len was == 2, treat only rest */
-		"jra 4f\n"
-	     "1:\t"
-		"addw %2@+,%0\n\t"	/* add first word to sum */
-		"clrl %3\n\t"
-		"addxl %3,%0\n"		/* add X bit */
-	     "2:\t"
-		/* unrolled loop for the main part: do 8 longs at once */
-		"movel %1,%3\n\t"	/* save len in tmp1 */
-		"lsrl #5,%1\n\t"	/* len/32 */
-		"jeq 2f\n\t"		/* not enough... */
-		"subql #1,%1\n"
-	     "1:\t"
-		"movel %2@+,%4\n\t"
-		"addxl %4,%0\n\t"
-		"movel %2@+,%4\n\t"
-		"addxl %4,%0\n\t"
-		"movel %2@+,%4\n\t"
-		"addxl %4,%0\n\t"
-		"movel %2@+,%4\n\t"
-		"addxl %4,%0\n\t"
-		"movel %2@+,%4\n\t"
-		"addxl %4,%0\n\t"
-		"movel %2@+,%4\n\t"
-		"addxl %4,%0\n\t"
-		"movel %2@+,%4\n\t"
-		"addxl %4,%0\n\t"
-		"movel %2@+,%4\n\t"
-		"addxl %4,%0\n\t"
-		"dbra %1,1b\n\t"
-		"clrl %4\n\t"
-		"addxl %4,%0\n\t"	/* add X bit */
-		"clrw %1\n\t"
-		"subql #1,%1\n\t"
-		"jcc 1b\n"
-	     "2:\t"
-		"movel %3,%1\n\t"	/* restore len from tmp1 */
-		"andw #0x1c,%3\n\t"	/* number of rest longs */
-		"jeq 4f\n\t"
-		"lsrw #2,%3\n\t"
-		"subqw #1,%3\n"
-	     "3:\t"
-		/* loop for rest longs */
-		"movel %2@+,%4\n\t"
-		"addxl %4,%0\n\t"
-		"dbra %3,3b\n\t"
-		"clrl %4\n\t"
-		"addxl %4,%0\n"		/* add X bit */
-	     "4:\t"
-		/* now check for rest bytes that do not fit into longs */
-		"andw #3,%1\n\t"
-		"jeq 7f\n\t"
-		"clrl %4\n\t"		/* clear tmp2 for rest bytes */
-		"subqw #2,%1\n\t"
-		"jlt 5f\n\t"
-		"movew %2@+,%4\n\t"	/* have rest >= 2: get word */
-		"swap %4\n\t"		/* into bits 16..31 */
-		"tstw %1\n\t"		/* another byte? */
-		"jeq 6f\n"
-	     "5:\t"
-		"moveb %2@,%4\n\t"	/* have odd rest: get byte */
-		"lslw #8,%4\n\t"	/* into bits 8..15; 16..31 untouched */
-	     "6:\t"
-		"addl %4,%0\n\t"	/* now add rest long to sum */
-		"clrl %4\n\t"
-		"addxl %4,%0\n"		/* add X bit */
-	     "7:\t"
-		: "=d" (sum), "=d" (len), "=a" (buff),
-		  "=&d" (tmp1), "=&d" (tmp2)
-		: "0" (sum), "1" (len), "2" (buff)
-	    );
-	return(sum);
-}
-
-EXPORT_SYMBOL(csum_partial);
-
-
-/*
- * copy from user space while checksumming, with exception handling.
- */
-
-__wsum
-csum_partial_copy_from_user(const void __user *src, void *dst,
-			    int len, __wsum sum, int *csum_err)
-{
-	/*
-	 * GCC doesn't like more than 10 operands for the asm
-	 * statements so we have to use tmp2 for the error
-	 * code.
-	 */
-	unsigned long tmp1, tmp2;
-
-	__asm__("movel %2,%4\n\t"
-		"btst #1,%4\n\t"	/* Check alignment */
-		"jeq 2f\n\t"
-		"subql #2,%1\n\t"	/* buff%4==2: treat first word */
-		"jgt 1f\n\t"
-		"addql #2,%1\n\t"	/* len was == 2, treat only rest */
-		"jra 4f\n"
-	     "1:\n"
-	     "10:\t"
-		"movesw %2@+,%4\n\t"	/* add first word to sum */
-		"addw %4,%0\n\t"
-		"movew %4,%3@+\n\t"
-		"clrl %4\n\t"
-		"addxl %4,%0\n"		/* add X bit */
-	     "2:\t"
-		/* unrolled loop for the main part: do 8 longs at once */
-		"movel %1,%4\n\t"	/* save len in tmp1 */
-		"lsrl #5,%1\n\t"	/* len/32 */
-		"jeq 2f\n\t"		/* not enough... */
-		"subql #1,%1\n"
-	     "1:\n"
-	     "11:\t"
-		"movesl %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-	     "12:\t"
-		"movesl %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-	     "13:\t"
-		"movesl %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-	     "14:\t"
-		"movesl %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-	     "15:\t"
-		"movesl %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-	     "16:\t"
-		"movesl %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-	     "17:\t"
-		"movesl %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-	     "18:\t"
-		"movesl %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-		"dbra %1,1b\n\t"
-		"clrl %5\n\t"
-		"addxl %5,%0\n\t"	/* add X bit */
-		"clrw %1\n\t"
-		"subql #1,%1\n\t"
-		"jcc 1b\n"
-	     "2:\t"
-		"movel %4,%1\n\t"	/* restore len from tmp1 */
-		"andw #0x1c,%4\n\t"	/* number of rest longs */
-		"jeq 4f\n\t"
-		"lsrw #2,%4\n\t"
-		"subqw #1,%4\n"
-	     "3:\n"
-		/* loop for rest longs */
-	     "19:\t"
-		"movesl %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-		"dbra %4,3b\n\t"
-		"clrl %5\n\t"
-		"addxl %5,%0\n"		/* add X bit */
-	     "4:\t"
-		/* now check for rest bytes that do not fit into longs */
-		"andw #3,%1\n\t"
-		"jeq 7f\n\t"
-		"clrl %5\n\t"		/* clear tmp2 for rest bytes */
-		"subqw #2,%1\n\t"
-		"jlt 5f\n\t"
-	     "20:\t"
-		"movesw %2@+,%5\n\t"	/* have rest >= 2: get word */
-		"movew %5,%3@+\n\t"
-		"swap %5\n\t"		/* into bits 16..31 */
-		"tstw %1\n\t"		/* another byte? */
-		"jeq 6f\n"
-	     "5:\n"
-	     "21:\t"
-		"movesb %2@,%5\n\t"	/* have odd rest: get byte */
-		"moveb %5,%3@+\n\t"
-		"lslw #8,%5\n\t"	/* into bits 8..15; 16..31 untouched */
-	     "6:\t"
-		"addl %5,%0\n\t"	/* now add rest long to sum */
-		"clrl %5\n\t"
-		"addxl %5,%0\n\t"	/* add X bit */
-	     "7:\t"
-		"clrl %5\n"		/* no error - clear return value */
-	     "8:\n"
-		".section .fixup,\"ax\"\n"
-		".even\n"
-		/* If any exception occurs zero out the rest.
-		   Similarities with the code above are intentional :-) */
-	     "90:\t"
-		"clrw %3@+\n\t"
-		"movel %1,%4\n\t"
-		"lsrl #5,%1\n\t"
-		"jeq 1f\n\t"
-		"subql #1,%1\n"
-	     "91:\t"
-		"clrl %3@+\n"
-	     "92:\t"
-		"clrl %3@+\n"
-	     "93:\t"
-		"clrl %3@+\n"
-	     "94:\t"
-		"clrl %3@+\n"
-	     "95:\t"
-		"clrl %3@+\n"
-	     "96:\t"
-		"clrl %3@+\n"
-	     "97:\t"
-		"clrl %3@+\n"
-	     "98:\t"
-		"clrl %3@+\n\t"
-		"dbra %1,91b\n\t"
-		"clrw %1\n\t"
-		"subql #1,%1\n\t"
-		"jcc 91b\n"
-	     "1:\t"
-		"movel %4,%1\n\t"
-		"andw #0x1c,%4\n\t"
-		"jeq 1f\n\t"
-		"lsrw #2,%4\n\t"
-		"subqw #1,%4\n"
-	     "99:\t"
-		"clrl %3@+\n\t"
-		"dbra %4,99b\n\t"
-	     "1:\t"
-		"andw #3,%1\n\t"
-		"jeq 9f\n"
-	     "100:\t"
-		"clrw %3@+\n\t"
-		"tstw %1\n\t"
-		"jeq 9f\n"
-	     "101:\t"
-		"clrb %3@+\n"
-	     "9:\t"
-#define STR(X) STR1(X)
-#define STR1(X) #X
-		"moveq #-" STR(EFAULT) ",%5\n\t"
-		"jra 8b\n"
-		".previous\n"
-		".section __ex_table,\"a\"\n"
-		".long 10b,90b\n"
-		".long 11b,91b\n"
-		".long 12b,92b\n"
-		".long 13b,93b\n"
-		".long 14b,94b\n"
-		".long 15b,95b\n"
-		".long 16b,96b\n"
-		".long 17b,97b\n"
-		".long 18b,98b\n"
-		".long 19b,99b\n"
-		".long 20b,100b\n"
-		".long 21b,101b\n"
-		".previous"
-		: "=d" (sum), "=d" (len), "=a" (src), "=a" (dst),
-		  "=&d" (tmp1), "=d" (tmp2)
-		: "0" (sum), "1" (len), "2" (src), "3" (dst)
-	    );
-
-	*csum_err = tmp2;
-
-	return(sum);
-}
-
-EXPORT_SYMBOL(csum_partial_copy_from_user);
-
-
-/*
- * copy from kernel space while checksumming, otherwise like csum_partial
- */
-
-__wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
-{
-	unsigned long tmp1, tmp2;
-	__asm__("movel %2,%4\n\t"
-		"btst #1,%4\n\t"	/* Check alignment */
-		"jeq 2f\n\t"
-		"subql #2,%1\n\t"	/* buff%4==2: treat first word */
-		"jgt 1f\n\t"
-		"addql #2,%1\n\t"	/* len was == 2, treat only rest */
-		"jra 4f\n"
-	     "1:\t"
-		"movew %2@+,%4\n\t"	/* add first word to sum */
-		"addw %4,%0\n\t"
-		"movew %4,%3@+\n\t"
-		"clrl %4\n\t"
-		"addxl %4,%0\n"		/* add X bit */
-	     "2:\t"
-		/* unrolled loop for the main part: do 8 longs at once */
-		"movel %1,%4\n\t"	/* save len in tmp1 */
-		"lsrl #5,%1\n\t"	/* len/32 */
-		"jeq 2f\n\t"		/* not enough... */
-		"subql #1,%1\n"
-	     "1:\t"
-		"movel %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-		"movel %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-		"movel %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-		"movel %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-		"movel %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-		"movel %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-		"movel %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-		"movel %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-		"dbra %1,1b\n\t"
-		"clrl %5\n\t"
-		"addxl %5,%0\n\t"	/* add X bit */
-		"clrw %1\n\t"
-		"subql #1,%1\n\t"
-		"jcc 1b\n"
-	     "2:\t"
-		"movel %4,%1\n\t"	/* restore len from tmp1 */
-		"andw #0x1c,%4\n\t"	/* number of rest longs */
-		"jeq 4f\n\t"
-		"lsrw #2,%4\n\t"
-		"subqw #1,%4\n"
-	     "3:\t"
-		/* loop for rest longs */
-		"movel %2@+,%5\n\t"
-		"addxl %5,%0\n\t"
-		"movel %5,%3@+\n\t"
-		"dbra %4,3b\n\t"
-		"clrl %5\n\t"
-		"addxl %5,%0\n"		/* add X bit */
-	     "4:\t"
-		/* now check for rest bytes that do not fit into longs */
-		"andw #3,%1\n\t"
-		"jeq 7f\n\t"
-		"clrl %5\n\t"		/* clear tmp2 for rest bytes */
-		"subqw #2,%1\n\t"
-		"jlt 5f\n\t"
-		"movew %2@+,%5\n\t"	/* have rest >= 2: get word */
-		"movew %5,%3@+\n\t"
-		"swap %5\n\t"		/* into bits 16..31 */
-		"tstw %1\n\t"		/* another byte? */
-		"jeq 6f\n"
-	     "5:\t"
-		"moveb %2@,%5\n\t"	/* have odd rest: get byte */
-		"moveb %5,%3@+\n\t"
-		"lslw #8,%5\n"		/* into bits 8..15; 16..31 untouched */
-	     "6:\t"
-		"addl %5,%0\n\t"	/* now add rest long to sum */
-		"clrl %5\n\t"
-		"addxl %5,%0\n"		/* add X bit */
-	     "7:\t"
-		: "=d" (sum), "=d" (len), "=a" (src), "=a" (dst),
-		  "=&d" (tmp1), "=&d" (tmp2)
-		: "0" (sum), "1" (len), "2" (src), "3" (dst)
-	    );
-    return(sum);
-}
-EXPORT_SYMBOL(csum_partial_copy_nocheck);
+#ifdef CONFIG_MMU
+#include "checksum_mm.c"
+#else
+#include "checksum_no.c"
+#endif

diff --git a/arch/m68k/lib/checksum_mm.c b/arch/m68k/lib/checksum_mm.c
new file mode 100644
index 0000000..6216f12
--- /dev/null
+++ b/arch/m68k/lib/checksum_mm.c

@@ -0,0 +1,425 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		IP/TCP/UDP checksumming routines
+ *
+ * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
+ *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *		Tom May, <ftom@netcom.com>
+ *		Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
+ *		Lots of code moved from tcp.c and ip.c; see those files
+ *		for more names.
+ *
+ * 03/02/96	Jes Sorensen, Andreas Schwab, Roman Hodek:
+ *		Fixed some nasty bugs, causing some horrible crashes.
+ *		A: At some points, the sum (%0) was used as
+ *		length-counter instead of the length counter
+ *		(%1). Thanks to Roman Hodek for pointing this out.
+ *		B: GCC seems to mess up if one uses too many
+ *		data-registers to hold input values and one tries to
+ *		specify d0 and d1 as scratch registers. Letting gcc
+ *		choose these registers itself solves the problem.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * 1998/8/31	Andreas Schwab:
+ *		Zero out rest of buffer on exception in
+ *		csum_partial_copy_from_user.
+ */
+
+#include <linux/module.h>
+#include <net/checksum.h>
+
+/*
+ * computes a partial checksum, e.g. for TCP/UDP fragments
+ */
+
+__wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+	unsigned long tmp1, tmp2;
+	  /*
+	   * Experiments with ethernet and slip connections show that buff
+	   * is aligned on either a 2-byte or 4-byte boundary.
+	   */
+	__asm__("movel %2,%3\n\t"
+		"btst #1,%3\n\t"	/* Check alignment */
+		"jeq 2f\n\t"
+		"subql #2,%1\n\t"	/* buff%4==2: treat first word */
+		"jgt 1f\n\t"
+		"addql #2,%1\n\t"	/* len was == 2, treat only rest */
+		"jra 4f\n"
+	     "1:\t"
+		"addw %2@+,%0\n\t"	/* add first word to sum */
+		"clrl %3\n\t"
+		"addxl %3,%0\n"		/* add X bit */
+	     "2:\t"
+		/* unrolled loop for the main part: do 8 longs at once */
+		"movel %1,%3\n\t"	/* save len in tmp1 */
+		"lsrl #5,%1\n\t"	/* len/32 */
+		"jeq 2f\n\t"		/* not enough... */
+		"subql #1,%1\n"
+	     "1:\t"
+		"movel %2@+,%4\n\t"
+		"addxl %4,%0\n\t"
+		"movel %2@+,%4\n\t"
+		"addxl %4,%0\n\t"
+		"movel %2@+,%4\n\t"
+		"addxl %4,%0\n\t"
+		"movel %2@+,%4\n\t"
+		"addxl %4,%0\n\t"
+		"movel %2@+,%4\n\t"
+		"addxl %4,%0\n\t"
+		"movel %2@+,%4\n\t"
+		"addxl %4,%0\n\t"
+		"movel %2@+,%4\n\t"
+		"addxl %4,%0\n\t"
+		"movel %2@+,%4\n\t"
+		"addxl %4,%0\n\t"
+		"dbra %1,1b\n\t"
+		"clrl %4\n\t"
+		"addxl %4,%0\n\t"	/* add X bit */
+		"clrw %1\n\t"
+		"subql #1,%1\n\t"
+		"jcc 1b\n"
+	     "2:\t"
+		"movel %3,%1\n\t"	/* restore len from tmp1 */
+		"andw #0x1c,%3\n\t"	/* number of rest longs */
+		"jeq 4f\n\t"
+		"lsrw #2,%3\n\t"
+		"subqw #1,%3\n"
+	     "3:\t"
+		/* loop for rest longs */
+		"movel %2@+,%4\n\t"
+		"addxl %4,%0\n\t"
+		"dbra %3,3b\n\t"
+		"clrl %4\n\t"
+		"addxl %4,%0\n"		/* add X bit */
+	     "4:\t"
+		/* now check for rest bytes that do not fit into longs */
+		"andw #3,%1\n\t"
+		"jeq 7f\n\t"
+		"clrl %4\n\t"		/* clear tmp2 for rest bytes */
+		"subqw #2,%1\n\t"
+		"jlt 5f\n\t"
+		"movew %2@+,%4\n\t"	/* have rest >= 2: get word */
+		"swap %4\n\t"		/* into bits 16..31 */
+		"tstw %1\n\t"		/* another byte? */
+		"jeq 6f\n"
+	     "5:\t"
+		"moveb %2@,%4\n\t"	/* have odd rest: get byte */
+		"lslw #8,%4\n\t"	/* into bits 8..15; 16..31 untouched */
+	     "6:\t"
+		"addl %4,%0\n\t"	/* now add rest long to sum */
+		"clrl %4\n\t"
+		"addxl %4,%0\n"		/* add X bit */
+	     "7:\t"
+		: "=d" (sum), "=d" (len), "=a" (buff),
+		  "=&d" (tmp1), "=&d" (tmp2)
+		: "0" (sum), "1" (len), "2" (buff)
+	    );
+	return(sum);
+}
+
+EXPORT_SYMBOL(csum_partial);
+
+
+/*
+ * copy from user space while checksumming, with exception handling.
+ */
+
+__wsum
+csum_partial_copy_from_user(const void __user *src, void *dst,
+			    int len, __wsum sum, int *csum_err)
+{
+	/*
+	 * GCC doesn't like more than 10 operands for the asm
+	 * statements so we have to use tmp2 for the error
+	 * code.
+	 */
+	unsigned long tmp1, tmp2;
+
+	__asm__("movel %2,%4\n\t"
+		"btst #1,%4\n\t"	/* Check alignment */
+		"jeq 2f\n\t"
+		"subql #2,%1\n\t"	/* buff%4==2: treat first word */
+		"jgt 1f\n\t"
+		"addql #2,%1\n\t"	/* len was == 2, treat only rest */
+		"jra 4f\n"
+	     "1:\n"
+	     "10:\t"
+		"movesw %2@+,%4\n\t"	/* add first word to sum */
+		"addw %4,%0\n\t"
+		"movew %4,%3@+\n\t"
+		"clrl %4\n\t"
+		"addxl %4,%0\n"		/* add X bit */
+	     "2:\t"
+		/* unrolled loop for the main part: do 8 longs at once */
+		"movel %1,%4\n\t"	/* save len in tmp1 */
+		"lsrl #5,%1\n\t"	/* len/32 */
+		"jeq 2f\n\t"		/* not enough... */
+		"subql #1,%1\n"
+	     "1:\n"
+	     "11:\t"
+		"movesl %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+	     "12:\t"
+		"movesl %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+	     "13:\t"
+		"movesl %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+	     "14:\t"
+		"movesl %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+	     "15:\t"
+		"movesl %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+	     "16:\t"
+		"movesl %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+	     "17:\t"
+		"movesl %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+	     "18:\t"
+		"movesl %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+		"dbra %1,1b\n\t"
+		"clrl %5\n\t"
+		"addxl %5,%0\n\t"	/* add X bit */
+		"clrw %1\n\t"
+		"subql #1,%1\n\t"
+		"jcc 1b\n"
+	     "2:\t"
+		"movel %4,%1\n\t"	/* restore len from tmp1 */
+		"andw #0x1c,%4\n\t"	/* number of rest longs */
+		"jeq 4f\n\t"
+		"lsrw #2,%4\n\t"
+		"subqw #1,%4\n"
+	     "3:\n"
+		/* loop for rest longs */
+	     "19:\t"
+		"movesl %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+		"dbra %4,3b\n\t"
+		"clrl %5\n\t"
+		"addxl %5,%0\n"		/* add X bit */
+	     "4:\t"
+		/* now check for rest bytes that do not fit into longs */
+		"andw #3,%1\n\t"
+		"jeq 7f\n\t"
+		"clrl %5\n\t"		/* clear tmp2 for rest bytes */
+		"subqw #2,%1\n\t"
+		"jlt 5f\n\t"
+	     "20:\t"
+		"movesw %2@+,%5\n\t"	/* have rest >= 2: get word */
+		"movew %5,%3@+\n\t"
+		"swap %5\n\t"		/* into bits 16..31 */
+		"tstw %1\n\t"		/* another byte? */
+		"jeq 6f\n"
+	     "5:\n"
+	     "21:\t"
+		"movesb %2@,%5\n\t"	/* have odd rest: get byte */
+		"moveb %5,%3@+\n\t"
+		"lslw #8,%5\n\t"	/* into bits 8..15; 16..31 untouched */
+	     "6:\t"
+		"addl %5,%0\n\t"	/* now add rest long to sum */
+		"clrl %5\n\t"
+		"addxl %5,%0\n\t"	/* add X bit */
+	     "7:\t"
+		"clrl %5\n"		/* no error - clear return value */
+	     "8:\n"
+		".section .fixup,\"ax\"\n"
+		".even\n"
+		/* If any exception occurs zero out the rest.
+		   Similarities with the code above are intentional :-) */
+	     "90:\t"
+		"clrw %3@+\n\t"
+		"movel %1,%4\n\t"
+		"lsrl #5,%1\n\t"
+		"jeq 1f\n\t"
+		"subql #1,%1\n"
+	     "91:\t"
+		"clrl %3@+\n"
+	     "92:\t"
+		"clrl %3@+\n"
+	     "93:\t"
+		"clrl %3@+\n"
+	     "94:\t"
+		"clrl %3@+\n"
+	     "95:\t"
+		"clrl %3@+\n"
+	     "96:\t"
+		"clrl %3@+\n"
+	     "97:\t"
+		"clrl %3@+\n"
+	     "98:\t"
+		"clrl %3@+\n\t"
+		"dbra %1,91b\n\t"
+		"clrw %1\n\t"
+		"subql #1,%1\n\t"
+		"jcc 91b\n"
+	     "1:\t"
+		"movel %4,%1\n\t"
+		"andw #0x1c,%4\n\t"
+		"jeq 1f\n\t"
+		"lsrw #2,%4\n\t"
+		"subqw #1,%4\n"
+	     "99:\t"
+		"clrl %3@+\n\t"
+		"dbra %4,99b\n\t"
+	     "1:\t"
+		"andw #3,%1\n\t"
+		"jeq 9f\n"
+	     "100:\t"
+		"clrw %3@+\n\t"
+		"tstw %1\n\t"
+		"jeq 9f\n"
+	     "101:\t"
+		"clrb %3@+\n"
+	     "9:\t"
+#define STR(X) STR1(X)
+#define STR1(X) #X
+		"moveq #-" STR(EFAULT) ",%5\n\t"
+		"jra 8b\n"
+		".previous\n"
+		".section __ex_table,\"a\"\n"
+		".long 10b,90b\n"
+		".long 11b,91b\n"
+		".long 12b,92b\n"
+		".long 13b,93b\n"
+		".long 14b,94b\n"
+		".long 15b,95b\n"
+		".long 16b,96b\n"
+		".long 17b,97b\n"
+		".long 18b,98b\n"
+		".long 19b,99b\n"
+		".long 20b,100b\n"
+		".long 21b,101b\n"
+		".previous"
+		: "=d" (sum), "=d" (len), "=a" (src), "=a" (dst),
+		  "=&d" (tmp1), "=d" (tmp2)
+		: "0" (sum), "1" (len), "2" (src), "3" (dst)
+	    );
+
+	*csum_err = tmp2;
+
+	return(sum);
+}
+
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
+
+/*
+ * copy from kernel space while checksumming, otherwise like csum_partial
+ */
+
+__wsum
+csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+{
+	unsigned long tmp1, tmp2;
+	__asm__("movel %2,%4\n\t"
+		"btst #1,%4\n\t"	/* Check alignment */
+		"jeq 2f\n\t"
+		"subql #2,%1\n\t"	/* buff%4==2: treat first word */
+		"jgt 1f\n\t"
+		"addql #2,%1\n\t"	/* len was == 2, treat only rest */
+		"jra 4f\n"
+	     "1:\t"
+		"movew %2@+,%4\n\t"	/* add first word to sum */
+		"addw %4,%0\n\t"
+		"movew %4,%3@+\n\t"
+		"clrl %4\n\t"
+		"addxl %4,%0\n"		/* add X bit */
+	     "2:\t"
+		/* unrolled loop for the main part: do 8 longs at once */
+		"movel %1,%4\n\t"	/* save len in tmp1 */
+		"lsrl #5,%1\n\t"	/* len/32 */
+		"jeq 2f\n\t"		/* not enough... */
+		"subql #1,%1\n"
+	     "1:\t"
+		"movel %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+		"movel %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+		"movel %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+		"movel %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+		"movel %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+		"movel %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+		"movel %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+		"movel %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+		"dbra %1,1b\n\t"
+		"clrl %5\n\t"
+		"addxl %5,%0\n\t"	/* add X bit */
+		"clrw %1\n\t"
+		"subql #1,%1\n\t"
+		"jcc 1b\n"
+	     "2:\t"
+		"movel %4,%1\n\t"	/* restore len from tmp1 */
+		"andw #0x1c,%4\n\t"	/* number of rest longs */
+		"jeq 4f\n\t"
+		"lsrw #2,%4\n\t"
+		"subqw #1,%4\n"
+	     "3:\t"
+		/* loop for rest longs */
+		"movel %2@+,%5\n\t"
+		"addxl %5,%0\n\t"
+		"movel %5,%3@+\n\t"
+		"dbra %4,3b\n\t"
+		"clrl %5\n\t"
+		"addxl %5,%0\n"		/* add X bit */
+	     "4:\t"
+		/* now check for rest bytes that do not fit into longs */
+		"andw #3,%1\n\t"
+		"jeq 7f\n\t"
+		"clrl %5\n\t"		/* clear tmp2 for rest bytes */
+		"subqw #2,%1\n\t"
+		"jlt 5f\n\t"
+		"movew %2@+,%5\n\t"	/* have rest >= 2: get word */
+		"movew %5,%3@+\n\t"
+		"swap %5\n\t"		/* into bits 16..31 */
+		"tstw %1\n\t"		/* another byte? */
+		"jeq 6f\n"
+	     "5:\t"
+		"moveb %2@,%5\n\t"	/* have odd rest: get byte */
+		"moveb %5,%3@+\n\t"
+		"lslw #8,%5\n"		/* into bits 8..15; 16..31 untouched */
+	     "6:\t"
+		"addl %5,%0\n\t"	/* now add rest long to sum */
+		"clrl %5\n\t"
+		"addxl %5,%0\n"		/* add X bit */
+	     "7:\t"
+		: "=d" (sum), "=d" (len), "=a" (src), "=a" (dst),
+		  "=&d" (tmp1), "=&d" (tmp2)
+		: "0" (sum), "1" (len), "2" (src), "3" (dst)
+	    );
+    return(sum);
+}
+EXPORT_SYMBOL(csum_partial_copy_nocheck);

diff --git a/arch/m68knommu/lib/checksum.c b/arch/m68k/lib/checksum_no.c
similarity index 100%
rename from arch/m68knommu/lib/checksum.c
rename to arch/m68k/lib/checksum_no.c


diff --git a/arch/m68knommu/lib/delay.c b/arch/m68k/lib/delay.c
similarity index 100%
rename from arch/m68knommu/lib/delay.c
rename to arch/m68k/lib/delay.c


diff --git a/arch/m68knommu/lib/divsi3.S b/arch/m68k/lib/divsi3.S
similarity index 100%
rename from arch/m68knommu/lib/divsi3.S
rename to arch/m68k/lib/divsi3.S


diff --git a/arch/m68knommu/lib/memcpy.c b/arch/m68k/lib/memcpy.c
similarity index 100%
rename from arch/m68knommu/lib/memcpy.c
rename to arch/m68k/lib/memcpy.c


diff --git a/arch/m68knommu/lib/memmove.c b/arch/m68k/lib/memmove.c
similarity index 100%
rename from arch/m68knommu/lib/memmove.c
rename to arch/m68k/lib/memmove.c


diff --git a/arch/m68knommu/lib/memset.c b/arch/m68k/lib/memset.c
similarity index 100%
rename from arch/m68knommu/lib/memset.c
rename to arch/m68k/lib/memset.c


diff --git a/arch/m68knommu/lib/modsi3.S b/arch/m68k/lib/modsi3.S
similarity index 100%
rename from arch/m68knommu/lib/modsi3.S
rename to arch/m68k/lib/modsi3.S


diff --git a/arch/m68k/lib/muldi3.c b/arch/m68k/lib/muldi3.c
index be4f275..16e0eb3 100644
--- a/arch/m68k/lib/muldi3.c
+++ b/arch/m68k/lib/muldi3.c

@@ -1,63 +1,5 @@
-/* muldi3.c extracted from gcc-2.7.2.3/libgcc2.c and
-			   gcc-2.7.2.3/longlong.h which is: */
-/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define BITS_PER_UNIT 8
-
-#define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("mulu%.l %3,%1:%0"						\
-           : "=d" ((USItype)(w0)),					\
-             "=d" ((USItype)(w1))					\
-           : "%0" ((USItype)(u)),					\
-             "dmi" ((USItype)(v)))
-
-#define __umulsidi3(u, v) \
-  ({DIunion __w;							\
-    umul_ppmm (__w.s.high, __w.s.low, u, v);				\
-    __w.ll; })
-
-typedef		 int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef		 int DItype	__attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
-
-struct DIstruct {SItype high, low;};
-
-typedef union
-{
-  struct DIstruct s;
-  DItype ll;
-} DIunion;
-
-DItype
-__muldi3 (DItype u, DItype v)
-{
-  DIunion w;
-  DIunion uu, vv;
-
-  uu.ll = u,
-  vv.ll = v;
-
-  w.ll = __umulsidi3 (uu.s.low, vv.s.low);
-  w.s.high += ((USItype) uu.s.low * (USItype) vv.s.high
-	       + (USItype) uu.s.high * (USItype) vv.s.low);
-
-  return w.ll;
-}
+#ifdef CONFIG_MMU
+#include "muldi3_mm.c"
+#else
+#include "muldi3_no.c"
+#endif

diff --git a/arch/m68knommu/lib/ashrdi3.c b/arch/m68k/lib/muldi3_mm.c
similarity index 60%
rename from arch/m68knommu/lib/ashrdi3.c
rename to arch/m68k/lib/muldi3_mm.c
index 78efb65..be4f275 100644
--- a/arch/m68knommu/lib/ashrdi3.c
+++ b/arch/m68k/lib/muldi3_mm.c

@@ -1,4 +1,5 @@
-/* ashrdi3.c extracted from gcc-2.7.2/libgcc2.c which is: */
+/* muldi3.c extracted from gcc-2.7.2.3/libgcc2.c and
+			   gcc-2.7.2.3/longlong.h which is: */
 /* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
 
 This file is part of GNU CC.
@@ -20,7 +21,19 @@
 
 #define BITS_PER_UNIT 8
 
-typedef 	 int SItype	__attribute__ ((mode (SI)));
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulu%.l %3,%1:%0"						\
+           : "=d" ((USItype)(w0)),					\
+             "=d" ((USItype)(w1))					\
+           : "%0" ((USItype)(u)),					\
+             "dmi" ((USItype)(v)))
+
+#define __umulsidi3(u, v) \
+  ({DIunion __w;							\
+    umul_ppmm (__w.s.high, __w.s.low, u, v);				\
+    __w.ll; })
+
+typedef		 int SItype	__attribute__ ((mode (SI)));
 typedef unsigned int USItype	__attribute__ ((mode (SI)));
 typedef		 int DItype	__attribute__ ((mode (DI)));
 typedef int word_type __attribute__ ((mode (__word__)));
@@ -34,30 +47,17 @@
 } DIunion;
 
 DItype
-__ashrdi3 (DItype u, word_type b)
+__muldi3 (DItype u, DItype v)
 {
   DIunion w;
-  word_type bm;
-  DIunion uu;
+  DIunion uu, vv;
 
-  if (b == 0)
-    return u;
+  uu.ll = u,
+  vv.ll = v;
 
-  uu.ll = u;
-
-  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
-  if (bm <= 0)
-    {
-      /* w.s.high = 1..1 or 0..0 */
-      w.s.high = uu.s.high >> (sizeof (SItype) * BITS_PER_UNIT - 1);
-      w.s.low = uu.s.high >> -bm;
-    }
-  else
-    {
-      USItype carries = (USItype)uu.s.high << bm;
-      w.s.high = uu.s.high >> b;
-      w.s.low = ((USItype)uu.s.low >> b) | carries;
-    }
+  w.ll = __umulsidi3 (uu.s.low, vv.s.low);
+  w.s.high += ((USItype) uu.s.low * (USItype) vv.s.high
+	       + (USItype) uu.s.high * (USItype) vv.s.low);
 
   return w.ll;
 }

diff --git a/arch/m68knommu/lib/muldi3.c b/arch/m68k/lib/muldi3_no.c
similarity index 100%
rename from arch/m68knommu/lib/muldi3.c
rename to arch/m68k/lib/muldi3_no.c


diff --git a/arch/m68knommu/lib/mulsi3.S b/arch/m68k/lib/mulsi3.S
similarity index 100%
rename from arch/m68knommu/lib/mulsi3.S
rename to arch/m68k/lib/mulsi3.S


diff --git a/arch/m68knommu/lib/udivsi3.S b/arch/m68k/lib/udivsi3.S
similarity index 100%
rename from arch/m68knommu/lib/udivsi3.S
rename to arch/m68k/lib/udivsi3.S


diff --git a/arch/m68knommu/lib/umodsi3.S b/arch/m68k/lib/umodsi3.S
similarity index 100%
rename from arch/m68knommu/lib/umodsi3.S
rename to arch/m68k/lib/umodsi3.S


diff --git a/arch/m68k/mm/Makefile b/arch/m68k/mm/Makefile
index 5eaa43c..b60270e 100644
--- a/arch/m68k/mm/Makefile
+++ b/arch/m68k/mm/Makefile

@@ -1,8 +1,5 @@
-#
-# Makefile for the linux m68k-specific parts of the memory manager.
-#
-
-obj-y		:= cache.o init.o fault.o hwtest.o
-
-obj-$(CONFIG_MMU_MOTOROLA)	+= kmap.o memory.o motorola.o
-obj-$(CONFIG_MMU_SUN3)		+= sun3kmap.o sun3mmu.o
+ifdef CONFIG_MMU
+include arch/m68k/mm/Makefile_mm
+else
+include arch/m68k/mm/Makefile_no
+endif

diff --git a/arch/m68k/mm/Makefile_mm b/arch/m68k/mm/Makefile_mm
new file mode 100644
index 0000000..5eaa43c
--- /dev/null
+++ b/arch/m68k/mm/Makefile_mm

@@ -0,0 +1,8 @@
+#
+# Makefile for the linux m68k-specific parts of the memory manager.
+#
+
+obj-y		:= cache.o init.o fault.o hwtest.o
+
+obj-$(CONFIG_MMU_MOTOROLA)	+= kmap.o memory.o motorola.o
+obj-$(CONFIG_MMU_SUN3)		+= sun3kmap.o sun3mmu.o

diff --git a/arch/m68knommu/mm/Makefile b/arch/m68k/mm/Makefile_no
similarity index 100%
rename from arch/m68knommu/mm/Makefile
rename to arch/m68k/mm/Makefile_no


diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 8bc8425..27b5ce0 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c

@@ -1,150 +1,5 @@
-/*
- *  linux/arch/m68k/mm/init.c
- *
- *  Copyright (C) 1995  Hamish Macdonald
- *
- *  Contains common initialization routines, specific init code moved
- *  to motorola.c and sun3mmu.c
- */
-
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/gfp.h>
-
-#include <asm/setup.h>
-#include <asm/uaccess.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/system.h>
-#include <asm/machdep.h>
-#include <asm/io.h>
-#ifdef CONFIG_ATARI
-#include <asm/atari_stram.h>
-#endif
-#include <asm/sections.h>
-#include <asm/tlb.h>
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
-pg_data_t pg_data_map[MAX_NUMNODES];
-EXPORT_SYMBOL(pg_data_map);
-
-int m68k_virt_to_node_shift;
-
-#ifndef CONFIG_SINGLE_MEMORY_CHUNK
-pg_data_t *pg_data_table[65];
-EXPORT_SYMBOL(pg_data_table);
-#endif
-
-void __init m68k_setup_node(int node)
-{
-#ifndef CONFIG_SINGLE_MEMORY_CHUNK
-	struct mem_info *info = m68k_memory + node;
-	int i, end;
-
-	i = (unsigned long)phys_to_virt(info->addr) >> __virt_to_node_shift();
-	end = (unsigned long)phys_to_virt(info->addr + info->size - 1) >> __virt_to_node_shift();
-	for (; i <= end; i++) {
-		if (pg_data_table[i])
-			printk("overlap at %u for chunk %u\n", i, node);
-		pg_data_table[i] = pg_data_map + node;
-	}
-#endif
-	pg_data_map[node].bdata = bootmem_node_data + node;
-	node_set_online(node);
-}
-
-
-/*
- * ZERO_PAGE is a special page that is used for zero-initialized
- * data and COW.
- */
-
-void *empty_zero_page;
-EXPORT_SYMBOL(empty_zero_page);
-
-extern void init_pointer_table(unsigned long ptable);
-
-/* References to section boundaries */
-
-extern pmd_t *zero_pgtable;
-
-void __init mem_init(void)
-{
-	pg_data_t *pgdat;
-	int codepages = 0;
-	int datapages = 0;
-	int initpages = 0;
-	int i;
-
-#ifdef CONFIG_ATARI
-	if (MACH_IS_ATARI)
-		atari_stram_mem_init_hook();
-#endif
-
-	/* this will put all memory onto the freelists */
-	totalram_pages = num_physpages = 0;
-	for_each_online_pgdat(pgdat) {
-		num_physpages += pgdat->node_present_pages;
-
-		totalram_pages += free_all_bootmem_node(pgdat);
-		for (i = 0; i < pgdat->node_spanned_pages; i++) {
-			struct page *page = pgdat->node_mem_map + i;
-			char *addr = page_to_virt(page);
-
-			if (!PageReserved(page))
-				continue;
-			if (addr >= _text &&
-			    addr < _etext)
-				codepages++;
-			else if (addr >= __init_begin &&
-				 addr < __init_end)
-				initpages++;
-			else
-				datapages++;
-		}
-	}
-
-#ifndef CONFIG_SUN3
-	/* insert pointer tables allocated so far into the tablelist */
-	init_pointer_table((unsigned long)kernel_pg_dir);
-	for (i = 0; i < PTRS_PER_PGD; i++) {
-		if (pgd_present(kernel_pg_dir[i]))
-			init_pointer_table(__pgd_page(kernel_pg_dir[i]));
-	}
-
-	/* insert also pointer table that we used to unmap the zero page */
-	if (zero_pgtable)
-		init_pointer_table((unsigned long)zero_pgtable);
-#endif
-
-	printk("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init)\n",
-	       nr_free_pages() << (PAGE_SHIFT-10),
-	       totalram_pages << (PAGE_SHIFT-10),
-	       codepages << (PAGE_SHIFT-10),
-	       datapages << (PAGE_SHIFT-10),
-	       initpages << (PAGE_SHIFT-10));
-}
-
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
-	int pages = 0;
-	for (; start < end; start += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(start));
-		init_page_count(virt_to_page(start));
-		free_page(start);
-		totalram_pages++;
-		pages++;
-	}
-	printk ("Freeing initrd memory: %dk freed\n", pages);
-}
+#ifdef CONFIG_MMU
+#include "init_mm.c"
+#else
+#include "init_no.c"
 #endif

diff --git a/arch/m68k/mm/init_mm.c b/arch/m68k/mm/init_mm.c
new file mode 100644
index 0000000..8bc8425
--- /dev/null
+++ b/arch/m68k/mm/init_mm.c

@@ -0,0 +1,150 @@
+/*
+ *  linux/arch/m68k/mm/init.c
+ *
+ *  Copyright (C) 1995  Hamish Macdonald
+ *
+ *  Contains common initialization routines, specific init code moved
+ *  to motorola.c and sun3mmu.c
+ */
+
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/gfp.h>
+
+#include <asm/setup.h>
+#include <asm/uaccess.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/system.h>
+#include <asm/machdep.h>
+#include <asm/io.h>
+#ifdef CONFIG_ATARI
+#include <asm/atari_stram.h>
+#endif
+#include <asm/sections.h>
+#include <asm/tlb.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+pg_data_t pg_data_map[MAX_NUMNODES];
+EXPORT_SYMBOL(pg_data_map);
+
+int m68k_virt_to_node_shift;
+
+#ifndef CONFIG_SINGLE_MEMORY_CHUNK
+pg_data_t *pg_data_table[65];
+EXPORT_SYMBOL(pg_data_table);
+#endif
+
+void __init m68k_setup_node(int node)
+{
+#ifndef CONFIG_SINGLE_MEMORY_CHUNK
+	struct mem_info *info = m68k_memory + node;
+	int i, end;
+
+	i = (unsigned long)phys_to_virt(info->addr) >> __virt_to_node_shift();
+	end = (unsigned long)phys_to_virt(info->addr + info->size - 1) >> __virt_to_node_shift();
+	for (; i <= end; i++) {
+		if (pg_data_table[i])
+			printk("overlap at %u for chunk %u\n", i, node);
+		pg_data_table[i] = pg_data_map + node;
+	}
+#endif
+	pg_data_map[node].bdata = bootmem_node_data + node;
+	node_set_online(node);
+}
+
+
+/*
+ * ZERO_PAGE is a special page that is used for zero-initialized
+ * data and COW.
+ */
+
+void *empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
+
+extern void init_pointer_table(unsigned long ptable);
+
+/* References to section boundaries */
+
+extern pmd_t *zero_pgtable;
+
+void __init mem_init(void)
+{
+	pg_data_t *pgdat;
+	int codepages = 0;
+	int datapages = 0;
+	int initpages = 0;
+	int i;
+
+#ifdef CONFIG_ATARI
+	if (MACH_IS_ATARI)
+		atari_stram_mem_init_hook();
+#endif
+
+	/* this will put all memory onto the freelists */
+	totalram_pages = num_physpages = 0;
+	for_each_online_pgdat(pgdat) {
+		num_physpages += pgdat->node_present_pages;
+
+		totalram_pages += free_all_bootmem_node(pgdat);
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
+			struct page *page = pgdat->node_mem_map + i;
+			char *addr = page_to_virt(page);
+
+			if (!PageReserved(page))
+				continue;
+			if (addr >= _text &&
+			    addr < _etext)
+				codepages++;
+			else if (addr >= __init_begin &&
+				 addr < __init_end)
+				initpages++;
+			else
+				datapages++;
+		}
+	}
+
+#ifndef CONFIG_SUN3
+	/* insert pointer tables allocated so far into the tablelist */
+	init_pointer_table((unsigned long)kernel_pg_dir);
+	for (i = 0; i < PTRS_PER_PGD; i++) {
+		if (pgd_present(kernel_pg_dir[i]))
+			init_pointer_table(__pgd_page(kernel_pg_dir[i]));
+	}
+
+	/* insert also pointer table that we used to unmap the zero page */
+	if (zero_pgtable)
+		init_pointer_table((unsigned long)zero_pgtable);
+#endif
+
+	printk("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init)\n",
+	       nr_free_pages() << (PAGE_SHIFT-10),
+	       totalram_pages << (PAGE_SHIFT-10),
+	       codepages << (PAGE_SHIFT-10),
+	       datapages << (PAGE_SHIFT-10),
+	       initpages << (PAGE_SHIFT-10));
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	int pages = 0;
+	for (; start < end; start += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(start));
+		init_page_count(virt_to_page(start));
+		free_page(start);
+		totalram_pages++;
+		pages++;
+	}
+	printk ("Freeing initrd memory: %dk freed\n", pages);
+}
+#endif

diff --git a/arch/m68knommu/mm/init.c b/arch/m68k/mm/init_no.c
similarity index 100%
rename from arch/m68knommu/mm/init.c
rename to arch/m68k/mm/init_no.c


diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 6934584..a373d13 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c

@@ -1,367 +1,5 @@
-/*
- *  linux/arch/m68k/mm/kmap.c
- *
- *  Copyright (C) 1997 Roman Hodek
- *
- *  10/01/99 cleaned up the code and changing to the same interface
- *	     used by other architectures		/Roman Zippel
- */
-
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include <asm/setup.h>
-#include <asm/segment.h>
-#include <asm/page.h>
-#include <asm/pgalloc.h>
-#include <asm/io.h>
-#include <asm/system.h>
-
-#undef DEBUG
-
-#define PTRTREESIZE	(256*1024)
-
-/*
- * For 040/060 we can use the virtual memory area like other architectures,
- * but for 020/030 we want to use early termination page descriptor and we
- * can't mix this with normal page descriptors, so we have to copy that code
- * (mm/vmalloc.c) and return appriorate aligned addresses.
- */
-
-#ifdef CPU_M68040_OR_M68060_ONLY
-
-#define IO_SIZE		PAGE_SIZE
-
-static inline struct vm_struct *get_io_area(unsigned long size)
-{
-	return get_vm_area(size, VM_IOREMAP);
-}
-
-
-static inline void free_io_area(void *addr)
-{
-	vfree((void *)(PAGE_MASK & (unsigned long)addr));
-}
-
+#ifdef CONFIG_MMU
+#include "kmap_mm.c"
 #else
-
-#define IO_SIZE		(256*1024)
-
-static struct vm_struct *iolist;
-
-static struct vm_struct *get_io_area(unsigned long size)
-{
-	unsigned long addr;
-	struct vm_struct **p, *tmp, *area;
-
-	area = kmalloc(sizeof(*area), GFP_KERNEL);
-	if (!area)
-		return NULL;
-	addr = KMAP_START;
-	for (p = &iolist; (tmp = *p) ; p = &tmp->next) {
-		if (size + addr < (unsigned long)tmp->addr)
-			break;
-		if (addr > KMAP_END-size) {
-			kfree(area);
-			return NULL;
-		}
-		addr = tmp->size + (unsigned long)tmp->addr;
-	}
-	area->addr = (void *)addr;
-	area->size = size + IO_SIZE;
-	area->next = *p;
-	*p = area;
-	return area;
-}
-
-static inline void free_io_area(void *addr)
-{
-	struct vm_struct **p, *tmp;
-
-	if (!addr)
-		return;
-	addr = (void *)((unsigned long)addr & -IO_SIZE);
-	for (p = &iolist ; (tmp = *p) ; p = &tmp->next) {
-		if (tmp->addr == addr) {
-			*p = tmp->next;
-			__iounmap(tmp->addr, tmp->size);
-			kfree(tmp);
-			return;
-		}
-	}
-}
-
+#include "kmap_no.c"
 #endif
-
-/*
- * Map some physical address range into the kernel address space.
- */
-/* Rewritten by Andreas Schwab to remove all races. */
-
-void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag)
-{
-	struct vm_struct *area;
-	unsigned long virtaddr, retaddr;
-	long offset;
-	pgd_t *pgd_dir;
-	pmd_t *pmd_dir;
-	pte_t *pte_dir;
-
-	/*
-	 * Don't allow mappings that wrap..
-	 */
-	if (!size || physaddr > (unsigned long)(-size))
-		return NULL;
-
-#ifdef CONFIG_AMIGA
-	if (MACH_IS_AMIGA) {
-		if ((physaddr >= 0x40000000) && (physaddr + size < 0x60000000)
-		    && (cacheflag == IOMAP_NOCACHE_SER))
-			return (void __iomem *)physaddr;
-	}
-#endif
-
-#ifdef DEBUG
-	printk("ioremap: 0x%lx,0x%lx(%d) - ", physaddr, size, cacheflag);
-#endif
-	/*
-	 * Mappings have to be aligned
-	 */
-	offset = physaddr & (IO_SIZE - 1);
-	physaddr &= -IO_SIZE;
-	size = (size + offset + IO_SIZE - 1) & -IO_SIZE;
-
-	/*
-	 * Ok, go for it..
-	 */
-	area = get_io_area(size);
-	if (!area)
-		return NULL;
-
-	virtaddr = (unsigned long)area->addr;
-	retaddr = virtaddr + offset;
-#ifdef DEBUG
-	printk("0x%lx,0x%lx,0x%lx", physaddr, virtaddr, retaddr);
-#endif
-
-	/*
-	 * add cache and table flags to physical address
-	 */
-	if (CPU_IS_040_OR_060) {
-		physaddr |= (_PAGE_PRESENT | _PAGE_GLOBAL040 |
-			     _PAGE_ACCESSED | _PAGE_DIRTY);
-		switch (cacheflag) {
-		case IOMAP_FULL_CACHING:
-			physaddr |= _PAGE_CACHE040;
-			break;
-		case IOMAP_NOCACHE_SER:
-		default:
-			physaddr |= _PAGE_NOCACHE_S;
-			break;
-		case IOMAP_NOCACHE_NONSER:
-			physaddr |= _PAGE_NOCACHE;
-			break;
-		case IOMAP_WRITETHROUGH:
-			physaddr |= _PAGE_CACHE040W;
-			break;
-		}
-	} else {
-		physaddr |= (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY);
-		switch (cacheflag) {
-		case IOMAP_NOCACHE_SER:
-		case IOMAP_NOCACHE_NONSER:
-		default:
-			physaddr |= _PAGE_NOCACHE030;
-			break;
-		case IOMAP_FULL_CACHING:
-		case IOMAP_WRITETHROUGH:
-			break;
-		}
-	}
-
-	while ((long)size > 0) {
-#ifdef DEBUG
-		if (!(virtaddr & (PTRTREESIZE-1)))
-			printk ("\npa=%#lx va=%#lx ", physaddr, virtaddr);
-#endif
-		pgd_dir = pgd_offset_k(virtaddr);
-		pmd_dir = pmd_alloc(&init_mm, pgd_dir, virtaddr);
-		if (!pmd_dir) {
-			printk("ioremap: no mem for pmd_dir\n");
-			return NULL;
-		}
-
-		if (CPU_IS_020_OR_030) {
-			pmd_dir->pmd[(virtaddr/PTRTREESIZE) & 15] = physaddr;
-			physaddr += PTRTREESIZE;
-			virtaddr += PTRTREESIZE;
-			size -= PTRTREESIZE;
-		} else {
-			pte_dir = pte_alloc_kernel(pmd_dir, virtaddr);
-			if (!pte_dir) {
-				printk("ioremap: no mem for pte_dir\n");
-				return NULL;
-			}
-
-			pte_val(*pte_dir) = physaddr;
-			virtaddr += PAGE_SIZE;
-			physaddr += PAGE_SIZE;
-			size -= PAGE_SIZE;
-		}
-	}
-#ifdef DEBUG
-	printk("\n");
-#endif
-	flush_tlb_all();
-
-	return (void __iomem *)retaddr;
-}
-EXPORT_SYMBOL(__ioremap);
-
-/*
- * Unmap a ioremap()ed region again
- */
-void iounmap(void __iomem *addr)
-{
-#ifdef CONFIG_AMIGA
-	if ((!MACH_IS_AMIGA) ||
-	    (((unsigned long)addr < 0x40000000) ||
-	     ((unsigned long)addr > 0x60000000)))
-			free_io_area((__force void *)addr);
-#else
-	free_io_area((__force void *)addr);
-#endif
-}
-EXPORT_SYMBOL(iounmap);
-
-/*
- * __iounmap unmaps nearly everything, so be careful
- * it doesn't free currently pointer/page tables anymore but it
- * wans't used anyway and might be added later.
- */
-void __iounmap(void *addr, unsigned long size)
-{
-	unsigned long virtaddr = (unsigned long)addr;
-	pgd_t *pgd_dir;
-	pmd_t *pmd_dir;
-	pte_t *pte_dir;
-
-	while ((long)size > 0) {
-		pgd_dir = pgd_offset_k(virtaddr);
-		if (pgd_bad(*pgd_dir)) {
-			printk("iounmap: bad pgd(%08lx)\n", pgd_val(*pgd_dir));
-			pgd_clear(pgd_dir);
-			return;
-		}
-		pmd_dir = pmd_offset(pgd_dir, virtaddr);
-
-		if (CPU_IS_020_OR_030) {
-			int pmd_off = (virtaddr/PTRTREESIZE) & 15;
-			int pmd_type = pmd_dir->pmd[pmd_off] & _DESCTYPE_MASK;
-
-			if (pmd_type == _PAGE_PRESENT) {
-				pmd_dir->pmd[pmd_off] = 0;
-				virtaddr += PTRTREESIZE;
-				size -= PTRTREESIZE;
-				continue;
-			} else if (pmd_type == 0)
-				continue;
-		}
-
-		if (pmd_bad(*pmd_dir)) {
-			printk("iounmap: bad pmd (%08lx)\n", pmd_val(*pmd_dir));
-			pmd_clear(pmd_dir);
-			return;
-		}
-		pte_dir = pte_offset_kernel(pmd_dir, virtaddr);
-
-		pte_val(*pte_dir) = 0;
-		virtaddr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-
-	flush_tlb_all();
-}
-
-/*
- * Set new cache mode for some kernel address space.
- * The caller must push data for that range itself, if such data may already
- * be in the cache.
- */
-void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
-{
-	unsigned long virtaddr = (unsigned long)addr;
-	pgd_t *pgd_dir;
-	pmd_t *pmd_dir;
-	pte_t *pte_dir;
-
-	if (CPU_IS_040_OR_060) {
-		switch (cmode) {
-		case IOMAP_FULL_CACHING:
-			cmode = _PAGE_CACHE040;
-			break;
-		case IOMAP_NOCACHE_SER:
-		default:
-			cmode = _PAGE_NOCACHE_S;
-			break;
-		case IOMAP_NOCACHE_NONSER:
-			cmode = _PAGE_NOCACHE;
-			break;
-		case IOMAP_WRITETHROUGH:
-			cmode = _PAGE_CACHE040W;
-			break;
-		}
-	} else {
-		switch (cmode) {
-		case IOMAP_NOCACHE_SER:
-		case IOMAP_NOCACHE_NONSER:
-		default:
-			cmode = _PAGE_NOCACHE030;
-			break;
-		case IOMAP_FULL_CACHING:
-		case IOMAP_WRITETHROUGH:
-			cmode = 0;
-		}
-	}
-
-	while ((long)size > 0) {
-		pgd_dir = pgd_offset_k(virtaddr);
-		if (pgd_bad(*pgd_dir)) {
-			printk("iocachemode: bad pgd(%08lx)\n", pgd_val(*pgd_dir));
-			pgd_clear(pgd_dir);
-			return;
-		}
-		pmd_dir = pmd_offset(pgd_dir, virtaddr);
-
-		if (CPU_IS_020_OR_030) {
-			int pmd_off = (virtaddr/PTRTREESIZE) & 15;
-
-			if ((pmd_dir->pmd[pmd_off] & _DESCTYPE_MASK) == _PAGE_PRESENT) {
-				pmd_dir->pmd[pmd_off] = (pmd_dir->pmd[pmd_off] &
-							 _CACHEMASK040) | cmode;
-				virtaddr += PTRTREESIZE;
-				size -= PTRTREESIZE;
-				continue;
-			}
-		}
-
-		if (pmd_bad(*pmd_dir)) {
-			printk("iocachemode: bad pmd (%08lx)\n", pmd_val(*pmd_dir));
-			pmd_clear(pmd_dir);
-			return;
-		}
-		pte_dir = pte_offset_kernel(pmd_dir, virtaddr);
-
-		pte_val(*pte_dir) = (pte_val(*pte_dir) & _CACHEMASK040) | cmode;
-		virtaddr += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	}
-
-	flush_tlb_all();
-}
-EXPORT_SYMBOL(kernel_set_cachemode);

diff --git a/arch/m68k/mm/kmap_mm.c b/arch/m68k/mm/kmap_mm.c
new file mode 100644
index 0000000..6934584
--- /dev/null
+++ b/arch/m68k/mm/kmap_mm.c

@@ -0,0 +1,367 @@
+/*
+ *  linux/arch/m68k/mm/kmap.c
+ *
+ *  Copyright (C) 1997 Roman Hodek
+ *
+ *  10/01/99 cleaned up the code and changing to the same interface
+ *	     used by other architectures		/Roman Zippel
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <asm/setup.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/io.h>
+#include <asm/system.h>
+
+#undef DEBUG
+
+#define PTRTREESIZE	(256*1024)
+
+/*
+ * For 040/060 we can use the virtual memory area like other architectures,
+ * but for 020/030 we want to use early termination page descriptor and we
+ * can't mix this with normal page descriptors, so we have to copy that code
+ * (mm/vmalloc.c) and return appriorate aligned addresses.
+ */
+
+#ifdef CPU_M68040_OR_M68060_ONLY
+
+#define IO_SIZE		PAGE_SIZE
+
+static inline struct vm_struct *get_io_area(unsigned long size)
+{
+	return get_vm_area(size, VM_IOREMAP);
+}
+
+
+static inline void free_io_area(void *addr)
+{
+	vfree((void *)(PAGE_MASK & (unsigned long)addr));
+}
+
+#else
+
+#define IO_SIZE		(256*1024)
+
+static struct vm_struct *iolist;
+
+static struct vm_struct *get_io_area(unsigned long size)
+{
+	unsigned long addr;
+	struct vm_struct **p, *tmp, *area;
+
+	area = kmalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		return NULL;
+	addr = KMAP_START;
+	for (p = &iolist; (tmp = *p) ; p = &tmp->next) {
+		if (size + addr < (unsigned long)tmp->addr)
+			break;
+		if (addr > KMAP_END-size) {
+			kfree(area);
+			return NULL;
+		}
+		addr = tmp->size + (unsigned long)tmp->addr;
+	}
+	area->addr = (void *)addr;
+	area->size = size + IO_SIZE;
+	area->next = *p;
+	*p = area;
+	return area;
+}
+
+static inline void free_io_area(void *addr)
+{
+	struct vm_struct **p, *tmp;
+
+	if (!addr)
+		return;
+	addr = (void *)((unsigned long)addr & -IO_SIZE);
+	for (p = &iolist ; (tmp = *p) ; p = &tmp->next) {
+		if (tmp->addr == addr) {
+			*p = tmp->next;
+			__iounmap(tmp->addr, tmp->size);
+			kfree(tmp);
+			return;
+		}
+	}
+}
+
+#endif
+
+/*
+ * Map some physical address range into the kernel address space.
+ */
+/* Rewritten by Andreas Schwab to remove all races. */
+
+void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag)
+{
+	struct vm_struct *area;
+	unsigned long virtaddr, retaddr;
+	long offset;
+	pgd_t *pgd_dir;
+	pmd_t *pmd_dir;
+	pte_t *pte_dir;
+
+	/*
+	 * Don't allow mappings that wrap..
+	 */
+	if (!size || physaddr > (unsigned long)(-size))
+		return NULL;
+
+#ifdef CONFIG_AMIGA
+	if (MACH_IS_AMIGA) {
+		if ((physaddr >= 0x40000000) && (physaddr + size < 0x60000000)
+		    && (cacheflag == IOMAP_NOCACHE_SER))
+			return (void __iomem *)physaddr;
+	}
+#endif
+
+#ifdef DEBUG
+	printk("ioremap: 0x%lx,0x%lx(%d) - ", physaddr, size, cacheflag);
+#endif
+	/*
+	 * Mappings have to be aligned
+	 */
+	offset = physaddr & (IO_SIZE - 1);
+	physaddr &= -IO_SIZE;
+	size = (size + offset + IO_SIZE - 1) & -IO_SIZE;
+
+	/*
+	 * Ok, go for it..
+	 */
+	area = get_io_area(size);
+	if (!area)
+		return NULL;
+
+	virtaddr = (unsigned long)area->addr;
+	retaddr = virtaddr + offset;
+#ifdef DEBUG
+	printk("0x%lx,0x%lx,0x%lx", physaddr, virtaddr, retaddr);
+#endif
+
+	/*
+	 * add cache and table flags to physical address
+	 */
+	if (CPU_IS_040_OR_060) {
+		physaddr |= (_PAGE_PRESENT | _PAGE_GLOBAL040 |
+			     _PAGE_ACCESSED | _PAGE_DIRTY);
+		switch (cacheflag) {
+		case IOMAP_FULL_CACHING:
+			physaddr |= _PAGE_CACHE040;
+			break;
+		case IOMAP_NOCACHE_SER:
+		default:
+			physaddr |= _PAGE_NOCACHE_S;
+			break;
+		case IOMAP_NOCACHE_NONSER:
+			physaddr |= _PAGE_NOCACHE;
+			break;
+		case IOMAP_WRITETHROUGH:
+			physaddr |= _PAGE_CACHE040W;
+			break;
+		}
+	} else {
+		physaddr |= (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY);
+		switch (cacheflag) {
+		case IOMAP_NOCACHE_SER:
+		case IOMAP_NOCACHE_NONSER:
+		default:
+			physaddr |= _PAGE_NOCACHE030;
+			break;
+		case IOMAP_FULL_CACHING:
+		case IOMAP_WRITETHROUGH:
+			break;
+		}
+	}
+
+	while ((long)size > 0) {
+#ifdef DEBUG
+		if (!(virtaddr & (PTRTREESIZE-1)))
+			printk ("\npa=%#lx va=%#lx ", physaddr, virtaddr);
+#endif
+		pgd_dir = pgd_offset_k(virtaddr);
+		pmd_dir = pmd_alloc(&init_mm, pgd_dir, virtaddr);
+		if (!pmd_dir) {
+			printk("ioremap: no mem for pmd_dir\n");
+			return NULL;
+		}
+
+		if (CPU_IS_020_OR_030) {
+			pmd_dir->pmd[(virtaddr/PTRTREESIZE) & 15] = physaddr;
+			physaddr += PTRTREESIZE;
+			virtaddr += PTRTREESIZE;
+			size -= PTRTREESIZE;
+		} else {
+			pte_dir = pte_alloc_kernel(pmd_dir, virtaddr);
+			if (!pte_dir) {
+				printk("ioremap: no mem for pte_dir\n");
+				return NULL;
+			}
+
+			pte_val(*pte_dir) = physaddr;
+			virtaddr += PAGE_SIZE;
+			physaddr += PAGE_SIZE;
+			size -= PAGE_SIZE;
+		}
+	}
+#ifdef DEBUG
+	printk("\n");
+#endif
+	flush_tlb_all();
+
+	return (void __iomem *)retaddr;
+}
+EXPORT_SYMBOL(__ioremap);
+
+/*
+ * Unmap a ioremap()ed region again
+ */
+void iounmap(void __iomem *addr)
+{
+#ifdef CONFIG_AMIGA
+	if ((!MACH_IS_AMIGA) ||
+	    (((unsigned long)addr < 0x40000000) ||
+	     ((unsigned long)addr > 0x60000000)))
+			free_io_area((__force void *)addr);
+#else
+	free_io_area((__force void *)addr);
+#endif
+}
+EXPORT_SYMBOL(iounmap);
+
+/*
+ * __iounmap unmaps nearly everything, so be careful
+ * it doesn't free currently pointer/page tables anymore but it
+ * wans't used anyway and might be added later.
+ */
+void __iounmap(void *addr, unsigned long size)
+{
+	unsigned long virtaddr = (unsigned long)addr;
+	pgd_t *pgd_dir;
+	pmd_t *pmd_dir;
+	pte_t *pte_dir;
+
+	while ((long)size > 0) {
+		pgd_dir = pgd_offset_k(virtaddr);
+		if (pgd_bad(*pgd_dir)) {
+			printk("iounmap: bad pgd(%08lx)\n", pgd_val(*pgd_dir));
+			pgd_clear(pgd_dir);
+			return;
+		}
+		pmd_dir = pmd_offset(pgd_dir, virtaddr);
+
+		if (CPU_IS_020_OR_030) {
+			int pmd_off = (virtaddr/PTRTREESIZE) & 15;
+			int pmd_type = pmd_dir->pmd[pmd_off] & _DESCTYPE_MASK;
+
+			if (pmd_type == _PAGE_PRESENT) {
+				pmd_dir->pmd[pmd_off] = 0;
+				virtaddr += PTRTREESIZE;
+				size -= PTRTREESIZE;
+				continue;
+			} else if (pmd_type == 0)
+				continue;
+		}
+
+		if (pmd_bad(*pmd_dir)) {
+			printk("iounmap: bad pmd (%08lx)\n", pmd_val(*pmd_dir));
+			pmd_clear(pmd_dir);
+			return;
+		}
+		pte_dir = pte_offset_kernel(pmd_dir, virtaddr);
+
+		pte_val(*pte_dir) = 0;
+		virtaddr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+
+	flush_tlb_all();
+}
+
+/*
+ * Set new cache mode for some kernel address space.
+ * The caller must push data for that range itself, if such data may already
+ * be in the cache.
+ */
+void kernel_set_cachemode(void *addr, unsigned long size, int cmode)
+{
+	unsigned long virtaddr = (unsigned long)addr;
+	pgd_t *pgd_dir;
+	pmd_t *pmd_dir;
+	pte_t *pte_dir;
+
+	if (CPU_IS_040_OR_060) {
+		switch (cmode) {
+		case IOMAP_FULL_CACHING:
+			cmode = _PAGE_CACHE040;
+			break;
+		case IOMAP_NOCACHE_SER:
+		default:
+			cmode = _PAGE_NOCACHE_S;
+			break;
+		case IOMAP_NOCACHE_NONSER:
+			cmode = _PAGE_NOCACHE;
+			break;
+		case IOMAP_WRITETHROUGH:
+			cmode = _PAGE_CACHE040W;
+			break;
+		}
+	} else {
+		switch (cmode) {
+		case IOMAP_NOCACHE_SER:
+		case IOMAP_NOCACHE_NONSER:
+		default:
+			cmode = _PAGE_NOCACHE030;
+			break;
+		case IOMAP_FULL_CACHING:
+		case IOMAP_WRITETHROUGH:
+			cmode = 0;
+		}
+	}
+
+	while ((long)size > 0) {
+		pgd_dir = pgd_offset_k(virtaddr);
+		if (pgd_bad(*pgd_dir)) {
+			printk("iocachemode: bad pgd(%08lx)\n", pgd_val(*pgd_dir));
+			pgd_clear(pgd_dir);
+			return;
+		}
+		pmd_dir = pmd_offset(pgd_dir, virtaddr);
+
+		if (CPU_IS_020_OR_030) {
+			int pmd_off = (virtaddr/PTRTREESIZE) & 15;
+
+			if ((pmd_dir->pmd[pmd_off] & _DESCTYPE_MASK) == _PAGE_PRESENT) {
+				pmd_dir->pmd[pmd_off] = (pmd_dir->pmd[pmd_off] &
+							 _CACHEMASK040) | cmode;
+				virtaddr += PTRTREESIZE;
+				size -= PTRTREESIZE;
+				continue;
+			}
+		}
+
+		if (pmd_bad(*pmd_dir)) {
+			printk("iocachemode: bad pmd (%08lx)\n", pmd_val(*pmd_dir));
+			pmd_clear(pmd_dir);
+			return;
+		}
+		pte_dir = pte_offset_kernel(pmd_dir, virtaddr);
+
+		pte_val(*pte_dir) = (pte_val(*pte_dir) & _CACHEMASK040) | cmode;
+		virtaddr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+
+	flush_tlb_all();
+}
+EXPORT_SYMBOL(kernel_set_cachemode);

diff --git a/arch/m68knommu/mm/kmap.c b/arch/m68k/mm/kmap_no.c
similarity index 100%
rename from arch/m68knommu/mm/kmap.c
rename to arch/m68k/mm/kmap_no.c


diff --git a/arch/m68knommu/platform/5206/Makefile b/arch/m68k/platform/5206/Makefile
similarity index 100%
rename from arch/m68knommu/platform/5206/Makefile
rename to arch/m68k/platform/5206/Makefile


diff --git a/arch/m68knommu/platform/5206/config.c b/arch/m68k/platform/5206/config.c
similarity index 100%
rename from arch/m68knommu/platform/5206/config.c
rename to arch/m68k/platform/5206/config.c


diff --git a/arch/m68knommu/platform/5206/gpio.c b/arch/m68k/platform/5206/gpio.c
similarity index 100%
rename from arch/m68knommu/platform/5206/gpio.c
rename to arch/m68k/platform/5206/gpio.c


diff --git a/arch/m68knommu/platform/5206e/Makefile b/arch/m68k/platform/5206e/Makefile
similarity index 100%
rename from arch/m68knommu/platform/5206e/Makefile
rename to arch/m68k/platform/5206e/Makefile


diff --git a/arch/m68knommu/platform/5206e/config.c b/arch/m68k/platform/5206e/config.c
similarity index 100%
rename from arch/m68knommu/platform/5206e/config.c
rename to arch/m68k/platform/5206e/config.c


diff --git a/arch/m68knommu/platform/5206e/gpio.c b/arch/m68k/platform/5206e/gpio.c
similarity index 100%
rename from arch/m68knommu/platform/5206e/gpio.c
rename to arch/m68k/platform/5206e/gpio.c


diff --git a/arch/m68knommu/platform/520x/Makefile b/arch/m68k/platform/520x/Makefile
similarity index 100%
rename from arch/m68knommu/platform/520x/Makefile
rename to arch/m68k/platform/520x/Makefile


diff --git a/arch/m68knommu/platform/520x/config.c b/arch/m68k/platform/520x/config.c
similarity index 100%
rename from arch/m68knommu/platform/520x/config.c
rename to arch/m68k/platform/520x/config.c


diff --git a/arch/m68knommu/platform/520x/gpio.c b/arch/m68k/platform/520x/gpio.c
similarity index 100%
rename from arch/m68knommu/platform/520x/gpio.c
rename to arch/m68k/platform/520x/gpio.c


diff --git a/arch/m68knommu/platform/523x/Makefile b/arch/m68k/platform/523x/Makefile
similarity index 100%
rename from arch/m68knommu/platform/523x/Makefile
rename to arch/m68k/platform/523x/Makefile


diff --git a/arch/m68knommu/platform/523x/config.c b/arch/m68k/platform/523x/config.c
similarity index 100%
rename from arch/m68knommu/platform/523x/config.c
rename to arch/m68k/platform/523x/config.c


diff --git a/arch/m68knommu/platform/523x/gpio.c b/arch/m68k/platform/523x/gpio.c
similarity index 100%
rename from arch/m68knommu/platform/523x/gpio.c
rename to arch/m68k/platform/523x/gpio.c


diff --git a/arch/m68knommu/platform/5249/Makefile b/arch/m68k/platform/5249/Makefile
similarity index 100%
rename from arch/m68knommu/platform/5249/Makefile
rename to arch/m68k/platform/5249/Makefile


diff --git a/arch/m68knommu/platform/5249/config.c b/arch/m68k/platform/5249/config.c
similarity index 100%
rename from arch/m68knommu/platform/5249/config.c
rename to arch/m68k/platform/5249/config.c


diff --git a/arch/m68knommu/platform/5249/gpio.c b/arch/m68k/platform/5249/gpio.c
similarity index 100%
rename from arch/m68knommu/platform/5249/gpio.c
rename to arch/m68k/platform/5249/gpio.c


diff --git a/arch/m68knommu/platform/5249/intc2.c b/arch/m68k/platform/5249/intc2.c
similarity index 100%
rename from arch/m68knommu/platform/5249/intc2.c
rename to arch/m68k/platform/5249/intc2.c


diff --git a/arch/m68knommu/platform/5272/Makefile b/arch/m68k/platform/5272/Makefile
similarity index 100%
rename from arch/m68knommu/platform/5272/Makefile
rename to arch/m68k/platform/5272/Makefile


diff --git a/arch/m68knommu/platform/5272/config.c b/arch/m68k/platform/5272/config.c
similarity index 100%
rename from arch/m68knommu/platform/5272/config.c
rename to arch/m68k/platform/5272/config.c


diff --git a/arch/m68knommu/platform/5272/gpio.c b/arch/m68k/platform/5272/gpio.c
similarity index 100%
rename from arch/m68knommu/platform/5272/gpio.c
rename to arch/m68k/platform/5272/gpio.c


diff --git a/arch/m68knommu/platform/5272/intc.c b/arch/m68k/platform/5272/intc.c
similarity index 100%
rename from arch/m68knommu/platform/5272/intc.c
rename to arch/m68k/platform/5272/intc.c


diff --git a/arch/m68knommu/platform/527x/Makefile b/arch/m68k/platform/527x/Makefile
similarity index 100%
rename from arch/m68knommu/platform/527x/Makefile
rename to arch/m68k/platform/527x/Makefile


diff --git a/arch/m68knommu/platform/527x/config.c b/arch/m68k/platform/527x/config.c
similarity index 100%
rename from arch/m68knommu/platform/527x/config.c
rename to arch/m68k/platform/527x/config.c


diff --git a/arch/m68knommu/platform/527x/gpio.c b/arch/m68k/platform/527x/gpio.c
similarity index 100%
rename from arch/m68knommu/platform/527x/gpio.c
rename to arch/m68k/platform/527x/gpio.c


diff --git a/arch/m68knommu/platform/528x/Makefile b/arch/m68k/platform/528x/Makefile
similarity index 100%
rename from arch/m68knommu/platform/528x/Makefile
rename to arch/m68k/platform/528x/Makefile


diff --git a/arch/m68knommu/platform/528x/config.c b/arch/m68k/platform/528x/config.c
similarity index 100%
rename from arch/m68knommu/platform/528x/config.c
rename to arch/m68k/platform/528x/config.c


diff --git a/arch/m68knommu/platform/528x/gpio.c b/arch/m68k/platform/528x/gpio.c
similarity index 100%
rename from arch/m68knommu/platform/528x/gpio.c
rename to arch/m68k/platform/528x/gpio.c


diff --git a/arch/m68knommu/platform/5307/Makefile b/arch/m68k/platform/5307/Makefile
similarity index 100%
rename from arch/m68knommu/platform/5307/Makefile
rename to arch/m68k/platform/5307/Makefile


diff --git a/arch/m68knommu/platform/5307/config.c b/arch/m68k/platform/5307/config.c
similarity index 100%
rename from arch/m68knommu/platform/5307/config.c
rename to arch/m68k/platform/5307/config.c


diff --git a/arch/m68knommu/platform/5307/gpio.c b/arch/m68k/platform/5307/gpio.c
similarity index 100%
rename from arch/m68knommu/platform/5307/gpio.c
rename to arch/m68k/platform/5307/gpio.c


diff --git a/arch/m68knommu/platform/5307/nettel.c b/arch/m68k/platform/5307/nettel.c
similarity index 100%
rename from arch/m68knommu/platform/5307/nettel.c
rename to arch/m68k/platform/5307/nettel.c


diff --git a/arch/m68knommu/platform/532x/Makefile b/arch/m68k/platform/532x/Makefile
similarity index 100%
rename from arch/m68knommu/platform/532x/Makefile
rename to arch/m68k/platform/532x/Makefile


diff --git a/arch/m68knommu/platform/532x/config.c b/arch/m68k/platform/532x/config.c
similarity index 100%
rename from arch/m68knommu/platform/532x/config.c
rename to arch/m68k/platform/532x/config.c


diff --git a/arch/m68knommu/platform/532x/gpio.c b/arch/m68k/platform/532x/gpio.c
similarity index 100%
rename from arch/m68knommu/platform/532x/gpio.c
rename to arch/m68k/platform/532x/gpio.c


diff --git a/arch/m68knommu/platform/5407/Makefile b/arch/m68k/platform/5407/Makefile
similarity index 100%
rename from arch/m68knommu/platform/5407/Makefile
rename to arch/m68k/platform/5407/Makefile


diff --git a/arch/m68knommu/platform/5407/config.c b/arch/m68k/platform/5407/config.c
similarity index 100%
rename from arch/m68knommu/platform/5407/config.c
rename to arch/m68k/platform/5407/config.c


diff --git a/arch/m68knommu/platform/5407/gpio.c b/arch/m68k/platform/5407/gpio.c
similarity index 100%
rename from arch/m68knommu/platform/5407/gpio.c
rename to arch/m68k/platform/5407/gpio.c


diff --git a/arch/m68knommu/platform/54xx/Makefile b/arch/m68k/platform/54xx/Makefile
similarity index 100%
rename from arch/m68knommu/platform/54xx/Makefile
rename to arch/m68k/platform/54xx/Makefile


diff --git a/arch/m68knommu/platform/54xx/config.c b/arch/m68k/platform/54xx/config.c
similarity index 100%
rename from arch/m68knommu/platform/54xx/config.c
rename to arch/m68k/platform/54xx/config.c


diff --git a/arch/m68knommu/platform/54xx/firebee.c b/arch/m68k/platform/54xx/firebee.c
similarity index 100%
rename from arch/m68knommu/platform/54xx/firebee.c
rename to arch/m68k/platform/54xx/firebee.c


diff --git a/arch/m68knommu/platform/68328/Makefile b/arch/m68k/platform/68328/Makefile
similarity index 100%
rename from arch/m68knommu/platform/68328/Makefile
rename to arch/m68k/platform/68328/Makefile


diff --git a/arch/m68knommu/platform/68328/bootlogo.h b/arch/m68k/platform/68328/bootlogo.h
similarity index 100%
rename from arch/m68knommu/platform/68328/bootlogo.h
rename to arch/m68k/platform/68328/bootlogo.h


diff --git a/arch/m68knommu/platform/68328/bootlogo.pl b/arch/m68k/platform/68328/bootlogo.pl
similarity index 100%
rename from arch/m68knommu/platform/68328/bootlogo.pl
rename to arch/m68k/platform/68328/bootlogo.pl


diff --git a/arch/m68knommu/platform/68328/config.c b/arch/m68k/platform/68328/config.c
similarity index 100%
rename from arch/m68knommu/platform/68328/config.c
rename to arch/m68k/platform/68328/config.c


diff --git a/arch/m68knommu/platform/68328/entry.S b/arch/m68k/platform/68328/entry.S
similarity index 100%
rename from arch/m68knommu/platform/68328/entry.S
rename to arch/m68k/platform/68328/entry.S


diff --git a/arch/m68knommu/platform/68328/head-de2.S b/arch/m68k/platform/68328/head-de2.S
similarity index 100%
rename from arch/m68knommu/platform/68328/head-de2.S
rename to arch/m68k/platform/68328/head-de2.S


diff --git a/arch/m68knommu/platform/68328/head-pilot.S b/arch/m68k/platform/68328/head-pilot.S
similarity index 100%
rename from arch/m68knommu/platform/68328/head-pilot.S
rename to arch/m68k/platform/68328/head-pilot.S


diff --git a/arch/m68knommu/platform/68328/head-ram.S b/arch/m68k/platform/68328/head-ram.S
similarity index 100%
rename from arch/m68knommu/platform/68328/head-ram.S
rename to arch/m68k/platform/68328/head-ram.S


diff --git a/arch/m68knommu/platform/68328/head-rom.S b/arch/m68k/platform/68328/head-rom.S
similarity index 100%
rename from arch/m68knommu/platform/68328/head-rom.S
rename to arch/m68k/platform/68328/head-rom.S


diff --git a/arch/m68knommu/platform/68328/ints.c b/arch/m68k/platform/68328/ints.c
similarity index 100%
rename from arch/m68knommu/platform/68328/ints.c
rename to arch/m68k/platform/68328/ints.c


diff --git a/arch/m68knommu/platform/68328/romvec.S b/arch/m68k/platform/68328/romvec.S
similarity index 100%
rename from arch/m68knommu/platform/68328/romvec.S
rename to arch/m68k/platform/68328/romvec.S


diff --git a/arch/m68knommu/platform/68328/timers.c b/arch/m68k/platform/68328/timers.c
similarity index 100%
rename from arch/m68knommu/platform/68328/timers.c
rename to arch/m68k/platform/68328/timers.c


diff --git a/arch/m68knommu/platform/68360/Makefile b/arch/m68k/platform/68360/Makefile
similarity index 100%
rename from arch/m68knommu/platform/68360/Makefile
rename to arch/m68k/platform/68360/Makefile


diff --git a/arch/m68knommu/platform/68360/commproc.c b/arch/m68k/platform/68360/commproc.c
similarity index 100%
rename from arch/m68knommu/platform/68360/commproc.c
rename to arch/m68k/platform/68360/commproc.c


diff --git a/arch/m68knommu/platform/68360/config.c b/arch/m68k/platform/68360/config.c
similarity index 100%
rename from arch/m68knommu/platform/68360/config.c
rename to arch/m68k/platform/68360/config.c


diff --git a/arch/m68knommu/platform/68360/entry.S b/arch/m68k/platform/68360/entry.S
similarity index 100%
rename from arch/m68knommu/platform/68360/entry.S
rename to arch/m68k/platform/68360/entry.S


diff --git a/arch/m68knommu/platform/68360/head-ram.S b/arch/m68k/platform/68360/head-ram.S
similarity index 100%
rename from arch/m68knommu/platform/68360/head-ram.S
rename to arch/m68k/platform/68360/head-ram.S


diff --git a/arch/m68knommu/platform/68360/head-rom.S b/arch/m68k/platform/68360/head-rom.S
similarity index 100%
rename from arch/m68knommu/platform/68360/head-rom.S
rename to arch/m68k/platform/68360/head-rom.S


diff --git a/arch/m68knommu/platform/68360/ints.c b/arch/m68k/platform/68360/ints.c
similarity index 100%
rename from arch/m68knommu/platform/68360/ints.c
rename to arch/m68k/platform/68360/ints.c


diff --git a/arch/m68knommu/platform/68EZ328/Makefile b/arch/m68k/platform/68EZ328/Makefile
similarity index 100%
rename from arch/m68knommu/platform/68EZ328/Makefile
rename to arch/m68k/platform/68EZ328/Makefile


diff --git a/arch/m68knommu/platform/68EZ328/bootlogo.h b/arch/m68k/platform/68EZ328/bootlogo.h
similarity index 100%
rename from arch/m68knommu/platform/68EZ328/bootlogo.h
rename to arch/m68k/platform/68EZ328/bootlogo.h


diff --git a/arch/m68knommu/platform/68EZ328/config.c b/arch/m68k/platform/68EZ328/config.c
similarity index 100%
rename from arch/m68knommu/platform/68EZ328/config.c
rename to arch/m68k/platform/68EZ328/config.c


diff --git a/arch/m68knommu/platform/68VZ328/Makefile b/arch/m68k/platform/68VZ328/Makefile
similarity index 100%
rename from arch/m68knommu/platform/68VZ328/Makefile
rename to arch/m68k/platform/68VZ328/Makefile


diff --git a/arch/m68knommu/platform/68VZ328/config.c b/arch/m68k/platform/68VZ328/config.c
similarity index 100%
rename from arch/m68knommu/platform/68VZ328/config.c
rename to arch/m68k/platform/68VZ328/config.c


diff --git a/arch/m68knommu/platform/Makefile b/arch/m68k/platform/Makefile
similarity index 100%
rename from arch/m68knommu/platform/Makefile
rename to arch/m68k/platform/Makefile


diff --git a/arch/m68knommu/platform/coldfire/Makefile b/arch/m68k/platform/coldfire/Makefile
similarity index 100%
rename from arch/m68knommu/platform/coldfire/Makefile
rename to arch/m68k/platform/coldfire/Makefile


diff --git a/arch/m68knommu/platform/coldfire/cache.c b/arch/m68k/platform/coldfire/cache.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/cache.c
rename to arch/m68k/platform/coldfire/cache.c


diff --git a/arch/m68knommu/platform/coldfire/clk.c b/arch/m68k/platform/coldfire/clk.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/clk.c
rename to arch/m68k/platform/coldfire/clk.c


diff --git a/arch/m68knommu/platform/coldfire/dma.c b/arch/m68k/platform/coldfire/dma.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/dma.c
rename to arch/m68k/platform/coldfire/dma.c


diff --git a/arch/m68knommu/platform/coldfire/dma_timer.c b/arch/m68k/platform/coldfire/dma_timer.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/dma_timer.c
rename to arch/m68k/platform/coldfire/dma_timer.c


diff --git a/arch/m68knommu/platform/coldfire/entry.S b/arch/m68k/platform/coldfire/entry.S
similarity index 100%
rename from arch/m68knommu/platform/coldfire/entry.S
rename to arch/m68k/platform/coldfire/entry.S


diff --git a/arch/m68knommu/platform/coldfire/gpio.c b/arch/m68k/platform/coldfire/gpio.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/gpio.c
rename to arch/m68k/platform/coldfire/gpio.c


diff --git a/arch/m68knommu/platform/coldfire/head.S b/arch/m68k/platform/coldfire/head.S
similarity index 100%
rename from arch/m68knommu/platform/coldfire/head.S
rename to arch/m68k/platform/coldfire/head.S


diff --git a/arch/m68knommu/platform/coldfire/intc-2.c b/arch/m68k/platform/coldfire/intc-2.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/intc-2.c
rename to arch/m68k/platform/coldfire/intc-2.c


diff --git a/arch/m68knommu/platform/coldfire/intc-simr.c b/arch/m68k/platform/coldfire/intc-simr.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/intc-simr.c
rename to arch/m68k/platform/coldfire/intc-simr.c


diff --git a/arch/m68knommu/platform/coldfire/intc.c b/arch/m68k/platform/coldfire/intc.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/intc.c
rename to arch/m68k/platform/coldfire/intc.c


diff --git a/arch/m68knommu/platform/coldfire/pinmux.c b/arch/m68k/platform/coldfire/pinmux.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/pinmux.c
rename to arch/m68k/platform/coldfire/pinmux.c


diff --git a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68k/platform/coldfire/pit.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/pit.c
rename to arch/m68k/platform/coldfire/pit.c


diff --git a/arch/m68knommu/platform/coldfire/sltimers.c b/arch/m68k/platform/coldfire/sltimers.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/sltimers.c
rename to arch/m68k/platform/coldfire/sltimers.c


diff --git a/arch/m68knommu/platform/coldfire/timers.c b/arch/m68k/platform/coldfire/timers.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/timers.c
rename to arch/m68k/platform/coldfire/timers.c


diff --git a/arch/m68knommu/platform/coldfire/vectors.c b/arch/m68k/platform/coldfire/vectors.c
similarity index 100%
rename from arch/m68knommu/platform/coldfire/vectors.c
rename to arch/m68k/platform/coldfire/vectors.c


diff --git a/arch/m68knommu/Kconfig.debug b/arch/m68knommu/Kconfig.debug
deleted file mode 100644
index ed6d9a8..0000000
--- a/arch/m68knommu/Kconfig.debug
+++ /dev/null

@@ -1,35 +0,0 @@
-menu "Kernel hacking"
-
-source "lib/Kconfig.debug"
-
-config FULLDEBUG
-	bool "Full Symbolic/Source Debugging support"
-	help
-	  Enable debugging symbols on kernel build.
-
-config HIGHPROFILE
-	bool "Use fast second timer for profiling"
-	depends on COLDFIRE
-	help
-	  Use a fast secondary clock to produce profiling information.
-
-config BOOTPARAM
-	bool 'Compiled-in Kernel Boot Parameter'
-
-config BOOTPARAM_STRING
-	string 'Kernel Boot Parameter'
-	default 'console=ttyS0,19200'
-	depends on BOOTPARAM
-
-config NO_KERNEL_MSG
-	bool "Suppress Kernel BUG Messages"
-	help
-	  Do not output any debug BUG messages within the kernel.
-
-config BDM_DISABLE
-	bool "Disable BDM signals"
-	depends on (EXPERIMENTAL && COLDFIRE)
-	help
-	  Disable the ColdFire CPU's BDM signals.
-
-endmenu

diff --git a/arch/m68knommu/defconfig b/arch/m68knommu/defconfig
deleted file mode 100644
index 2f5655c..0000000
--- a/arch/m68knommu/defconfig
+++ /dev/null

@@ -1,74 +0,0 @@
-CONFIG_EXPERIMENTAL=y
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-# CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
-# CONFIG_FUTEX is not set
-# CONFIG_EPOLL is not set
-# CONFIG_SIGNALFD is not set
-# CONFIG_TIMERFD is not set
-# CONFIG_EVENTFD is not set
-# CONFIG_AIO is not set
-# CONFIG_VM_EVENT_COUNTERS is not set
-# CONFIG_COMPAT_BRK is not set
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_M520x=y
-CONFIG_CLOCK_SET=y
-CONFIG_CLOCK_FREQ=166666666
-CONFIG_CLOCK_DIV=2
-CONFIG_M5208EVB=y
-# CONFIG_4KSTACKS is not set
-CONFIG_RAMBASE=0x40000000
-CONFIG_RAMSIZE=0x2000000
-CONFIG_VECTORBASE=0x40000000
-CONFIG_KERNELBASE=0x40020000
-CONFIG_RAM16BIT=y
-CONFIG_BINFMT_FLAT=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_IPV6 is not set
-CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_RAM=y
-CONFIG_MTD_UCLINUX=y
-CONFIG_BLK_DEV_RAM=y
-# CONFIG_MISC_DEVICES is not set
-CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_FEC=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_MCF=y
-CONFIG_SERIAL_MCF_BAUDRATE=115200
-CONFIG_SERIAL_MCF_CONSOLE=y
-# CONFIG_UNIX98_PTYS is not set
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_EXT2_FS=y
-# CONFIG_FILE_LOCKING is not set
-# CONFIG_DNOTIFY is not set
-# CONFIG_SYSFS is not set
-CONFIG_ROMFS_FS=y
-CONFIG_ROMFS_BACKED_BY_MTD=y
-# CONFIG_NETWORK_FILESYSTEMS is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_FULLDEBUG=y
-CONFIG_BOOTPARAM=y
-CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0"

diff --git a/arch/m68knommu/kernel/.gitignore b/arch/m68knommu/kernel/.gitignore
deleted file mode 100644
index c5f676c..0000000
--- a/arch/m68knommu/kernel/.gitignore
+++ /dev/null

@@ -1 +0,0 @@
-vmlinux.lds

diff --git a/arch/m68knommu/lib/ashldi3.c b/arch/m68knommu/lib/ashldi3.c
deleted file mode 100644
index 008403e..0000000
--- a/arch/m68knommu/lib/ashldi3.c
+++ /dev/null

@@ -1,62 +0,0 @@
-/* ashrdi3.c extracted from gcc-2.95.2/libgcc2.c which is: */
-/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define BITS_PER_UNIT 8
-
-typedef 	 int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef		 int DItype	__attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
-
-struct DIstruct {SItype high, low;};
-
-typedef union
-{
-  struct DIstruct s;
-  DItype ll;
-} DIunion;
-
-DItype
-__ashldi3 (DItype u, word_type b)
-{
-  DIunion w;
-  word_type bm;
-  DIunion uu;
-
-  if (b == 0)
-    return u;
-
-  uu.ll = u;
-
-  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
-  if (bm <= 0)
-    {
-      w.s.low = 0;
-      w.s.high = (USItype)uu.s.low << -bm;
-    }
-  else
-    {
-      USItype carries = (USItype)uu.s.low >> bm;
-      w.s.low = (USItype)uu.s.low << b;
-      w.s.high = ((USItype)uu.s.high << b) | carries;
-    }
-
-  return w.ll;
-}

diff --git a/arch/m68knommu/lib/lshrdi3.c b/arch/m68knommu/lib/lshrdi3.c
deleted file mode 100644
index 93b1cb6..0000000
--- a/arch/m68knommu/lib/lshrdi3.c
+++ /dev/null

@@ -1,62 +0,0 @@
-/* lshrdi3.c extracted from gcc-2.7.2/libgcc2.c which is: */
-/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define BITS_PER_UNIT 8
-
-typedef 	 int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef		 int DItype	__attribute__ ((mode (DI)));
-typedef int word_type __attribute__ ((mode (__word__)));
-
-struct DIstruct {SItype high, low;};
-
-typedef union
-{
-  struct DIstruct s;
-  DItype ll;
-} DIunion;
-
-DItype
-__lshrdi3 (DItype u, word_type b)
-{
-  DIunion w;
-  word_type bm;
-  DIunion uu;
-
-  if (b == 0)
-    return u;
-
-  uu.ll = u;
-
-  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
-  if (bm <= 0)
-    {
-      w.s.high = 0;
-      w.s.low = (USItype)uu.s.high >> -bm;
-    }
-  else
-    {
-      USItype carries = (USItype)uu.s.high << bm;
-      w.s.high = (USItype)uu.s.high >> b;
-      w.s.low = ((USItype)uu.s.low >> b) | carries;
-    }
-
-  return w.ll;
-}

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 9905e2e..83aa5fb 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig

@@ -22,6 +22,7 @@
 	select HAVE_DMA_API_DEBUG
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
+	select GENERIC_IRQ_SHOW
 	select HAVE_ARCH_JUMP_LABEL
 
 menu "Machine selection"
@@ -862,6 +863,9 @@
 config CFE
 	bool
 
+config ARCH_DMA_ADDR_T_64BIT
+	def_bool (HIGHMEM && 64BIT_PHYS_ADDR) || 64BIT
+
 config DMA_COHERENT
 	bool
 

diff --git a/arch/mips/alchemy/common/irq.c b/arch/mips/alchemy/common/irq.c
index 9f78ada..55dd7c8 100644
--- a/arch/mips/alchemy/common/irq.c
+++ b/arch/mips/alchemy/common/irq.c

@@ -39,7 +39,7 @@
 #include <asm/mach-pb1x00/pb1000.h>
 #endif
 
-static int au1x_ic_settype(unsigned int irq, unsigned int flow_type);
+static int au1x_ic_settype(struct irq_data *d, unsigned int flow_type);
 
 /* NOTE on interrupt priorities: The original writers of this code said:
  *
@@ -218,17 +218,17 @@
 };
 
 
-static void au1x_ic0_unmask(unsigned int irq_nr)
+static void au1x_ic0_unmask(struct irq_data *d)
 {
-	unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
 	au_writel(1 << bit, IC0_MASKSET);
 	au_writel(1 << bit, IC0_WAKESET);
 	au_sync();
 }
 
-static void au1x_ic1_unmask(unsigned int irq_nr)
+static void au1x_ic1_unmask(struct irq_data *d)
 {
-	unsigned int bit = irq_nr - AU1000_INTC1_INT_BASE;
+	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
 	au_writel(1 << bit, IC1_MASKSET);
 	au_writel(1 << bit, IC1_WAKESET);
 
@@ -236,31 +236,31 @@
  * nowhere in the current kernel sources is it disabled.	--mlau
  */
 #if defined(CONFIG_MIPS_PB1000)
-	if (irq_nr == AU1000_GPIO15_INT)
+	if (d->irq == AU1000_GPIO15_INT)
 		au_writel(0x4000, PB1000_MDR); /* enable int */
 #endif
 	au_sync();
 }
 
-static void au1x_ic0_mask(unsigned int irq_nr)
+static void au1x_ic0_mask(struct irq_data *d)
 {
-	unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
 	au_writel(1 << bit, IC0_MASKCLR);
 	au_writel(1 << bit, IC0_WAKECLR);
 	au_sync();
 }
 
-static void au1x_ic1_mask(unsigned int irq_nr)
+static void au1x_ic1_mask(struct irq_data *d)
 {
-	unsigned int bit = irq_nr - AU1000_INTC1_INT_BASE;
+	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
 	au_writel(1 << bit, IC1_MASKCLR);
 	au_writel(1 << bit, IC1_WAKECLR);
 	au_sync();
 }
 
-static void au1x_ic0_ack(unsigned int irq_nr)
+static void au1x_ic0_ack(struct irq_data *d)
 {
-	unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
 
 	/*
 	 * This may assume that we don't get interrupts from
@@ -271,9 +271,9 @@
 	au_sync();
 }
 
-static void au1x_ic1_ack(unsigned int irq_nr)
+static void au1x_ic1_ack(struct irq_data *d)
 {
-	unsigned int bit = irq_nr - AU1000_INTC1_INT_BASE;
+	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
 
 	/*
 	 * This may assume that we don't get interrupts from
@@ -284,9 +284,9 @@
 	au_sync();
 }
 
-static void au1x_ic0_maskack(unsigned int irq_nr)
+static void au1x_ic0_maskack(struct irq_data *d)
 {
-	unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+	unsigned int bit = d->irq - AU1000_INTC0_INT_BASE;
 
 	au_writel(1 << bit, IC0_WAKECLR);
 	au_writel(1 << bit, IC0_MASKCLR);
@@ -295,9 +295,9 @@
 	au_sync();
 }
 
-static void au1x_ic1_maskack(unsigned int irq_nr)
+static void au1x_ic1_maskack(struct irq_data *d)
 {
-	unsigned int bit = irq_nr - AU1000_INTC1_INT_BASE;
+	unsigned int bit = d->irq - AU1000_INTC1_INT_BASE;
 
 	au_writel(1 << bit, IC1_WAKECLR);
 	au_writel(1 << bit, IC1_MASKCLR);
@@ -306,9 +306,9 @@
 	au_sync();
 }
 
-static int au1x_ic1_setwake(unsigned int irq, unsigned int on)
+static int au1x_ic1_setwake(struct irq_data *d, unsigned int on)
 {
-	int bit = irq - AU1000_INTC1_INT_BASE;
+	int bit = d->irq - AU1000_INTC1_INT_BASE;
 	unsigned long wakemsk, flags;
 
 	/* only GPIO 0-7 can act as wakeup source.  Fortunately these
@@ -336,28 +336,30 @@
  */
 static struct irq_chip au1x_ic0_chip = {
 	.name		= "Alchemy-IC0",
-	.ack		= au1x_ic0_ack,
-	.mask		= au1x_ic0_mask,
-	.mask_ack	= au1x_ic0_maskack,
-	.unmask		= au1x_ic0_unmask,
-	.set_type	= au1x_ic_settype,
+	.irq_ack	= au1x_ic0_ack,
+	.irq_mask	= au1x_ic0_mask,
+	.irq_mask_ack	= au1x_ic0_maskack,
+	.irq_unmask	= au1x_ic0_unmask,
+	.irq_set_type	= au1x_ic_settype,
 };
 
 static struct irq_chip au1x_ic1_chip = {
 	.name		= "Alchemy-IC1",
-	.ack		= au1x_ic1_ack,
-	.mask		= au1x_ic1_mask,
-	.mask_ack	= au1x_ic1_maskack,
-	.unmask		= au1x_ic1_unmask,
-	.set_type	= au1x_ic_settype,
-	.set_wake	= au1x_ic1_setwake,
+	.irq_ack	= au1x_ic1_ack,
+	.irq_mask	= au1x_ic1_mask,
+	.irq_mask_ack	= au1x_ic1_maskack,
+	.irq_unmask	= au1x_ic1_unmask,
+	.irq_set_type	= au1x_ic_settype,
+	.irq_set_wake	= au1x_ic1_setwake,
 };
 
-static int au1x_ic_settype(unsigned int irq, unsigned int flow_type)
+static int au1x_ic_settype(struct irq_data *d, unsigned int flow_type)
 {
 	struct irq_chip *chip;
 	unsigned long icr[6];
-	unsigned int bit, ic;
+	unsigned int bit, ic, irq = d->irq;
+	irq_flow_handler_t handler = NULL;
+	unsigned char *name = NULL;
 	int ret;
 
 	if (irq >= AU1000_INTC1_INT_BASE) {
@@ -387,47 +389,47 @@
 		au_writel(1 << bit, icr[5]);
 		au_writel(1 << bit, icr[4]);
 		au_writel(1 << bit, icr[0]);
-		set_irq_chip_and_handler_name(irq, chip,
-				handle_edge_irq, "riseedge");
+		handler = handle_edge_irq;
+		name = "riseedge";
 		break;
 	case IRQ_TYPE_EDGE_FALLING:	/* 0:1:0 */
 		au_writel(1 << bit, icr[5]);
 		au_writel(1 << bit, icr[1]);
 		au_writel(1 << bit, icr[3]);
-		set_irq_chip_and_handler_name(irq, chip,
-				handle_edge_irq, "falledge");
+		handler = handle_edge_irq;
+		name = "falledge";
 		break;
 	case IRQ_TYPE_EDGE_BOTH:	/* 0:1:1 */
 		au_writel(1 << bit, icr[5]);
 		au_writel(1 << bit, icr[1]);
 		au_writel(1 << bit, icr[0]);
-		set_irq_chip_and_handler_name(irq, chip,
-				handle_edge_irq, "bothedge");
+		handler = handle_edge_irq;
+		name = "bothedge";
 		break;
 	case IRQ_TYPE_LEVEL_HIGH:	/* 1:0:1 */
 		au_writel(1 << bit, icr[2]);
 		au_writel(1 << bit, icr[4]);
 		au_writel(1 << bit, icr[0]);
-		set_irq_chip_and_handler_name(irq, chip,
-				handle_level_irq, "hilevel");
+		handler = handle_level_irq;
+		name = "hilevel";
 		break;
 	case IRQ_TYPE_LEVEL_LOW:	/* 1:1:0 */
 		au_writel(1 << bit, icr[2]);
 		au_writel(1 << bit, icr[1]);
 		au_writel(1 << bit, icr[3]);
-		set_irq_chip_and_handler_name(irq, chip,
-				handle_level_irq, "lowlevel");
+		handler = handle_level_irq;
+		name = "lowlevel";
 		break;
 	case IRQ_TYPE_NONE:		/* 0:0:0 */
 		au_writel(1 << bit, icr[5]);
 		au_writel(1 << bit, icr[4]);
 		au_writel(1 << bit, icr[3]);
-		/* set at least chip so we can call set_irq_type() on it */
-		set_irq_chip(irq, chip);
 		break;
 	default:
 		ret = -EINVAL;
 	}
+	__irq_set_chip_handler_name_locked(d->irq, chip, handler, name);
+
 	au_sync();
 
 	return ret;
@@ -504,11 +506,11 @@
 	 */
 	for (i = AU1000_INTC0_INT_BASE;
 	     (i < AU1000_INTC0_INT_BASE + 32); i++)
-		au1x_ic_settype(i, IRQ_TYPE_NONE);
+		au1x_ic_settype(irq_get_irq_data(i), IRQ_TYPE_NONE);
 
 	for (i = AU1000_INTC1_INT_BASE;
 	     (i < AU1000_INTC1_INT_BASE + 32); i++)
-		au1x_ic_settype(i, IRQ_TYPE_NONE);
+		au1x_ic_settype(irq_get_irq_data(i), IRQ_TYPE_NONE);
 
 	/*
 	 * Initialize IC0, which is fixed per processor.
@@ -526,7 +528,7 @@
 				au_writel(1 << bit, IC0_ASSIGNSET);
 		}
 
-		au1x_ic_settype(irq_nr, map->im_type);
+		au1x_ic_settype(irq_get_irq_data(irq_nr), map->im_type);
 		++map;
 	}
 

diff --git a/arch/mips/alchemy/devboards/bcsr.c b/arch/mips/alchemy/devboards/bcsr.c
index c52af88..f91c43a 100644
--- a/arch/mips/alchemy/devboards/bcsr.c
+++ b/arch/mips/alchemy/devboards/bcsr.c

@@ -97,26 +97,26 @@
  * CPLD generates tons of spurious interrupts (at least on my DB1200).
  *	-- mlau
  */
-static void bcsr_irq_mask(unsigned int irq_nr)
+static void bcsr_irq_mask(struct irq_data *d)
 {
-	unsigned short v = 1 << (irq_nr - bcsr_csc_base);
+	unsigned short v = 1 << (d->irq - bcsr_csc_base);
 	__raw_writew(v, bcsr_virt + BCSR_REG_INTCLR);
 	__raw_writew(v, bcsr_virt + BCSR_REG_MASKCLR);
 	wmb();
 }
 
-static void bcsr_irq_maskack(unsigned int irq_nr)
+static void bcsr_irq_maskack(struct irq_data *d)
 {
-	unsigned short v = 1 << (irq_nr - bcsr_csc_base);
+	unsigned short v = 1 << (d->irq - bcsr_csc_base);
 	__raw_writew(v, bcsr_virt + BCSR_REG_INTCLR);
 	__raw_writew(v, bcsr_virt + BCSR_REG_MASKCLR);
 	__raw_writew(v, bcsr_virt + BCSR_REG_INTSTAT);	/* ack */
 	wmb();
 }
 
-static void bcsr_irq_unmask(unsigned int irq_nr)
+static void bcsr_irq_unmask(struct irq_data *d)
 {
-	unsigned short v = 1 << (irq_nr - bcsr_csc_base);
+	unsigned short v = 1 << (d->irq - bcsr_csc_base);
 	__raw_writew(v, bcsr_virt + BCSR_REG_INTSET);
 	__raw_writew(v, bcsr_virt + BCSR_REG_MASKSET);
 	wmb();
@@ -124,9 +124,9 @@
 
 static struct irq_chip bcsr_irq_type = {
 	.name		= "CPLD",
-	.mask		= bcsr_irq_mask,
-	.mask_ack	= bcsr_irq_maskack,
-	.unmask		= bcsr_irq_unmask,
+	.irq_mask	= bcsr_irq_mask,
+	.irq_mask_ack	= bcsr_irq_maskack,
+	.irq_unmask	= bcsr_irq_unmask,
 };
 
 void __init bcsr_init_irq(int csc_start, int csc_end, int hook_irq)

diff --git a/arch/mips/ar7/irq.c b/arch/mips/ar7/irq.c
index 4ec2642..a6484b6 100644
--- a/arch/mips/ar7/irq.c
+++ b/arch/mips/ar7/irq.c

@@ -49,51 +49,51 @@
 
 static int ar7_irq_base;
 
-static void ar7_unmask_irq(unsigned int irq)
+static void ar7_unmask_irq(struct irq_data *d)
 {
-	writel(1 << ((irq - ar7_irq_base) % 32),
-	       REG(ESR_OFFSET(irq - ar7_irq_base)));
+	writel(1 << ((d->irq - ar7_irq_base) % 32),
+	       REG(ESR_OFFSET(d->irq - ar7_irq_base)));
 }
 
-static void ar7_mask_irq(unsigned int irq)
+static void ar7_mask_irq(struct irq_data *d)
 {
-	writel(1 << ((irq - ar7_irq_base) % 32),
-	       REG(ECR_OFFSET(irq - ar7_irq_base)));
+	writel(1 << ((d->irq - ar7_irq_base) % 32),
+	       REG(ECR_OFFSET(d->irq - ar7_irq_base)));
 }
 
-static void ar7_ack_irq(unsigned int irq)
+static void ar7_ack_irq(struct irq_data *d)
 {
-	writel(1 << ((irq - ar7_irq_base) % 32),
-	       REG(CR_OFFSET(irq - ar7_irq_base)));
+	writel(1 << ((d->irq - ar7_irq_base) % 32),
+	       REG(CR_OFFSET(d->irq - ar7_irq_base)));
 }
 
-static void ar7_unmask_sec_irq(unsigned int irq)
+static void ar7_unmask_sec_irq(struct irq_data *d)
 {
-	writel(1 << (irq - ar7_irq_base - 40), REG(SEC_ESR_OFFSET));
+	writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_ESR_OFFSET));
 }
 
-static void ar7_mask_sec_irq(unsigned int irq)
+static void ar7_mask_sec_irq(struct irq_data *d)
 {
-	writel(1 << (irq - ar7_irq_base - 40), REG(SEC_ECR_OFFSET));
+	writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_ECR_OFFSET));
 }
 
-static void ar7_ack_sec_irq(unsigned int irq)
+static void ar7_ack_sec_irq(struct irq_data *d)
 {
-	writel(1 << (irq - ar7_irq_base - 40), REG(SEC_CR_OFFSET));
+	writel(1 << (d->irq - ar7_irq_base - 40), REG(SEC_CR_OFFSET));
 }
 
 static struct irq_chip ar7_irq_type = {
 	.name = "AR7",
-	.unmask = ar7_unmask_irq,
-	.mask = ar7_mask_irq,
-	.ack = ar7_ack_irq
+	.irq_unmask = ar7_unmask_irq,
+	.irq_mask = ar7_mask_irq,
+	.irq_ack = ar7_ack_irq
 };
 
 static struct irq_chip ar7_sec_irq_type = {
 	.name = "AR7",
-	.unmask = ar7_unmask_sec_irq,
-	.mask = ar7_mask_sec_irq,
-	.ack = ar7_ack_sec_irq,
+	.irq_unmask = ar7_unmask_sec_irq,
+	.irq_mask = ar7_mask_sec_irq,
+	.irq_ack = ar7_ack_sec_irq,
 };
 
 static struct irqaction ar7_cascade_action = {

diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c
index 1bf7f71..7c02bc9 100644
--- a/arch/mips/ath79/irq.c
+++ b/arch/mips/ath79/irq.c

@@ -62,13 +62,12 @@
 		spurious_interrupt();
 }
 
-static void ar71xx_misc_irq_unmask(unsigned int irq)
+static void ar71xx_misc_irq_unmask(struct irq_data *d)
 {
+	unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
 	void __iomem *base = ath79_reset_base;
 	u32 t;
 
-	irq -= ATH79_MISC_IRQ_BASE;
-
 	t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
 	__raw_writel(t | (1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
 
@@ -76,13 +75,12 @@
 	__raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
 }
 
-static void ar71xx_misc_irq_mask(unsigned int irq)
+static void ar71xx_misc_irq_mask(struct irq_data *d)
 {
+	unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
 	void __iomem *base = ath79_reset_base;
 	u32 t;
 
-	irq -= ATH79_MISC_IRQ_BASE;
-
 	t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
 	__raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_ENABLE);
 
@@ -90,13 +88,12 @@
 	__raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
 }
 
-static void ar724x_misc_irq_ack(unsigned int irq)
+static void ar724x_misc_irq_ack(struct irq_data *d)
 {
+	unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
 	void __iomem *base = ath79_reset_base;
 	u32 t;
 
-	irq -= ATH79_MISC_IRQ_BASE;
-
 	t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
 	__raw_writel(t & ~(1 << irq), base + AR71XX_RESET_REG_MISC_INT_STATUS);
 
@@ -106,8 +103,8 @@
 
 static struct irq_chip ath79_misc_irq_chip = {
 	.name		= "MISC",
-	.unmask		= ar71xx_misc_irq_unmask,
-	.mask		= ar71xx_misc_irq_mask,
+	.irq_unmask	= ar71xx_misc_irq_unmask,
+	.irq_mask	= ar71xx_misc_irq_mask,
 };
 
 static void __init ath79_misc_irq_init(void)
@@ -119,15 +116,14 @@
 	__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
 
 	if (soc_is_ar71xx() || soc_is_ar913x())
-		ath79_misc_irq_chip.mask_ack = ar71xx_misc_irq_mask;
+		ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
 	else if (soc_is_ar724x())
-		ath79_misc_irq_chip.ack = ar724x_misc_irq_ack;
+		ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
 	else
 		BUG();
 
 	for (i = ATH79_MISC_IRQ_BASE;
 	     i < ATH79_MISC_IRQ_BASE + ATH79_MISC_IRQ_COUNT; i++) {
-		irq_desc[i].status = IRQ_DISABLED;
 		set_irq_chip_and_handler(i, &ath79_misc_irq_chip,
 					 handle_level_irq);
 	}

diff --git a/arch/mips/bcm63xx/irq.c b/arch/mips/bcm63xx/irq.c
index 3be87f2..1691531 100644
--- a/arch/mips/bcm63xx/irq.c
+++ b/arch/mips/bcm63xx/irq.c

@@ -76,88 +76,80 @@
  * internal IRQs operations: only mask/unmask on PERF irq mask
  * register.
  */
-static inline void bcm63xx_internal_irq_mask(unsigned int irq)
+static inline void bcm63xx_internal_irq_mask(struct irq_data *d)
 {
+	unsigned int irq = d->irq - IRQ_INTERNAL_BASE;
 	u32 mask;
 
-	irq -= IRQ_INTERNAL_BASE;
 	mask = bcm_perf_readl(PERF_IRQMASK_REG);
 	mask &= ~(1 << irq);
 	bcm_perf_writel(mask, PERF_IRQMASK_REG);
 }
 
-static void bcm63xx_internal_irq_unmask(unsigned int irq)
+static void bcm63xx_internal_irq_unmask(struct irq_data *d)
 {
+	unsigned int irq = d->irq - IRQ_INTERNAL_BASE;
 	u32 mask;
 
-	irq -= IRQ_INTERNAL_BASE;
 	mask = bcm_perf_readl(PERF_IRQMASK_REG);
 	mask |= (1 << irq);
 	bcm_perf_writel(mask, PERF_IRQMASK_REG);
 }
 
-static unsigned int bcm63xx_internal_irq_startup(unsigned int irq)
-{
-	bcm63xx_internal_irq_unmask(irq);
-	return 0;
-}
-
 /*
  * external IRQs operations: mask/unmask and clear on PERF external
  * irq control register.
  */
-static void bcm63xx_external_irq_mask(unsigned int irq)
+static void bcm63xx_external_irq_mask(struct irq_data *d)
 {
+	unsigned int irq = d->irq - IRQ_EXT_BASE;
 	u32 reg;
 
-	irq -= IRQ_EXT_BASE;
 	reg = bcm_perf_readl(PERF_EXTIRQ_CFG_REG);
 	reg &= ~EXTIRQ_CFG_MASK(irq);
 	bcm_perf_writel(reg, PERF_EXTIRQ_CFG_REG);
 }
 
-static void bcm63xx_external_irq_unmask(unsigned int irq)
+static void bcm63xx_external_irq_unmask(struct irq_data *d)
 {
+	unsigned int irq = d->irq - IRQ_EXT_BASE;
 	u32 reg;
 
-	irq -= IRQ_EXT_BASE;
 	reg = bcm_perf_readl(PERF_EXTIRQ_CFG_REG);
 	reg |= EXTIRQ_CFG_MASK(irq);
 	bcm_perf_writel(reg, PERF_EXTIRQ_CFG_REG);
 }
 
-static void bcm63xx_external_irq_clear(unsigned int irq)
+static void bcm63xx_external_irq_clear(struct irq_data *d)
 {
+	unsigned int irq = d->irq - IRQ_EXT_BASE;
 	u32 reg;
 
-	irq -= IRQ_EXT_BASE;
 	reg = bcm_perf_readl(PERF_EXTIRQ_CFG_REG);
 	reg |= EXTIRQ_CFG_CLEAR(irq);
 	bcm_perf_writel(reg, PERF_EXTIRQ_CFG_REG);
 }
 
-static unsigned int bcm63xx_external_irq_startup(unsigned int irq)
+static unsigned int bcm63xx_external_irq_startup(struct irq_data *d)
 {
-	set_c0_status(0x100 << (irq - IRQ_MIPS_BASE));
+	set_c0_status(0x100 << (d->irq - IRQ_MIPS_BASE));
 	irq_enable_hazard();
-	bcm63xx_external_irq_unmask(irq);
+	bcm63xx_external_irq_unmask(d);
 	return 0;
 }
 
-static void bcm63xx_external_irq_shutdown(unsigned int irq)
+static void bcm63xx_external_irq_shutdown(struct irq_data *d)
 {
-	bcm63xx_external_irq_mask(irq);
-	clear_c0_status(0x100 << (irq - IRQ_MIPS_BASE));
+	bcm63xx_external_irq_mask(d);
+	clear_c0_status(0x100 << (d->irq - IRQ_MIPS_BASE));
 	irq_disable_hazard();
 }
 
-static int bcm63xx_external_irq_set_type(unsigned int irq,
+static int bcm63xx_external_irq_set_type(struct irq_data *d,
 					 unsigned int flow_type)
 {
+	unsigned int irq = d->irq - IRQ_EXT_BASE;
 	u32 reg;
-	struct irq_desc *desc = irq_desc + irq;
-
-	irq -= IRQ_EXT_BASE;
 
 	flow_type &= IRQ_TYPE_SENSE_MASK;
 
@@ -199,37 +191,32 @@
 	}
 	bcm_perf_writel(reg, PERF_EXTIRQ_CFG_REG);
 
-	if (flow_type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))  {
-		desc->status |= IRQ_LEVEL;
-		desc->handle_irq = handle_level_irq;
-	} else {
-		desc->handle_irq = handle_edge_irq;
-	}
+	irqd_set_trigger_type(d, flow_type);
+	if (flow_type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
+		__irq_set_handler_locked(d->irq, handle_level_irq);
+	else
+		__irq_set_handler_locked(d->irq, handle_edge_irq);
 
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
 static struct irq_chip bcm63xx_internal_irq_chip = {
 	.name		= "bcm63xx_ipic",
-	.startup	= bcm63xx_internal_irq_startup,
-	.shutdown	= bcm63xx_internal_irq_mask,
-
-	.mask		= bcm63xx_internal_irq_mask,
-	.mask_ack	= bcm63xx_internal_irq_mask,
-	.unmask		= bcm63xx_internal_irq_unmask,
+	.irq_mask	= bcm63xx_internal_irq_mask,
+	.irq_unmask	= bcm63xx_internal_irq_unmask,
 };
 
 static struct irq_chip bcm63xx_external_irq_chip = {
 	.name		= "bcm63xx_epic",
-	.startup	= bcm63xx_external_irq_startup,
-	.shutdown	= bcm63xx_external_irq_shutdown,
+	.irq_startup	= bcm63xx_external_irq_startup,
+	.irq_shutdown	= bcm63xx_external_irq_shutdown,
 
-	.ack		= bcm63xx_external_irq_clear,
+	.irq_ack	= bcm63xx_external_irq_clear,
 
-	.mask		= bcm63xx_external_irq_mask,
-	.unmask		= bcm63xx_external_irq_unmask,
+	.irq_mask	= bcm63xx_external_irq_mask,
+	.irq_unmask	= bcm63xx_external_irq_unmask,
 
-	.set_type	= bcm63xx_external_irq_set_type,
+	.irq_set_type	= bcm63xx_external_irq_set_type,
 };
 
 static struct irqaction cpu_ip2_cascade_action = {

diff --git a/arch/mips/dec/ioasic-irq.c b/arch/mips/dec/ioasic-irq.c
index cb41954..8d9a5fc 100644
--- a/arch/mips/dec/ioasic-irq.c
+++ b/arch/mips/dec/ioasic-irq.c

@@ -17,80 +17,48 @@
 #include <asm/dec/ioasic_addrs.h>
 #include <asm/dec/ioasic_ints.h>
 
-
 static int ioasic_irq_base;
 
-
-static inline void unmask_ioasic_irq(unsigned int irq)
+static void unmask_ioasic_irq(struct irq_data *d)
 {
 	u32 simr;
 
 	simr = ioasic_read(IO_REG_SIMR);
-	simr |= (1 << (irq - ioasic_irq_base));
+	simr |= (1 << (d->irq - ioasic_irq_base));
 	ioasic_write(IO_REG_SIMR, simr);
 }
 
-static inline void mask_ioasic_irq(unsigned int irq)
+static void mask_ioasic_irq(struct irq_data *d)
 {
 	u32 simr;
 
 	simr = ioasic_read(IO_REG_SIMR);
-	simr &= ~(1 << (irq - ioasic_irq_base));
+	simr &= ~(1 << (d->irq - ioasic_irq_base));
 	ioasic_write(IO_REG_SIMR, simr);
 }
 
-static inline void clear_ioasic_irq(unsigned int irq)
+static void ack_ioasic_irq(struct irq_data *d)
 {
-	u32 sir;
-
-	sir = ~(1 << (irq - ioasic_irq_base));
-	ioasic_write(IO_REG_SIR, sir);
-}
-
-static inline void ack_ioasic_irq(unsigned int irq)
-{
-	mask_ioasic_irq(irq);
+	mask_ioasic_irq(d);
 	fast_iob();
 }
 
-static inline void end_ioasic_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
-		unmask_ioasic_irq(irq);
-}
-
 static struct irq_chip ioasic_irq_type = {
 	.name = "IO-ASIC",
-	.ack = ack_ioasic_irq,
-	.mask = mask_ioasic_irq,
-	.mask_ack = ack_ioasic_irq,
-	.unmask = unmask_ioasic_irq,
+	.irq_ack = ack_ioasic_irq,
+	.irq_mask = mask_ioasic_irq,
+	.irq_mask_ack = ack_ioasic_irq,
+	.irq_unmask = unmask_ioasic_irq,
 };
 
-
-#define unmask_ioasic_dma_irq unmask_ioasic_irq
-
-#define mask_ioasic_dma_irq mask_ioasic_irq
-
-#define ack_ioasic_dma_irq ack_ioasic_irq
-
-static inline void end_ioasic_dma_irq(unsigned int irq)
-{
-	clear_ioasic_irq(irq);
-	fast_iob();
-	end_ioasic_irq(irq);
-}
-
 static struct irq_chip ioasic_dma_irq_type = {
 	.name = "IO-ASIC-DMA",
-	.ack = ack_ioasic_dma_irq,
-	.mask = mask_ioasic_dma_irq,
-	.mask_ack = ack_ioasic_dma_irq,
-	.unmask = unmask_ioasic_dma_irq,
-	.end = end_ioasic_dma_irq,
+	.irq_ack = ack_ioasic_irq,
+	.irq_mask = mask_ioasic_irq,
+	.irq_mask_ack = ack_ioasic_irq,
+	.irq_unmask = unmask_ioasic_irq,
 };
 
-
 void __init init_ioasic_irqs(int base)
 {
 	int i;

diff --git a/arch/mips/dec/kn02-irq.c b/arch/mips/dec/kn02-irq.c
index ed90a8d..ef31d98 100644
--- a/arch/mips/dec/kn02-irq.c
+++ b/arch/mips/dec/kn02-irq.c

@@ -27,43 +27,40 @@
  */
 u32 cached_kn02_csr;
 
-
 static int kn02_irq_base;
 
-
-static inline void unmask_kn02_irq(unsigned int irq)
+static void unmask_kn02_irq(struct irq_data *d)
 {
 	volatile u32 *csr = (volatile u32 *)CKSEG1ADDR(KN02_SLOT_BASE +
 						       KN02_CSR);
 
-	cached_kn02_csr |= (1 << (irq - kn02_irq_base + 16));
+	cached_kn02_csr |= (1 << (d->irq - kn02_irq_base + 16));
 	*csr = cached_kn02_csr;
 }
 
-static inline void mask_kn02_irq(unsigned int irq)
+static void mask_kn02_irq(struct irq_data *d)
 {
 	volatile u32 *csr = (volatile u32 *)CKSEG1ADDR(KN02_SLOT_BASE +
 						       KN02_CSR);
 
-	cached_kn02_csr &= ~(1 << (irq - kn02_irq_base + 16));
+	cached_kn02_csr &= ~(1 << (d->irq - kn02_irq_base + 16));
 	*csr = cached_kn02_csr;
 }
 
-static void ack_kn02_irq(unsigned int irq)
+static void ack_kn02_irq(struct irq_data *d)
 {
-	mask_kn02_irq(irq);
+	mask_kn02_irq(d);
 	iob();
 }
 
 static struct irq_chip kn02_irq_type = {
 	.name = "KN02-CSR",
-	.ack = ack_kn02_irq,
-	.mask = mask_kn02_irq,
-	.mask_ack = ack_kn02_irq,
-	.unmask = unmask_kn02_irq,
+	.irq_ack = ack_kn02_irq,
+	.irq_mask = mask_kn02_irq,
+	.irq_mask_ack = ack_kn02_irq,
+	.irq_unmask = unmask_kn02_irq,
 };
 
-
 void __init init_kn02_irqs(int base)
 {
 	volatile u32 *csr = (volatile u32 *)CKSEG1ADDR(KN02_SLOT_BASE +

diff --git a/arch/mips/emma/markeins/irq.c b/arch/mips/emma/markeins/irq.c
index 3a96799..9b1207a 100644
--- a/arch/mips/emma/markeins/irq.c
+++ b/arch/mips/emma/markeins/irq.c

@@ -34,13 +34,10 @@
 
 #include <asm/emma/emma2rh.h>
 
-static void emma2rh_irq_enable(unsigned int irq)
+static void emma2rh_irq_enable(struct irq_data *d)
 {
-	u32 reg_value;
-	u32 reg_bitmask;
-	u32 reg_index;
-
-	irq -= EMMA2RH_IRQ_BASE;
+	unsigned int irq = d->irq - EMMA2RH_IRQ_BASE;
+	u32 reg_value, reg_bitmask, reg_index;
 
 	reg_index = EMMA2RH_BHIF_INT_EN_0 +
 		    (EMMA2RH_BHIF_INT_EN_1 - EMMA2RH_BHIF_INT_EN_0) * (irq / 32);
@@ -49,13 +46,10 @@
 	emma2rh_out32(reg_index, reg_value | reg_bitmask);
 }
 
-static void emma2rh_irq_disable(unsigned int irq)
+static void emma2rh_irq_disable(struct irq_data *d)
 {
-	u32 reg_value;
-	u32 reg_bitmask;
-	u32 reg_index;
-
-	irq -= EMMA2RH_IRQ_BASE;
+	unsigned int irq = d->irq - EMMA2RH_IRQ_BASE;
+	u32 reg_value, reg_bitmask, reg_index;
 
 	reg_index = EMMA2RH_BHIF_INT_EN_0 +
 		    (EMMA2RH_BHIF_INT_EN_1 - EMMA2RH_BHIF_INT_EN_0) * (irq / 32);
@@ -66,10 +60,8 @@
 
 struct irq_chip emma2rh_irq_controller = {
 	.name = "emma2rh_irq",
-	.ack = emma2rh_irq_disable,
-	.mask = emma2rh_irq_disable,
-	.mask_ack = emma2rh_irq_disable,
-	.unmask = emma2rh_irq_enable,
+	.irq_mask = emma2rh_irq_disable,
+	.irq_unmask = emma2rh_irq_enable,
 };
 
 void emma2rh_irq_init(void)
@@ -82,23 +74,21 @@
 					      handle_level_irq, "level");
 }
 
-static void emma2rh_sw_irq_enable(unsigned int irq)
+static void emma2rh_sw_irq_enable(struct irq_data *d)
 {
+	unsigned int irq = d->irq - EMMA2RH_SW_IRQ_BASE;
 	u32 reg;
 
-	irq -= EMMA2RH_SW_IRQ_BASE;
-
 	reg = emma2rh_in32(EMMA2RH_BHIF_SW_INT_EN);
 	reg |= 1 << irq;
 	emma2rh_out32(EMMA2RH_BHIF_SW_INT_EN, reg);
 }
 
-static void emma2rh_sw_irq_disable(unsigned int irq)
+static void emma2rh_sw_irq_disable(struct irq_data *d)
 {
+	unsigned int irq = d->irq - EMMA2RH_SW_IRQ_BASE;
 	u32 reg;
 
-	irq -= EMMA2RH_SW_IRQ_BASE;
-
 	reg = emma2rh_in32(EMMA2RH_BHIF_SW_INT_EN);
 	reg &= ~(1 << irq);
 	emma2rh_out32(EMMA2RH_BHIF_SW_INT_EN, reg);
@@ -106,10 +96,8 @@
 
 struct irq_chip emma2rh_sw_irq_controller = {
 	.name = "emma2rh_sw_irq",
-	.ack = emma2rh_sw_irq_disable,
-	.mask = emma2rh_sw_irq_disable,
-	.mask_ack = emma2rh_sw_irq_disable,
-	.unmask = emma2rh_sw_irq_enable,
+	.irq_mask = emma2rh_sw_irq_disable,
+	.irq_unmask = emma2rh_sw_irq_enable,
 };
 
 void emma2rh_sw_irq_init(void)
@@ -122,39 +110,38 @@
 					      handle_level_irq, "level");
 }
 
-static void emma2rh_gpio_irq_enable(unsigned int irq)
+static void emma2rh_gpio_irq_enable(struct irq_data *d)
 {
+	unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE;
 	u32 reg;
 
-	irq -= EMMA2RH_GPIO_IRQ_BASE;
-
 	reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK);
 	reg |= 1 << irq;
 	emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg);
 }
 
-static void emma2rh_gpio_irq_disable(unsigned int irq)
+static void emma2rh_gpio_irq_disable(struct irq_data *d)
 {
+	unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE;
 	u32 reg;
 
-	irq -= EMMA2RH_GPIO_IRQ_BASE;
-
 	reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK);
 	reg &= ~(1 << irq);
 	emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg);
 }
 
-static void emma2rh_gpio_irq_ack(unsigned int irq)
+static void emma2rh_gpio_irq_ack(struct irq_data *d)
 {
-	irq -= EMMA2RH_GPIO_IRQ_BASE;
+	unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE;
+
 	emma2rh_out32(EMMA2RH_GPIO_INT_ST, ~(1 << irq));
 }
 
-static void emma2rh_gpio_irq_mask_ack(unsigned int irq)
+static void emma2rh_gpio_irq_mask_ack(struct irq_data *d)
 {
+	unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE;
 	u32 reg;
 
-	irq -= EMMA2RH_GPIO_IRQ_BASE;
 	emma2rh_out32(EMMA2RH_GPIO_INT_ST, ~(1 << irq));
 
 	reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK);
@@ -164,10 +151,10 @@
 
 struct irq_chip emma2rh_gpio_irq_controller = {
 	.name = "emma2rh_gpio_irq",
-	.ack = emma2rh_gpio_irq_ack,
-	.mask = emma2rh_gpio_irq_disable,
-	.mask_ack = emma2rh_gpio_irq_mask_ack,
-	.unmask = emma2rh_gpio_irq_enable,
+	.irq_ack = emma2rh_gpio_irq_ack,
+	.irq_mask = emma2rh_gpio_irq_disable,
+	.irq_mask_ack = emma2rh_gpio_irq_mask_ack,
+	.irq_unmask = emma2rh_gpio_irq_enable,
 };
 
 void emma2rh_gpio_irq_init(void)

diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index b003ed5..0ec0129 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h

@@ -55,9 +55,9 @@
 #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
 #include <linux/cpumask.h>
 
-extern int plat_set_irq_affinity(unsigned int irq,
-				  const struct cpumask *affinity);
-extern void smtc_forward_irq(unsigned int irq);
+extern int plat_set_irq_affinity(struct irq_data *d,
+				 const struct cpumask *affinity, bool force);
+extern void smtc_forward_irq(struct irq_data *d);
 
 /*
  * IRQ affinity hook invoked at the beginning of interrupt dispatch
@@ -70,51 +70,53 @@
  * cpumask implementations, this version is optimistically assuming
  * that cpumask.h macro overhead is reasonable during interrupt dispatch.
  */
-#define IRQ_AFFINITY_HOOK(irq)						\
-do {									\
-    if (!cpumask_test_cpu(smp_processor_id(), irq_desc[irq].affinity)) {\
-	smtc_forward_irq(irq);						\
-	irq_exit();							\
-	return;								\
-    }									\
-} while (0)
+static inline int handle_on_other_cpu(unsigned int irq)
+{
+	struct irq_data *d = irq_get_irq_data(irq);
+
+	if (cpumask_test_cpu(smp_processor_id(), d->affinity))
+		return 0;
+	smtc_forward_irq(d);
+	return 1;
+}
 
 #else /* Not doing SMTC affinity */
 
-#define IRQ_AFFINITY_HOOK(irq) do { } while (0)
+static inline int handle_on_other_cpu(unsigned int irq) { return 0; }
 
 #endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
 
 #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP
 
+static inline void smtc_im_backstop(unsigned int irq)
+{
+	if (irq_hwmask[irq] & 0x0000ff00)
+		write_c0_tccontext(read_c0_tccontext() &
+				   ~(irq_hwmask[irq] & 0x0000ff00));
+}
+
 /*
  * Clear interrupt mask handling "backstop" if irq_hwmask
  * entry so indicates. This implies that the ack() or end()
  * functions will take over re-enabling the low-level mask.
  * Otherwise it will be done on return from exception.
  */
-#define __DO_IRQ_SMTC_HOOK(irq)						\
-do {									\
-	IRQ_AFFINITY_HOOK(irq);						\
-	if (irq_hwmask[irq] & 0x0000ff00)				\
-		write_c0_tccontext(read_c0_tccontext() &		\
-				   ~(irq_hwmask[irq] & 0x0000ff00));	\
-} while (0)
+static inline int smtc_handle_on_other_cpu(unsigned int irq)
+{
+	int ret = handle_on_other_cpu(irq);
 
-#define __NO_AFFINITY_IRQ_SMTC_HOOK(irq)				\
-do {									\
-	if (irq_hwmask[irq] & 0x0000ff00)                               \
-		write_c0_tccontext(read_c0_tccontext() &		\
-				   ~(irq_hwmask[irq] & 0x0000ff00));	\
-} while (0)
+	if (!ret)
+		smtc_im_backstop(irq);
+	return ret;
+}
 
 #else
 
-#define __DO_IRQ_SMTC_HOOK(irq)						\
-do {									\
-	IRQ_AFFINITY_HOOK(irq);						\
-} while (0)
-#define __NO_AFFINITY_IRQ_SMTC_HOOK(irq) do { } while (0)
+static inline void smtc_im_backstop(unsigned int irq) { }
+static inline int smtc_handle_on_other_cpu(unsigned int irq)
+{
+	return handle_on_other_cpu(irq);
+}
 
 #endif
 

diff --git a/arch/mips/include/asm/pmc-sierra/msp71xx/cpu-feature-overrides.h b/arch/mips/include/asm/pmc-sierra/msp71xx/cpu-feature-overrides.h
new file mode 100644
index 0000000..a80801b
--- /dev/null
+++ b/arch/mips/include/asm/pmc-sierra/msp71xx/cpu-feature-overrides.h

@@ -0,0 +1,21 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003, 04, 07 Ralf Baechle (ralf@linux-mips.org)
+ */
+#ifndef __ASM_MACH_MSP71XX_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_MSP71XX_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_mips16		1
+#define cpu_has_dsp		1
+#define cpu_has_mipsmt		1
+#define cpu_has_fpu		0
+
+#define cpu_has_mips32r1	0
+#define cpu_has_mips32r2	1
+#define cpu_has_mips64r1	0
+#define cpu_has_mips64r2	0
+
+#endif /* __ASM_MACH_MSP71XX_CPU_FEATURE_OVERRIDES_H */

diff --git a/arch/mips/include/asm/pmc-sierra/msp71xx/msp_gpio_macros.h b/arch/mips/include/asm/pmc-sierra/msp71xx/msp_gpio_macros.h
new file mode 100644
index 0000000..156f320
--- /dev/null
+++ b/arch/mips/include/asm/pmc-sierra/msp71xx/msp_gpio_macros.h

@@ -0,0 +1,343 @@
+/*
+ *
+ * Macros for external SMP-safe access to the PMC MSP71xx reference
+ * board GPIO pins
+ *
+ * Copyright 2010 PMC-Sierra, Inc.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __MSP_GPIO_MACROS_H__
+#define __MSP_GPIO_MACROS_H__
+
+#include <msp_regops.h>
+#include <msp_regs.h>
+
+#ifdef CONFIG_PMC_MSP7120_GW
+#define MSP_NUM_GPIOS		20
+#else
+#define MSP_NUM_GPIOS		28
+#endif
+
+/* -- GPIO Enumerations -- */
+enum msp_gpio_data {
+	MSP_GPIO_LO = 0,
+	MSP_GPIO_HI = 1,
+	MSP_GPIO_NONE,		/* Special - Means pin is out of range */
+	MSP_GPIO_TOGGLE,	/* Special - Sets pin to opposite */
+};
+
+enum msp_gpio_mode {
+	MSP_GPIO_INPUT		= 0x0,
+	/* MSP_GPIO_ INTERRUPT	= 0x1,	Not supported yet */
+	MSP_GPIO_UART_INPUT	= 0x2,	/* Only GPIO 4 or 5 */
+	MSP_GPIO_OUTPUT		= 0x8,
+	MSP_GPIO_UART_OUTPUT	= 0x9,	/* Only GPIO 2 or 3 */
+	MSP_GPIO_PERIF_TIMERA	= 0x9,	/* Only GPIO 0 or 1 */
+	MSP_GPIO_PERIF_TIMERB	= 0xa,	/* Only GPIO 0 or 1 */
+	MSP_GPIO_UNKNOWN	= 0xb,  /* No such GPIO or mode */
+};
+
+/* -- Static Tables -- */
+
+/* Maps pins to data register */
+static volatile u32 * const MSP_GPIO_DATA_REGISTER[] = {
+	/* GPIO 0 and 1 on the first register */
+	GPIO_DATA1_REG, GPIO_DATA1_REG,
+	/* GPIO 2, 3, 4, and 5 on the second register */
+	GPIO_DATA2_REG, GPIO_DATA2_REG, GPIO_DATA2_REG, GPIO_DATA2_REG,
+	/* GPIO 6, 7, 8, and 9 on the third register */
+	GPIO_DATA3_REG, GPIO_DATA3_REG, GPIO_DATA3_REG, GPIO_DATA3_REG,
+	/* GPIO 10, 11, 12, 13, 14, and 15 on the fourth register */
+	GPIO_DATA4_REG, GPIO_DATA4_REG, GPIO_DATA4_REG, GPIO_DATA4_REG,
+	GPIO_DATA4_REG, GPIO_DATA4_REG,
+	/* GPIO 16 - 23 on the first strange EXTENDED register */
+	EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG,
+	EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG,
+	EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG,
+	/* GPIO 24 - 27 on the second strange EXTENDED register */
+	EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG,
+	EXTENDED_GPIO2_REG,
+};
+
+/* Maps pins to mode register */
+static volatile u32 * const MSP_GPIO_MODE_REGISTER[] = {
+	/* GPIO 0 and 1 on the first register */
+	GPIO_CFG1_REG, GPIO_CFG1_REG,
+	/* GPIO 2, 3, 4, and 5 on the second register */
+	GPIO_CFG2_REG, GPIO_CFG2_REG, GPIO_CFG2_REG, GPIO_CFG2_REG,
+	/* GPIO 6, 7, 8, and 9 on the third register */
+	GPIO_CFG3_REG, GPIO_CFG3_REG, GPIO_CFG3_REG, GPIO_CFG3_REG,
+	/* GPIO 10, 11, 12, 13, 14, and 15 on the fourth register */
+	GPIO_CFG4_REG, GPIO_CFG4_REG, GPIO_CFG4_REG, GPIO_CFG4_REG,
+	GPIO_CFG4_REG, GPIO_CFG4_REG,
+	/* GPIO 16 - 23 on the first strange EXTENDED register */
+	EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG,
+	EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG,
+	EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG,
+	/* GPIO 24 - 27 on the second strange EXTENDED register */
+	EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG,
+	EXTENDED_GPIO2_REG,
+};
+
+/* Maps 'basic' pins to relative offset from 0 per register */
+static int MSP_GPIO_OFFSET[] = {
+	/* GPIO 0 and 1 on the first register */
+	0, 0,
+	/* GPIO 2, 3, 4, and 5 on the second register */
+	2, 2, 2, 2,
+	/* GPIO 6, 7, 8, and 9 on the third register */
+	6, 6, 6, 6,
+	/* GPIO 10, 11, 12, 13, 14, and 15 on the fourth register */
+	10, 10, 10, 10, 10, 10,
+};
+
+/* Maps MODE to allowed pin mask */
+static unsigned int MSP_GPIO_MODE_ALLOWED[] = {
+	0xffffffff,	/* Mode 0 - INPUT */
+	0x00000,	/* Mode 1 - INTERRUPT */
+	0x00030,	/* Mode 2 - UART_INPUT (GPIO 4, 5)*/
+	0, 0, 0, 0, 0,	/* Modes 3, 4, 5, 6, and 7 are reserved */
+	0xffffffff,	/* Mode 8 - OUTPUT */
+	0x0000f,	/* Mode 9 - UART_OUTPUT/
+				PERF_TIMERA (GPIO 0, 1, 2, 3) */
+	0x00003,	/* Mode a - PERF_TIMERB (GPIO 0, 1) */
+	0x00000,	/* Mode b - Not really a mode! */
+};
+
+/* -- Bit masks -- */
+
+/* This gives you the 'register relative offset gpio' number */
+#define OFFSET_GPIO_NUMBER(gpio)	(gpio - MSP_GPIO_OFFSET[gpio])
+
+/* These take the 'register relative offset gpio' number */
+#define BASIC_DATA_REG_MASK(ogpio)		(1 << ogpio)
+#define BASIC_MODE_REG_VALUE(mode, ogpio)	\
+	(mode << BASIC_MODE_REG_SHIFT(ogpio))
+#define BASIC_MODE_REG_MASK(ogpio)		\
+	BASIC_MODE_REG_VALUE(0xf, ogpio)
+#define BASIC_MODE_REG_SHIFT(ogpio)		(ogpio * 4)
+#define BASIC_MODE_REG_FROM_REG(data, ogpio)	\
+	((data & BASIC_MODE_REG_MASK(ogpio)) >> BASIC_MODE_REG_SHIFT(ogpio))
+
+/* These take the actual GPIO number (0 through 15) */
+#define BASIC_DATA_MASK(gpio)	\
+	BASIC_DATA_REG_MASK(OFFSET_GPIO_NUMBER(gpio))
+#define BASIC_MODE_MASK(gpio)	\
+	BASIC_MODE_REG_MASK(OFFSET_GPIO_NUMBER(gpio))
+#define BASIC_MODE(mode, gpio)	\
+	BASIC_MODE_REG_VALUE(mode, OFFSET_GPIO_NUMBER(gpio))
+#define BASIC_MODE_SHIFT(gpio)	\
+	BASIC_MODE_REG_SHIFT(OFFSET_GPIO_NUMBER(gpio))
+#define BASIC_MODE_FROM_REG(data, gpio)	\
+	BASIC_MODE_REG_FROM_REG(data, OFFSET_GPIO_NUMBER(gpio))
+
+/*
+ * Each extended GPIO register is 32 bits long and is responsible for up to
+ * eight GPIOs. The least significant 16 bits contain the set and clear bit
+ * pair for each of the GPIOs. The most significant 16 bits contain the
+ * disable and enable bit pair for each of the GPIOs. For example, the
+ * extended GPIO reg for GPIOs 16-23 is as follows:
+ *
+ *	31: GPIO23_DISABLE
+ *	...
+ *	19: GPIO17_DISABLE
+ *	18: GPIO17_ENABLE
+ *	17: GPIO16_DISABLE
+ *	16: GPIO16_ENABLE
+ *	...
+ *	3:  GPIO17_SET
+ *	2:  GPIO17_CLEAR
+ *	1:  GPIO16_SET
+ *	0:  GPIO16_CLEAR
+ */
+
+/* This gives the 'register relative offset gpio' number */
+#define EXTENDED_OFFSET_GPIO(gpio)	(gpio < 24 ? gpio - 16 : gpio - 24)
+
+/* These take the 'register relative offset gpio' number */
+#define EXTENDED_REG_DISABLE(ogpio)	(0x2 << ((ogpio * 2) + 16))
+#define EXTENDED_REG_ENABLE(ogpio)	(0x1 << ((ogpio * 2) + 16))
+#define EXTENDED_REG_SET(ogpio)		(0x2 << (ogpio * 2))
+#define EXTENDED_REG_CLR(ogpio)		(0x1 << (ogpio * 2))
+
+/* These take the actual GPIO number (16 through 27) */
+#define EXTENDED_DISABLE(gpio)	\
+	EXTENDED_REG_DISABLE(EXTENDED_OFFSET_GPIO(gpio))
+#define EXTENDED_ENABLE(gpio)	\
+	EXTENDED_REG_ENABLE(EXTENDED_OFFSET_GPIO(gpio))
+#define EXTENDED_SET(gpio)	\
+	EXTENDED_REG_SET(EXTENDED_OFFSET_GPIO(gpio))
+#define EXTENDED_CLR(gpio)	\
+	EXTENDED_REG_CLR(EXTENDED_OFFSET_GPIO(gpio))
+
+#define EXTENDED_FULL_MASK		(0xffffffff)
+
+/* -- API inline-functions -- */
+
+/*
+ * Gets the current value of the specified pin
+ */
+static inline enum msp_gpio_data msp_gpio_pin_get(unsigned int gpio)
+{
+	u32 pinhi_mask = 0, pinhi_mask2 = 0;
+
+	if (gpio >= MSP_NUM_GPIOS)
+		return MSP_GPIO_NONE;
+
+	if (gpio < 16) {
+		pinhi_mask = BASIC_DATA_MASK(gpio);
+	} else {
+		/*
+		 * Two cases are possible with the EXTENDED register:
+		 *  - In output mode (ENABLED flag set), check the CLR bit
+		 *  - In input mode (ENABLED flag not set), check the SET bit
+		 */
+		pinhi_mask = EXTENDED_ENABLE(gpio) | EXTENDED_CLR(gpio);
+		pinhi_mask2 = EXTENDED_SET(gpio);
+	}
+	if (((*MSP_GPIO_DATA_REGISTER[gpio] & pinhi_mask) == pinhi_mask) ||
+	    (*MSP_GPIO_DATA_REGISTER[gpio] & pinhi_mask2))
+		return MSP_GPIO_HI;
+	else
+		return MSP_GPIO_LO;
+}
+
+/* Sets the specified pin to the specified value */
+static inline void msp_gpio_pin_set(enum msp_gpio_data data, unsigned int gpio)
+{
+	if (gpio >= MSP_NUM_GPIOS)
+		return;
+
+	if (gpio < 16) {
+		if (data == MSP_GPIO_TOGGLE)
+			toggle_reg32(MSP_GPIO_DATA_REGISTER[gpio],
+					BASIC_DATA_MASK(gpio));
+		else if (data == MSP_GPIO_HI)
+			set_reg32(MSP_GPIO_DATA_REGISTER[gpio],
+					BASIC_DATA_MASK(gpio));
+		else
+			clear_reg32(MSP_GPIO_DATA_REGISTER[gpio],
+					BASIC_DATA_MASK(gpio));
+	} else {
+		if (data == MSP_GPIO_TOGGLE) {
+			/* Special ugly case:
+			 *   We have to read the CLR bit.
+			 *   If set, we write the CLR bit.
+			 *   If not, we write the SET bit.
+			 */
+			u32 tmpdata;
+
+			custom_read_reg32(MSP_GPIO_DATA_REGISTER[gpio],
+								tmpdata);
+			if (tmpdata & EXTENDED_CLR(gpio))
+				tmpdata = EXTENDED_CLR(gpio);
+			else
+				tmpdata = EXTENDED_SET(gpio);
+			custom_write_reg32(MSP_GPIO_DATA_REGISTER[gpio],
+								tmpdata);
+		} else {
+			u32 newdata;
+
+			if (data == MSP_GPIO_HI)
+				newdata = EXTENDED_SET(gpio);
+			else
+				newdata = EXTENDED_CLR(gpio);
+			set_value_reg32(MSP_GPIO_DATA_REGISTER[gpio],
+						EXTENDED_FULL_MASK, newdata);
+		}
+	}
+}
+
+/* Sets the specified pin to the specified value */
+static inline void msp_gpio_pin_hi(unsigned int gpio)
+{
+	msp_gpio_pin_set(MSP_GPIO_HI, gpio);
+}
+
+/* Sets the specified pin to the specified value */
+static inline void msp_gpio_pin_lo(unsigned int gpio)
+{
+	msp_gpio_pin_set(MSP_GPIO_LO, gpio);
+}
+
+/* Sets the specified pin to the opposite value */
+static inline void msp_gpio_pin_toggle(unsigned int gpio)
+{
+	msp_gpio_pin_set(MSP_GPIO_TOGGLE, gpio);
+}
+
+/* Gets the mode of the specified pin */
+static inline enum msp_gpio_mode msp_gpio_pin_get_mode(unsigned int gpio)
+{
+	enum msp_gpio_mode retval = MSP_GPIO_UNKNOWN;
+	uint32_t data;
+
+	if (gpio >= MSP_NUM_GPIOS)
+		return retval;
+
+	data = *MSP_GPIO_MODE_REGISTER[gpio];
+
+	if (gpio < 16) {
+		retval = BASIC_MODE_FROM_REG(data, gpio);
+	} else {
+		/* Extended pins can only be either INPUT or OUTPUT */
+		if (data & EXTENDED_ENABLE(gpio))
+			retval = MSP_GPIO_OUTPUT;
+		else
+			retval = MSP_GPIO_INPUT;
+	}
+
+	return retval;
+}
+
+/*
+ * Sets the specified mode on the requested pin
+ * Returns 0 on success, or -1 if that mode is not allowed on this pin
+ */
+static inline int msp_gpio_pin_mode(enum msp_gpio_mode mode, unsigned int gpio)
+{
+	u32 modemask, newmode;
+
+	if ((1 << gpio) & ~MSP_GPIO_MODE_ALLOWED[mode])
+		return -1;
+
+	if (gpio >= MSP_NUM_GPIOS)
+		return -1;
+
+	if (gpio < 16) {
+		modemask = BASIC_MODE_MASK(gpio);
+		newmode =  BASIC_MODE(mode, gpio);
+	} else {
+		modemask = EXTENDED_FULL_MASK;
+		if (mode == MSP_GPIO_INPUT)
+			newmode = EXTENDED_DISABLE(gpio);
+		else
+			newmode = EXTENDED_ENABLE(gpio);
+	}
+	/* Do the set atomically */
+	set_value_reg32(MSP_GPIO_MODE_REGISTER[gpio], modemask, newmode);
+
+	return 0;
+}
+
+#endif /* __MSP_GPIO_MACROS_H__ */

diff --git a/arch/mips/include/asm/pmc-sierra/msp71xx/msp_regs.h b/arch/mips/include/asm/pmc-sierra/msp71xx/msp_regs.h
index 603eb73..692c1b6 100644
--- a/arch/mips/include/asm/pmc-sierra/msp71xx/msp_regs.h
+++ b/arch/mips/include/asm/pmc-sierra/msp71xx/msp_regs.h

@@ -91,12 +91,10 @@
 					/* MAC C device registers       */
 #define MSP_ADSL2_BASE		(MSP_MSB_BASE + 0xA80000)
 					/* ADSL2 device registers       */
-#define MSP_USB_BASE		(MSP_MSB_BASE + 0xB40000)
-					/* USB device registers         */
-#define MSP_USB_BASE_START	(MSP_MSB_BASE + 0xB40100)
-					/* USB device registers         */
-#define MSP_USB_BASE_END	(MSP_MSB_BASE + 0xB401FF)
-					/* USB device registers         */
+#define MSP_USB0_BASE		(MSP_MSB_BASE + 0xB00000)
+					/* USB0 device registers        */
+#define MSP_USB1_BASE		(MSP_MSB_BASE + 0x300000)
+					/* USB1 device registers	*/
 #define MSP_CPUIF_BASE		(MSP_MSB_BASE + 0xC00000)
 					/* CPU interface registers      */
 
@@ -319,8 +317,11 @@
 #define CPU_ERR2_REG		regptr(MSP_SLP_BASE + 0x184)
 					/* CPU/SLP Error status 1       */
 
-#define EXTENDED_GPIO_REG	regptr(MSP_SLP_BASE + 0x188)
-					/* Extended GPIO register       */
+/* Extended GPIO registers       */
+#define EXTENDED_GPIO1_REG	regptr(MSP_SLP_BASE + 0x188)
+#define EXTENDED_GPIO2_REG	regptr(MSP_SLP_BASE + 0x18c)
+#define EXTENDED_GPIO_REG	EXTENDED_GPIO1_REG
+					/* Backward-compatibility	*/
 
 /* System Error registers */
 #define SLP_ERR_STS_REG		regptr(MSP_SLP_BASE + 0x190)

diff --git a/arch/mips/include/asm/pmc-sierra/msp71xx/msp_usb.h b/arch/mips/include/asm/pmc-sierra/msp71xx/msp_usb.h
new file mode 100644
index 0000000..4c9348d
--- /dev/null
+++ b/arch/mips/include/asm/pmc-sierra/msp71xx/msp_usb.h

@@ -0,0 +1,144 @@
+/******************************************************************
+ * Copyright (c) 2000-2007 PMC-Sierra INC.
+ *
+ *     This program is free software; you can redistribute it
+ *     and/or modify it under the terms of the GNU General
+ *     Public License as published by the Free Software
+ *     Foundation; either version 2 of the License, or (at your
+ *     option) any later version.
+ *
+ *     This program is distributed in the hope that it will be
+ *     useful, but WITHOUT ANY WARRANTY; without even the implied
+ *     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *     PURPOSE.  See the GNU General Public License for more
+ *     details.
+ *
+ *     You should have received a copy of the GNU General Public
+ *     License along with this program; if not, write to the Free
+ *     Software Foundation, Inc., 675 Mass Ave, Cambridge, MA
+ *     02139, USA.
+ *
+ * PMC-SIERRA INC. DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS
+ * SOFTWARE.
+ */
+#ifndef MSP_USB_H_
+#define MSP_USB_H_
+
+#ifdef CONFIG_MSP_HAS_DUAL_USB
+#define NUM_USB_DEVS   2
+#else
+#define NUM_USB_DEVS   1
+#endif
+
+/* Register spaces for USB host 0 */
+#define MSP_USB0_MAB_START	(MSP_USB0_BASE + 0x0)
+#define MSP_USB0_MAB_END	(MSP_USB0_BASE + 0x17)
+#define MSP_USB0_ID_START	(MSP_USB0_BASE + 0x40000)
+#define MSP_USB0_ID_END		(MSP_USB0_BASE + 0x4008f)
+#define MSP_USB0_HS_START	(MSP_USB0_BASE + 0x40100)
+#define MSP_USB0_HS_END		(MSP_USB0_BASE + 0x401FF)
+
+/* Register spaces for USB host 1 */
+#define	MSP_USB1_MAB_START	(MSP_USB1_BASE + 0x0)
+#define MSP_USB1_MAB_END	(MSP_USB1_BASE + 0x17)
+#define MSP_USB1_ID_START	(MSP_USB1_BASE + 0x40000)
+#define MSP_USB1_ID_END		(MSP_USB1_BASE + 0x4008f)
+#define MSP_USB1_HS_START	(MSP_USB1_BASE + 0x40100)
+#define MSP_USB1_HS_END		(MSP_USB1_BASE + 0x401ff)
+
+/* USB Identification registers */
+struct msp_usbid_regs {
+	u32 id;		/* 0x0: Identification register */
+	u32 hwgen;	/* 0x4: General HW params */
+	u32 hwhost;	/* 0x8: Host HW params */
+	u32 hwdev;	/* 0xc: Device HW params */
+	u32 hwtxbuf;	/* 0x10: Tx buffer HW params */
+	u32 hwrxbuf;	/* 0x14: Rx buffer HW params */
+	u32 reserved[26];
+	u32 timer0_load; /* 0x80: General-purpose timer 0 load*/
+	u32 timer0_ctrl; /* 0x84: General-purpose timer 0 control */
+	u32 timer1_load; /* 0x88: General-purpose timer 1 load*/
+	u32 timer1_ctrl; /* 0x8c: General-purpose timer 1 control */
+};
+
+/* MSBus to AMBA registers */
+struct msp_mab_regs {
+	u32 isr;	/* 0x0: Interrupt status */
+	u32 imr;	/* 0x4: Interrupt mask */
+	u32 thcr0;	/* 0x8: Transaction header capture 0 */
+	u32 thcr1;	/* 0xc: Transaction header capture 1 */
+	u32 int_stat;	/* 0x10: Interrupt status summary */
+	u32 phy_cfg;	/* 0x14: USB phy config */
+};
+
+/* EHCI registers */
+struct msp_usbhs_regs {
+	u32 hciver;	/* 0x0: Version and offset to operational regs */
+	u32 hcsparams;	/* 0x4: Host control structural parameters */
+	u32 hccparams;	/* 0x8: Host control capability parameters */
+	u32 reserved0[5];
+	u32 dciver;	/* 0x20: Device interface version */
+	u32 dccparams;	/* 0x24: Device control capability parameters */
+	u32 reserved1[6];
+	u32 cmd;	/* 0x40: USB command */
+	u32 sts;	/* 0x44: USB status */
+	u32 int_ena;	/* 0x48: USB interrupt enable */
+	u32 frindex;	/* 0x4c: Frame index */
+	u32 reserved3;
+	union {
+		struct {
+			u32 flb_addr; /* 0x54: Frame list base address */
+			u32 next_async_addr; /* 0x58: next asynchronous addr */
+			u32 ttctrl; /* 0x5c: embedded transaction translator
+							async buffer status */
+			u32 burst_size; /* 0x60: Controller burst size */
+			u32 tx_fifo_ctrl; /* 0x64: Tx latency FIFO tuning */
+			u32 reserved0[4];
+			u32 endpt_nak; /* 0x78: Endpoint NAK */
+			u32 endpt_nak_ena; /* 0x7c: Endpoint NAK enable */
+			u32 cfg_flag; /* 0x80: Config flag */
+			u32 port_sc1; /* 0x84: Port status & control 1 */
+			u32 reserved1[7];
+			u32 otgsc;	/* 0xa4: OTG status & control */
+			u32 mode;	/* 0xa8: USB controller mode */
+		} host;
+
+		struct {
+			u32 dev_addr; /* 0x54: Device address */
+			u32 endpt_list_addr; /* 0x58: Endpoint list address */
+			u32 reserved0[7];
+			u32 endpt_nak;	/* 0x74 */
+			u32 endpt_nak_ctrl; /* 0x78 */
+			u32 cfg_flag; /* 0x80 */
+			u32 port_sc1; /* 0x84: Port status & control 1 */
+			u32 reserved[7];
+			u32 otgsc;	/* 0xa4: OTG status & control */
+			u32 mode;	/* 0xa8: USB controller mode */
+			u32 endpt_setup_stat; /* 0xac */
+			u32 endpt_prime; /* 0xb0 */
+			u32 endpt_flush; /* 0xb4 */
+			u32 endpt_stat; /* 0xb8 */
+			u32 endpt_complete; /* 0xbc */
+			u32 endpt_ctrl0; /* 0xc0 */
+			u32 endpt_ctrl1; /* 0xc4 */
+			u32 endpt_ctrl2; /* 0xc8 */
+			u32 endpt_ctrl3; /* 0xcc */
+		} device;
+	} u;
+};
+/*
+ * Container for the more-generic platform_device.
+ * This exists mainly as a way to map the non-standard register
+ * spaces and make them accessible to the USB ISR.
+ */
+struct mspusb_device {
+	struct msp_mab_regs   __iomem *mab_regs;
+	struct msp_usbid_regs __iomem *usbid_regs;
+	struct msp_usbhs_regs __iomem *usbhs_regs;
+	struct platform_device dev;
+};
+
+#define to_mspusb_device(x) container_of((x), struct mspusb_device, dev)
+#define TO_HOST_ID(x) ((x) & 0x3)
+#endif /*MSP_USB_H_*/

diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 396e402..ca61e84 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h

@@ -245,16 +245,16 @@
 		__asm__ __volatile__(
 		"	.set	noreorder	# arch_read_lock	\n"
 		"1:	ll	%1, %2					\n"
-		"	bltz	%1, 2f					\n"
+		"	bltz	%1, 3f					\n"
 		"	 addu	%1, 1					\n"
-		"	sc	%1, %0					\n"
+		"2:	sc	%1, %0					\n"
 		"	beqz	%1, 1b					\n"
 		"	 nop						\n"
 		"	.subsection 2					\n"
-		"2:	ll	%1, %2					\n"
-		"	bltz	%1, 2b					\n"
+		"3:	ll	%1, %2					\n"
+		"	bltz	%1, 3b					\n"
 		"	 addu	%1, 1					\n"
-		"	b	1b					\n"
+		"	b	2b					\n"
 		"	 nop						\n"
 		"	.previous					\n"
 		"	.set	reorder					\n"
@@ -324,16 +324,16 @@
 		__asm__ __volatile__(
 		"	.set	noreorder	# arch_write_lock	\n"
 		"1:	ll	%1, %2					\n"
-		"	bnez	%1, 2f					\n"
+		"	bnez	%1, 3f					\n"
 		"	 lui	%1, 0x8000				\n"
-		"	sc	%1, %0					\n"
-		"	beqz	%1, 2f					\n"
+		"2:	sc	%1, %0					\n"
+		"	beqz	%1, 3f					\n"
 		"	 nop						\n"
 		"	.subsection 2					\n"
-		"2:	ll	%1, %2					\n"
-		"	bnez	%1, 2b					\n"
+		"3:	ll	%1, %2					\n"
+		"	bnez	%1, 3b					\n"
 		"	 lui	%1, 0x8000				\n"
-		"	b	1b					\n"
+		"	b	2b					\n"
 		"	 nop						\n"
 		"	.previous					\n"
 		"	.set	reorder					\n"

diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index 550725b..dae22c1 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h

@@ -359,16 +359,20 @@
 #define __NR_fanotify_init		(__NR_Linux + 336)
 #define __NR_fanotify_mark		(__NR_Linux + 337)
 #define __NR_prlimit64			(__NR_Linux + 338)
+#define __NR_name_to_handle_at		(__NR_Linux + 339)
+#define __NR_open_by_handle_at		(__NR_Linux + 340)
+#define __NR_clock_adjtime		(__NR_Linux + 341)
+#define __NR_syncfs			(__NR_Linux + 342)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		338
+#define __NR_Linux_syscalls		342
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		338
+#define __NR_O32_Linux_syscalls		342
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -674,16 +678,20 @@
 #define __NR_fanotify_init		(__NR_Linux + 295)
 #define __NR_fanotify_mark		(__NR_Linux + 296)
 #define __NR_prlimit64			(__NR_Linux + 297)
+#define __NR_name_to_handle_at		(__NR_Linux + 298)
+#define __NR_open_by_handle_at		(__NR_Linux + 299)
+#define __NR_clock_adjtime		(__NR_Linux + 300)
+#define __NR_syncfs			(__NR_Linux + 301)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		297
+#define __NR_Linux_syscalls		301
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		297
+#define __NR_64_Linux_syscalls		301
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -994,16 +1002,20 @@
 #define __NR_fanotify_init		(__NR_Linux + 300)
 #define __NR_fanotify_mark		(__NR_Linux + 301)
 #define __NR_prlimit64			(__NR_Linux + 302)
+#define __NR_name_to_handle_at		(__NR_Linux + 303)
+#define __NR_open_by_handle_at		(__NR_Linux + 304)
+#define __NR_clock_adjtime		(__NR_Linux + 305)
+#define __NR_clock_adjtime		(__NR_Linux + 306)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		302
+#define __NR_Linux_syscalls		306
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		302
+#define __NR_N32_Linux_syscalls		306
 
 #ifdef __KERNEL__
 

diff --git a/arch/mips/jazz/irq.c b/arch/mips/jazz/irq.c
index 35b3e2f..40f7c6b 100644
--- a/arch/mips/jazz/irq.c
+++ b/arch/mips/jazz/irq.c

@@ -23,9 +23,9 @@
 
 static DEFINE_RAW_SPINLOCK(r4030_lock);
 
-static void enable_r4030_irq(unsigned int irq)
+static void enable_r4030_irq(struct irq_data *d)
 {
-	unsigned int mask = 1 << (irq - JAZZ_IRQ_START);
+	unsigned int mask = 1 << (d->irq - JAZZ_IRQ_START);
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&r4030_lock, flags);
@@ -34,9 +34,9 @@
 	raw_spin_unlock_irqrestore(&r4030_lock, flags);
 }
 
-void disable_r4030_irq(unsigned int irq)
+void disable_r4030_irq(struct irq_data *d)
 {
-	unsigned int mask = ~(1 << (irq - JAZZ_IRQ_START));
+	unsigned int mask = ~(1 << (d->irq - JAZZ_IRQ_START));
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&r4030_lock, flags);
@@ -47,10 +47,8 @@
 
 static struct irq_chip r4030_irq_type = {
 	.name = "R4030",
-	.ack = disable_r4030_irq,
-	.mask = disable_r4030_irq,
-	.mask_ack = disable_r4030_irq,
-	.unmask = enable_r4030_irq,
+	.irq_mask = disable_r4030_irq,
+	.irq_unmask = enable_r4030_irq,
 };
 
 void __init init_r4030_ints(void)

diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c
index 2c0e107..bc18daa 100644
--- a/arch/mips/jz4740/board-qi_lb60.c
+++ b/arch/mips/jz4740/board-qi_lb60.c

@@ -23,6 +23,7 @@
 #include <linux/spi/spi_gpio.h>
 #include <linux/power_supply.h>
 #include <linux/power/jz4740-battery.h>
+#include <linux/power/gpio-charger.h>
 
 #include <asm/mach-jz4740/jz4740_fb.h>
 #include <asm/mach-jz4740/jz4740_mmc.h>
@@ -49,14 +50,14 @@
 
 /* NAND */
 static struct nand_ecclayout qi_lb60_ecclayout_1gb = {
-/*	.eccbytes = 36,
+	.eccbytes = 36,
 	.eccpos = {
 		6,  7,  8,  9,  10, 11, 12, 13,
 		14, 15, 16, 17, 18, 19, 20, 21,
 		22, 23, 24, 25, 26, 27, 28, 29,
 		30, 31, 32, 33, 34, 35, 36, 37,
 		38, 39, 40, 41
-	},*/
+	},
 	.oobfree = {
 		{ .offset = 2, .length = 4 },
 		{ .offset = 42, .length = 22 }
@@ -85,7 +86,7 @@
 };
 
 static struct nand_ecclayout qi_lb60_ecclayout_2gb = {
-/*	.eccbytes = 72,
+	.eccbytes = 72,
 	.eccpos = {
 		12, 13, 14, 15, 16, 17, 18, 19,
 		20, 21, 22, 23, 24, 25, 26, 27,
@@ -96,7 +97,7 @@
 		60, 61, 62, 63, 64, 65, 66, 67,
 		68, 69, 70, 71, 72, 73, 74, 75,
 		76, 77, 78, 79, 80, 81, 82, 83
-	},*/
+	},
 	.oobfree = {
 		{ .offset = 2, .length = 10 },
 		{ .offset = 84, .length = 44 },
@@ -396,6 +397,28 @@
 	},
 };
 
+/* charger */
+static char *qi_lb60_batteries[] = {
+	"battery",
+};
+
+static struct gpio_charger_platform_data qi_lb60_charger_pdata = {
+	.name = "usb",
+	.type = POWER_SUPPLY_TYPE_USB,
+	.gpio = JZ_GPIO_PORTD(28),
+	.gpio_active_low = 1,
+	.supplied_to = qi_lb60_batteries,
+	.num_supplicants = ARRAY_SIZE(qi_lb60_batteries),
+};
+
+static struct platform_device qi_lb60_charger_device = {
+	.name = "gpio-charger",
+	.dev = {
+		.platform_data = &qi_lb60_charger_pdata,
+	},
+};
+
+
 static struct platform_device *jz_platform_devices[] __initdata = {
 	&jz4740_udc_device,
 	&jz4740_mmc_device,
@@ -410,6 +433,7 @@
 	&jz4740_adc_device,
 	&qi_lb60_gpio_keys,
 	&qi_lb60_pwm_beeper,
+	&qi_lb60_charger_device,
 };
 
 static void __init board_gpio_setup(void)

diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c
index 88e6aed..bd2fc29 100644
--- a/arch/mips/jz4740/gpio.c
+++ b/arch/mips/jz4740/gpio.c

@@ -86,7 +86,6 @@
 	spinlock_t lock;
 
 	struct gpio_chip gpio_chip;
-	struct irq_chip irq_chip;
 	struct sys_device sysdev;
 };
 
@@ -102,9 +101,9 @@
 	return container_of(gpio_chip, struct jz_gpio_chip, gpio_chip);
 }
 
-static inline struct jz_gpio_chip *irq_to_jz_gpio_chip(unsigned int irq)
+static inline struct jz_gpio_chip *irq_to_jz_gpio_chip(struct irq_data *data)
 {
-	return get_irq_chip_data(irq);
+	return irq_data_get_irq_chip_data(data);
 }
 
 static inline void jz_gpio_write_bit(unsigned int gpio, unsigned int reg)
@@ -325,62 +324,52 @@
 	generic_handle_irq(gpio_irq);
 };
 
-static inline void jz_gpio_set_irq_bit(unsigned int irq, unsigned int reg)
+static inline void jz_gpio_set_irq_bit(struct irq_data *data, unsigned int reg)
 {
-	struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(irq);
-	writel(IRQ_TO_BIT(irq), chip->base + reg);
+	struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data);
+	writel(IRQ_TO_BIT(data->irq), chip->base + reg);
 }
 
-static void jz_gpio_irq_mask(unsigned int irq)
+static void jz_gpio_irq_mask(struct irq_data *data)
 {
-	jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_MASK_SET);
+	jz_gpio_set_irq_bit(data, JZ_REG_GPIO_MASK_SET);
 };
 
-static void jz_gpio_irq_unmask(unsigned int irq)
+static void jz_gpio_irq_unmask(struct irq_data *data)
 {
-	struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(irq);
+	struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data);
 
-	jz_gpio_check_trigger_both(chip, irq);
+	jz_gpio_check_trigger_both(chip, data->irq);
 
-	jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_MASK_CLEAR);
+	jz_gpio_set_irq_bit(data, JZ_REG_GPIO_MASK_CLEAR);
 };
 
 /* TODO: Check if function is gpio */
-static unsigned int jz_gpio_irq_startup(unsigned int irq)
+static unsigned int jz_gpio_irq_startup(struct irq_data *data)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_SELECT_SET);
-
-	desc->status &= ~IRQ_MASKED;
-	jz_gpio_irq_unmask(irq);
-
+	jz_gpio_set_irq_bit(data, JZ_REG_GPIO_SELECT_SET);
+	jz_gpio_irq_unmask(data);
 	return 0;
 }
 
-static void jz_gpio_irq_shutdown(unsigned int irq)
+static void jz_gpio_irq_shutdown(struct irq_data *data)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	jz_gpio_irq_mask(irq);
-	desc->status |= IRQ_MASKED;
+	jz_gpio_irq_mask(data);
 
 	/* Set direction to input */
-	jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_DIRECTION_CLEAR);
-	jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_SELECT_CLEAR);
+	jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_CLEAR);
+	jz_gpio_set_irq_bit(data, JZ_REG_GPIO_SELECT_CLEAR);
 }
 
-static void jz_gpio_irq_ack(unsigned int irq)
+static void jz_gpio_irq_ack(struct irq_data *data)
 {
-	jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_FLAG_CLEAR);
+	jz_gpio_set_irq_bit(data, JZ_REG_GPIO_FLAG_CLEAR);
 };
 
-static int jz_gpio_irq_set_type(unsigned int irq, unsigned int flow_type)
+static int jz_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type)
 {
-	struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(irq);
-	struct irq_desc *desc = irq_to_desc(irq);
-
-	jz_gpio_irq_mask(irq);
+	struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data);
+	unsigned int irq = data->irq;
 
 	if (flow_type == IRQ_TYPE_EDGE_BOTH) {
 		uint32_t value = readl(chip->base + JZ_REG_GPIO_PIN);
@@ -395,45 +384,54 @@
 
 	switch (flow_type) {
 	case IRQ_TYPE_EDGE_RISING:
-		jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_DIRECTION_SET);
-		jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_TRIGGER_SET);
+		jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_SET);
+		jz_gpio_set_irq_bit(data, JZ_REG_GPIO_TRIGGER_SET);
 		break;
 	case IRQ_TYPE_EDGE_FALLING:
-		jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_DIRECTION_CLEAR);
-		jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_TRIGGER_SET);
+		jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_CLEAR);
+		jz_gpio_set_irq_bit(data, JZ_REG_GPIO_TRIGGER_SET);
 		break;
 	case IRQ_TYPE_LEVEL_HIGH:
-		jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_DIRECTION_SET);
-		jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_TRIGGER_CLEAR);
+		jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_SET);
+		jz_gpio_set_irq_bit(data, JZ_REG_GPIO_TRIGGER_CLEAR);
 		break;
 	case IRQ_TYPE_LEVEL_LOW:
-		jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_DIRECTION_CLEAR);
-		jz_gpio_set_irq_bit(irq, JZ_REG_GPIO_TRIGGER_CLEAR);
+		jz_gpio_set_irq_bit(data, JZ_REG_GPIO_DIRECTION_CLEAR);
+		jz_gpio_set_irq_bit(data, JZ_REG_GPIO_TRIGGER_CLEAR);
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	if (!(desc->status & IRQ_MASKED))
-		jz_gpio_irq_unmask(irq);
-
 	return 0;
 }
 
-static int jz_gpio_irq_set_wake(unsigned int irq, unsigned int on)
+static int jz_gpio_irq_set_wake(struct irq_data *data, unsigned int on)
 {
-	struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(irq);
+	struct jz_gpio_chip *chip = irq_to_jz_gpio_chip(data);
 	spin_lock(&chip->lock);
 	if (on)
-		chip->wakeup |= IRQ_TO_BIT(irq);
+		chip->wakeup |= IRQ_TO_BIT(data->irq);
 	else
-		chip->wakeup &= ~IRQ_TO_BIT(irq);
+		chip->wakeup &= ~IRQ_TO_BIT(data->irq);
 	spin_unlock(&chip->lock);
 
 	set_irq_wake(chip->irq, on);
 	return 0;
 }
 
+static struct irq_chip jz_gpio_irq_chip = {
+	.name = "GPIO",
+	.irq_mask = jz_gpio_irq_mask,
+	.irq_unmask = jz_gpio_irq_unmask,
+	.irq_ack = jz_gpio_irq_ack,
+	.irq_startup = jz_gpio_irq_startup,
+	.irq_shutdown = jz_gpio_irq_shutdown,
+	.irq_set_type = jz_gpio_irq_set_type,
+	.irq_set_wake = jz_gpio_irq_set_wake,
+	.flags = IRQCHIP_SET_TYPE_MASKED,
+};
+
 /*
  * This lock class tells lockdep that GPIO irqs are in a different
  * category than their parents, so it won't report false recursion.
@@ -452,16 +450,6 @@
 		.base = JZ4740_GPIO_BASE_ ## _bank, \
 		.ngpio = JZ4740_GPIO_NUM_ ## _bank, \
 	}, \
-	.irq_chip =  { \
-		.name = "GPIO Bank " # _bank, \
-		.mask = jz_gpio_irq_mask, \
-		.unmask = jz_gpio_irq_unmask, \
-		.ack = jz_gpio_irq_ack, \
-		.startup = jz_gpio_irq_startup, \
-		.shutdown = jz_gpio_irq_shutdown, \
-		.set_type = jz_gpio_irq_set_type, \
-		.set_wake = jz_gpio_irq_set_wake, \
-	}, \
 }
 
 static struct jz_gpio_chip jz4740_gpio_chips[] = {
@@ -526,9 +514,10 @@
 	set_irq_chained_handler(chip->irq, jz_gpio_irq_demux_handler);
 
 	for (irq = chip->irq_base; irq < chip->irq_base + chip->gpio_chip.ngpio; ++irq) {
-		lockdep_set_class(&irq_desc[irq].lock, &gpio_lock_class);
+		irq_set_lockdep_class(irq, &gpio_lock_class);
 		set_irq_chip_data(irq, chip);
-		set_irq_chip_and_handler(irq, &chip->irq_chip, handle_level_irq);
+		set_irq_chip_and_handler(irq, &jz_gpio_irq_chip,
+			handle_level_irq);
 	}
 
 	return 0;

diff --git a/arch/mips/jz4740/irq.c b/arch/mips/jz4740/irq.c
index 7d33ff8..dcc5593 100644
--- a/arch/mips/jz4740/irq.c
+++ b/arch/mips/jz4740/irq.c

@@ -43,32 +43,37 @@
 
 #define IRQ_BIT(x) BIT((x) - JZ4740_IRQ_BASE)
 
-static void intc_irq_unmask(unsigned int irq)
+static inline unsigned long intc_irq_bit(struct irq_data *data)
 {
-	writel(IRQ_BIT(irq), jz_intc_base + JZ_REG_INTC_CLEAR_MASK);
+	return (unsigned long)irq_data_get_irq_chip_data(data);
 }
 
-static void intc_irq_mask(unsigned int irq)
+static void intc_irq_unmask(struct irq_data *data)
 {
-	writel(IRQ_BIT(irq), jz_intc_base + JZ_REG_INTC_SET_MASK);
+	writel(intc_irq_bit(data), jz_intc_base + JZ_REG_INTC_CLEAR_MASK);
 }
 
-static int intc_irq_set_wake(unsigned int irq, unsigned int on)
+static void intc_irq_mask(struct irq_data *data)
+{
+	writel(intc_irq_bit(data), jz_intc_base + JZ_REG_INTC_SET_MASK);
+}
+
+static int intc_irq_set_wake(struct irq_data *data, unsigned int on)
 {
 	if (on)
-		jz_intc_wakeup |= IRQ_BIT(irq);
+		jz_intc_wakeup |= intc_irq_bit(data);
 	else
-		jz_intc_wakeup &= ~IRQ_BIT(irq);
+		jz_intc_wakeup &= ~intc_irq_bit(data);
 
 	return 0;
 }
 
 static struct irq_chip intc_irq_type = {
 	.name =		"INTC",
-	.mask =		intc_irq_mask,
-	.mask_ack =	intc_irq_mask,
-	.unmask =	intc_irq_unmask,
-	.set_wake =	intc_irq_set_wake,
+	.irq_mask =	intc_irq_mask,
+	.irq_mask_ack =	intc_irq_mask,
+	.irq_unmask =	intc_irq_unmask,
+	.irq_set_wake =	intc_irq_set_wake,
 };
 
 static irqreturn_t jz4740_cascade(int irq, void *data)
@@ -95,8 +100,11 @@
 
 	jz_intc_base = ioremap(JZ4740_INTC_BASE_ADDR, 0x14);
 
+	/* Mask all irqs */
+	writel(0xffffffff, jz_intc_base + JZ_REG_INTC_SET_MASK);
+
 	for (i = JZ4740_IRQ_BASE; i < JZ4740_IRQ_BASE + 32; i++) {
-		intc_irq_mask(i);
+		set_irq_chip_data(i, (void *)IRQ_BIT(i));
 		set_irq_chip_and_handler(i, &intc_irq_type, handle_level_irq);
 	}
 

diff --git a/arch/mips/kernel/i8259.c b/arch/mips/kernel/i8259.c
index c58176c..e221662 100644
--- a/arch/mips/kernel/i8259.c
+++ b/arch/mips/kernel/i8259.c

@@ -31,19 +31,19 @@
 
 static int i8259A_auto_eoi = -1;
 DEFINE_RAW_SPINLOCK(i8259A_lock);
-static void disable_8259A_irq(unsigned int irq);
-static void enable_8259A_irq(unsigned int irq);
-static void mask_and_ack_8259A(unsigned int irq);
+static void disable_8259A_irq(struct irq_data *d);
+static void enable_8259A_irq(struct irq_data *d);
+static void mask_and_ack_8259A(struct irq_data *d);
 static void init_8259A(int auto_eoi);
 
 static struct irq_chip i8259A_chip = {
-	.name		= "XT-PIC",
-	.mask		= disable_8259A_irq,
-	.disable	= disable_8259A_irq,
-	.unmask		= enable_8259A_irq,
-	.mask_ack	= mask_and_ack_8259A,
+	.name			= "XT-PIC",
+	.irq_mask		= disable_8259A_irq,
+	.irq_disable		= disable_8259A_irq,
+	.irq_unmask		= enable_8259A_irq,
+	.irq_mask_ack		= mask_and_ack_8259A,
 #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
-	.set_affinity	= plat_set_irq_affinity,
+	.irq_set_affinity	= plat_set_irq_affinity,
 #endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
 };
 
@@ -59,12 +59,11 @@
 #define cached_master_mask	(cached_irq_mask)
 #define cached_slave_mask	(cached_irq_mask >> 8)
 
-static void disable_8259A_irq(unsigned int irq)
+static void disable_8259A_irq(struct irq_data *d)
 {
-	unsigned int mask;
+	unsigned int mask, irq = d->irq - I8259A_IRQ_BASE;
 	unsigned long flags;
 
-	irq -= I8259A_IRQ_BASE;
 	mask = 1 << irq;
 	raw_spin_lock_irqsave(&i8259A_lock, flags);
 	cached_irq_mask |= mask;
@@ -75,12 +74,11 @@
 	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
-static void enable_8259A_irq(unsigned int irq)
+static void enable_8259A_irq(struct irq_data *d)
 {
-	unsigned int mask;
+	unsigned int mask, irq = d->irq - I8259A_IRQ_BASE;
 	unsigned long flags;
 
-	irq -= I8259A_IRQ_BASE;
 	mask = ~(1 << irq);
 	raw_spin_lock_irqsave(&i8259A_lock, flags);
 	cached_irq_mask &= mask;
@@ -145,12 +143,11 @@
  * first, _then_ send the EOI, and the order of EOI
  * to the two 8259s is important!
  */
-static void mask_and_ack_8259A(unsigned int irq)
+static void mask_and_ack_8259A(struct irq_data *d)
 {
-	unsigned int irqmask;
+	unsigned int irqmask, irq = d->irq - I8259A_IRQ_BASE;
 	unsigned long flags;
 
-	irq -= I8259A_IRQ_BASE;
 	irqmask = 1 << irq;
 	raw_spin_lock_irqsave(&i8259A_lock, flags);
 	/*
@@ -290,9 +287,9 @@
 		 * In AEOI mode we just have to mask the interrupt
 		 * when acking.
 		 */
-		i8259A_chip.mask_ack = disable_8259A_irq;
+		i8259A_chip.irq_mask_ack = disable_8259A_irq;
 	else
-		i8259A_chip.mask_ack = mask_and_ack_8259A;
+		i8259A_chip.irq_mask_ack = mask_and_ack_8259A;
 
 	udelay(100);		/* wait for 8259A to initialize */
 

diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index 1774271..43cd962 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c

@@ -87,17 +87,10 @@
 	return i;
 }
 
-static unsigned int gic_irq_startup(unsigned int irq)
+static void gic_irq_ack(struct irq_data *d)
 {
-	irq -= _irqbase;
-	pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq);
-	GIC_SET_INTR_MASK(irq);
-	return 0;
-}
+	unsigned int irq = d->irq - _irqbase;
 
-static void gic_irq_ack(unsigned int irq)
-{
-	irq -= _irqbase;
 	pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq);
 	GIC_CLR_INTR_MASK(irq);
 
@@ -105,16 +98,16 @@
 		GICWRITE(GIC_REG(SHARED, GIC_SH_WEDGE), irq);
 }
 
-static void gic_mask_irq(unsigned int irq)
+static void gic_mask_irq(struct irq_data *d)
 {
-	irq -= _irqbase;
+	unsigned int irq = d->irq - _irqbase;
 	pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq);
 	GIC_CLR_INTR_MASK(irq);
 }
 
-static void gic_unmask_irq(unsigned int irq)
+static void gic_unmask_irq(struct irq_data *d)
 {
-	irq -= _irqbase;
+	unsigned int irq = d->irq - _irqbase;
 	pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq);
 	GIC_SET_INTR_MASK(irq);
 }
@@ -123,13 +116,14 @@
 
 static DEFINE_SPINLOCK(gic_lock);
 
-static int gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask,
+			    bool force)
 {
+	unsigned int irq = d->irq - _irqbase;
 	cpumask_t	tmp = CPU_MASK_NONE;
 	unsigned long	flags;
 	int		i;
 
-	irq -= _irqbase;
 	pr_debug("%s(%d) called\n", __func__, irq);
 	cpumask_and(&tmp, cpumask, cpu_online_mask);
 	if (cpus_empty(tmp))
@@ -147,23 +141,22 @@
 		set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask);
 
 	}
-	cpumask_copy(irq_desc[irq].affinity, cpumask);
+	cpumask_copy(d->affinity, cpumask);
 	spin_unlock_irqrestore(&gic_lock, flags);
 
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 #endif
 
 static struct irq_chip gic_irq_controller = {
-	.name		=	"MIPS GIC",
-	.startup	=	gic_irq_startup,
-	.ack		=	gic_irq_ack,
-	.mask		=	gic_mask_irq,
-	.mask_ack	=	gic_mask_irq,
-	.unmask		=	gic_unmask_irq,
-	.eoi		=	gic_unmask_irq,
+	.name			=	"MIPS GIC",
+	.irq_ack		=	gic_irq_ack,
+	.irq_mask		=	gic_mask_irq,
+	.irq_mask_ack		=	gic_mask_irq,
+	.irq_unmask		=	gic_unmask_irq,
+	.irq_eoi		=	gic_unmask_irq,
 #ifdef CONFIG_SMP
-	.set_affinity	=	gic_set_affinity,
+	.irq_set_affinity	=	gic_set_affinity,
 #endif
 };
 

diff --git a/arch/mips/kernel/irq-gt641xx.c b/arch/mips/kernel/irq-gt641xx.c
index 42ef814..7fd176f 100644
--- a/arch/mips/kernel/irq-gt641xx.c
+++ b/arch/mips/kernel/irq-gt641xx.c

@@ -29,64 +29,64 @@
 
 static DEFINE_RAW_SPINLOCK(gt641xx_irq_lock);
 
-static void ack_gt641xx_irq(unsigned int irq)
+static void ack_gt641xx_irq(struct irq_data *d)
 {
 	unsigned long flags;
 	u32 cause;
 
 	raw_spin_lock_irqsave(&gt641xx_irq_lock, flags);
 	cause = GT_READ(GT_INTRCAUSE_OFS);
-	cause &= ~GT641XX_IRQ_TO_BIT(irq);
+	cause &= ~GT641XX_IRQ_TO_BIT(d->irq);
 	GT_WRITE(GT_INTRCAUSE_OFS, cause);
 	raw_spin_unlock_irqrestore(&gt641xx_irq_lock, flags);
 }
 
-static void mask_gt641xx_irq(unsigned int irq)
+static void mask_gt641xx_irq(struct irq_data *d)
 {
 	unsigned long flags;
 	u32 mask;
 
 	raw_spin_lock_irqsave(&gt641xx_irq_lock, flags);
 	mask = GT_READ(GT_INTRMASK_OFS);
-	mask &= ~GT641XX_IRQ_TO_BIT(irq);
+	mask &= ~GT641XX_IRQ_TO_BIT(d->irq);
 	GT_WRITE(GT_INTRMASK_OFS, mask);
 	raw_spin_unlock_irqrestore(&gt641xx_irq_lock, flags);
 }
 
-static void mask_ack_gt641xx_irq(unsigned int irq)
+static void mask_ack_gt641xx_irq(struct irq_data *d)
 {
 	unsigned long flags;
 	u32 cause, mask;
 
 	raw_spin_lock_irqsave(&gt641xx_irq_lock, flags);
 	mask = GT_READ(GT_INTRMASK_OFS);
-	mask &= ~GT641XX_IRQ_TO_BIT(irq);
+	mask &= ~GT641XX_IRQ_TO_BIT(d->irq);
 	GT_WRITE(GT_INTRMASK_OFS, mask);
 
 	cause = GT_READ(GT_INTRCAUSE_OFS);
-	cause &= ~GT641XX_IRQ_TO_BIT(irq);
+	cause &= ~GT641XX_IRQ_TO_BIT(d->irq);
 	GT_WRITE(GT_INTRCAUSE_OFS, cause);
 	raw_spin_unlock_irqrestore(&gt641xx_irq_lock, flags);
 }
 
-static void unmask_gt641xx_irq(unsigned int irq)
+static void unmask_gt641xx_irq(struct irq_data *d)
 {
 	unsigned long flags;
 	u32 mask;
 
 	raw_spin_lock_irqsave(&gt641xx_irq_lock, flags);
 	mask = GT_READ(GT_INTRMASK_OFS);
-	mask |= GT641XX_IRQ_TO_BIT(irq);
+	mask |= GT641XX_IRQ_TO_BIT(d->irq);
 	GT_WRITE(GT_INTRMASK_OFS, mask);
 	raw_spin_unlock_irqrestore(&gt641xx_irq_lock, flags);
 }
 
 static struct irq_chip gt641xx_irq_chip = {
 	.name		= "GT641xx",
-	.ack		= ack_gt641xx_irq,
-	.mask		= mask_gt641xx_irq,
-	.mask_ack	= mask_ack_gt641xx_irq,
-	.unmask		= unmask_gt641xx_irq,
+	.irq_ack	= ack_gt641xx_irq,
+	.irq_mask	= mask_gt641xx_irq,
+	.irq_mask_ack	= mask_ack_gt641xx_irq,
+	.irq_unmask	= unmask_gt641xx_irq,
 };
 
 void gt641xx_irq_dispatch(void)

diff --git a/arch/mips/kernel/irq-msc01.c b/arch/mips/kernel/irq-msc01.c
index 6a8cd28..fc800cd 100644
--- a/arch/mips/kernel/irq-msc01.c
+++ b/arch/mips/kernel/irq-msc01.c

@@ -28,8 +28,10 @@
 static unsigned int irq_base;
 
 /* mask off an interrupt */
-static inline void mask_msc_irq(unsigned int irq)
+static inline void mask_msc_irq(struct irq_data *d)
 {
+	unsigned int irq = d->irq;
+
 	if (irq < (irq_base + 32))
 		MSCIC_WRITE(MSC01_IC_DISL, 1<<(irq - irq_base));
 	else
@@ -37,8 +39,10 @@
 }
 
 /* unmask an interrupt */
-static inline void unmask_msc_irq(unsigned int irq)
+static inline void unmask_msc_irq(struct irq_data *d)
 {
+	unsigned int irq = d->irq;
+
 	if (irq < (irq_base + 32))
 		MSCIC_WRITE(MSC01_IC_ENAL, 1<<(irq - irq_base));
 	else
@@ -48,9 +52,11 @@
 /*
  * Masks and ACKs an IRQ
  */
-static void level_mask_and_ack_msc_irq(unsigned int irq)
+static void level_mask_and_ack_msc_irq(struct irq_data *d)
 {
-	mask_msc_irq(irq);
+	unsigned int irq = d->irq;
+
+	mask_msc_irq(d);
 	if (!cpu_has_veic)
 		MSCIC_WRITE(MSC01_IC_EOI, 0);
 	/* This actually needs to be a call into platform code */
@@ -60,9 +66,11 @@
 /*
  * Masks and ACKs an IRQ
  */
-static void edge_mask_and_ack_msc_irq(unsigned int irq)
+static void edge_mask_and_ack_msc_irq(struct irq_data *d)
 {
-	mask_msc_irq(irq);
+	unsigned int irq = d->irq;
+
+	mask_msc_irq(d);
 	if (!cpu_has_veic)
 		MSCIC_WRITE(MSC01_IC_EOI, 0);
 	else {
@@ -75,15 +83,6 @@
 }
 
 /*
- * End IRQ processing
- */
-static void end_msc_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		unmask_msc_irq(irq);
-}
-
-/*
  * Interrupt handler for interrupts coming from SOC-it.
  */
 void ll_msc_irq(void)
@@ -107,22 +106,20 @@
 
 static struct irq_chip msc_levelirq_type = {
 	.name = "SOC-it-Level",
-	.ack = level_mask_and_ack_msc_irq,
-	.mask = mask_msc_irq,
-	.mask_ack = level_mask_and_ack_msc_irq,
-	.unmask = unmask_msc_irq,
-	.eoi = unmask_msc_irq,
-	.end = end_msc_irq,
+	.irq_ack = level_mask_and_ack_msc_irq,
+	.irq_mask = mask_msc_irq,
+	.irq_mask_ack = level_mask_and_ack_msc_irq,
+	.irq_unmask = unmask_msc_irq,
+	.irq_eoi = unmask_msc_irq,
 };
 
 static struct irq_chip msc_edgeirq_type = {
 	.name = "SOC-it-Edge",
-	.ack = edge_mask_and_ack_msc_irq,
-	.mask = mask_msc_irq,
-	.mask_ack = edge_mask_and_ack_msc_irq,
-	.unmask = unmask_msc_irq,
-	.eoi = unmask_msc_irq,
-	.end = end_msc_irq,
+	.irq_ack = edge_mask_and_ack_msc_irq,
+	.irq_mask = mask_msc_irq,
+	.irq_mask_ack = edge_mask_and_ack_msc_irq,
+	.irq_unmask = unmask_msc_irq,
+	.irq_eoi = unmask_msc_irq,
 };
 
 

diff --git a/arch/mips/kernel/irq-rm7000.c b/arch/mips/kernel/irq-rm7000.c
index 9731e8b..fd24fd9 100644
--- a/arch/mips/kernel/irq-rm7000.c
+++ b/arch/mips/kernel/irq-rm7000.c

@@ -18,23 +18,23 @@
 #include <asm/mipsregs.h>
 #include <asm/system.h>
 
-static inline void unmask_rm7k_irq(unsigned int irq)
+static inline void unmask_rm7k_irq(struct irq_data *d)
 {
-	set_c0_intcontrol(0x100 << (irq - RM7K_CPU_IRQ_BASE));
+	set_c0_intcontrol(0x100 << (d->irq - RM7K_CPU_IRQ_BASE));
 }
 
-static inline void mask_rm7k_irq(unsigned int irq)
+static inline void mask_rm7k_irq(struct irq_data *d)
 {
-	clear_c0_intcontrol(0x100 << (irq - RM7K_CPU_IRQ_BASE));
+	clear_c0_intcontrol(0x100 << (d->irq - RM7K_CPU_IRQ_BASE));
 }
 
 static struct irq_chip rm7k_irq_controller = {
 	.name = "RM7000",
-	.ack = mask_rm7k_irq,
-	.mask = mask_rm7k_irq,
-	.mask_ack = mask_rm7k_irq,
-	.unmask = unmask_rm7k_irq,
-	.eoi	= unmask_rm7k_irq
+	.irq_ack = mask_rm7k_irq,
+	.irq_mask = mask_rm7k_irq,
+	.irq_mask_ack = mask_rm7k_irq,
+	.irq_unmask = unmask_rm7k_irq,
+	.irq_eoi = unmask_rm7k_irq
 };
 
 void __init rm7k_cpu_irq_init(void)

diff --git a/arch/mips/kernel/irq-rm9000.c b/arch/mips/kernel/irq-rm9000.c
index b7e4025..ca463ec 100644
--- a/arch/mips/kernel/irq-rm9000.c
+++ b/arch/mips/kernel/irq-rm9000.c

@@ -19,22 +19,22 @@
 #include <asm/mipsregs.h>
 #include <asm/system.h>
 
-static inline void unmask_rm9k_irq(unsigned int irq)
+static inline void unmask_rm9k_irq(struct irq_data *d)
 {
-	set_c0_intcontrol(0x1000 << (irq - RM9K_CPU_IRQ_BASE));
+	set_c0_intcontrol(0x1000 << (d->irq - RM9K_CPU_IRQ_BASE));
 }
 
-static inline void mask_rm9k_irq(unsigned int irq)
+static inline void mask_rm9k_irq(struct irq_data *d)
 {
-	clear_c0_intcontrol(0x1000 << (irq - RM9K_CPU_IRQ_BASE));
+	clear_c0_intcontrol(0x1000 << (d->irq - RM9K_CPU_IRQ_BASE));
 }
 
-static inline void rm9k_cpu_irq_enable(unsigned int irq)
+static inline void rm9k_cpu_irq_enable(struct irq_data *d)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	unmask_rm9k_irq(irq);
+	unmask_rm9k_irq(d);
 	local_irq_restore(flags);
 }
 
@@ -43,50 +43,47 @@
  */
 static void local_rm9k_perfcounter_irq_startup(void *args)
 {
-	unsigned int irq = (unsigned int) args;
-
-	rm9k_cpu_irq_enable(irq);
+	rm9k_cpu_irq_enable(args);
 }
 
-static unsigned int rm9k_perfcounter_irq_startup(unsigned int irq)
+static unsigned int rm9k_perfcounter_irq_startup(struct irq_data *d)
 {
-	on_each_cpu(local_rm9k_perfcounter_irq_startup, (void *) irq, 1);
+	on_each_cpu(local_rm9k_perfcounter_irq_startup, d, 1);
 
 	return 0;
 }
 
 static void local_rm9k_perfcounter_irq_shutdown(void *args)
 {
-	unsigned int irq = (unsigned int) args;
 	unsigned long flags;
 
 	local_irq_save(flags);
-	mask_rm9k_irq(irq);
+	mask_rm9k_irq(args);
 	local_irq_restore(flags);
 }
 
-static void rm9k_perfcounter_irq_shutdown(unsigned int irq)
+static void rm9k_perfcounter_irq_shutdown(struct irq_data *d)
 {
-	on_each_cpu(local_rm9k_perfcounter_irq_shutdown, (void *) irq, 1);
+	on_each_cpu(local_rm9k_perfcounter_irq_shutdown, d, 1);
 }
 
 static struct irq_chip rm9k_irq_controller = {
 	.name = "RM9000",
-	.ack = mask_rm9k_irq,
-	.mask = mask_rm9k_irq,
-	.mask_ack = mask_rm9k_irq,
-	.unmask = unmask_rm9k_irq,
-	.eoi	= unmask_rm9k_irq
+	.irq_ack = mask_rm9k_irq,
+	.irq_mask = mask_rm9k_irq,
+	.irq_mask_ack = mask_rm9k_irq,
+	.irq_unmask = unmask_rm9k_irq,
+	.irq_eoi = unmask_rm9k_irq
 };
 
 static struct irq_chip rm9k_perfcounter_irq = {
 	.name = "RM9000",
-	.startup = rm9k_perfcounter_irq_startup,
-	.shutdown = rm9k_perfcounter_irq_shutdown,
-	.ack = mask_rm9k_irq,
-	.mask = mask_rm9k_irq,
-	.mask_ack = mask_rm9k_irq,
-	.unmask = unmask_rm9k_irq,
+	.irq_startup = rm9k_perfcounter_irq_startup,
+	.irq_shutdown = rm9k_perfcounter_irq_shutdown,
+	.irq_ack = mask_rm9k_irq,
+	.irq_mask = mask_rm9k_irq,
+	.irq_mask_ack = mask_rm9k_irq,
+	.irq_unmask = unmask_rm9k_irq,
 };
 
 unsigned int rm9000_perfcount_irq;

diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index 4f93db5..1b68ebe 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c

@@ -81,48 +81,9 @@
 
 atomic_t irq_err_count;
 
-/*
- * Generic, controller-independent functions:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
+int arch_show_interrupts(struct seq_file *p, int prec)
 {
-	int i = *(loff_t *) v, j;
-	struct irqaction * action;
-	unsigned long flags;
-
-	if (i == 0) {
-		seq_printf(p, "           ");
-		for_each_online_cpu(j)
-			seq_printf(p, "CPU%d       ", j);
-		seq_putc(p, '\n');
-	}
-
-	if (i < NR_IRQS) {
-		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
-		action = irq_desc[i].action;
-		if (!action)
-			goto skip;
-		seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-#endif
-		seq_printf(p, " %14s", irq_desc[i].chip->name);
-		seq_printf(p, "  %s", action->name);
-
-		for (action=action->next; action; action = action->next)
-			seq_printf(p, ", %s", action->name);
-
-		seq_putc(p, '\n');
-skip:
-		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == NR_IRQS) {
-		seq_putc(p, '\n');
-		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-	}
+	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
 	return 0;
 }
 
@@ -183,8 +144,8 @@
 {
 	irq_enter();
 	check_stack_overflow();
-	__DO_IRQ_SMTC_HOOK(irq);
-	generic_handle_irq(irq);
+	if (!smtc_handle_on_other_cpu(irq))
+		generic_handle_irq(irq);
 	irq_exit();
 }
 
@@ -197,7 +158,7 @@
 void __irq_entry do_IRQ_no_affinity(unsigned int irq)
 {
 	irq_enter();
-	__NO_AFFINITY_IRQ_SMTC_HOOK(irq);
+	smtc_im_backstop(irq);
 	generic_handle_irq(irq);
 	irq_exit();
 }

diff --git a/arch/mips/kernel/irq_cpu.c b/arch/mips/kernel/irq_cpu.c
index 0262abe..fd945c5 100644
--- a/arch/mips/kernel/irq_cpu.c
+++ b/arch/mips/kernel/irq_cpu.c

@@ -37,42 +37,38 @@
 #include <asm/mipsmtregs.h>
 #include <asm/system.h>
 
-static inline void unmask_mips_irq(unsigned int irq)
+static inline void unmask_mips_irq(struct irq_data *d)
 {
-	set_c0_status(0x100 << (irq - MIPS_CPU_IRQ_BASE));
+	set_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
 	irq_enable_hazard();
 }
 
-static inline void mask_mips_irq(unsigned int irq)
+static inline void mask_mips_irq(struct irq_data *d)
 {
-	clear_c0_status(0x100 << (irq - MIPS_CPU_IRQ_BASE));
+	clear_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
 	irq_disable_hazard();
 }
 
 static struct irq_chip mips_cpu_irq_controller = {
 	.name		= "MIPS",
-	.ack		= mask_mips_irq,
-	.mask		= mask_mips_irq,
-	.mask_ack	= mask_mips_irq,
-	.unmask		= unmask_mips_irq,
-	.eoi		= unmask_mips_irq,
+	.irq_ack	= mask_mips_irq,
+	.irq_mask	= mask_mips_irq,
+	.irq_mask_ack	= mask_mips_irq,
+	.irq_unmask	= unmask_mips_irq,
+	.irq_eoi	= unmask_mips_irq,
 };
 
 /*
  * Basically the same as above but taking care of all the MT stuff
  */
 
-#define unmask_mips_mt_irq	unmask_mips_irq
-#define mask_mips_mt_irq	mask_mips_irq
-
-static unsigned int mips_mt_cpu_irq_startup(unsigned int irq)
+static unsigned int mips_mt_cpu_irq_startup(struct irq_data *d)
 {
 	unsigned int vpflags = dvpe();
 
-	clear_c0_cause(0x100 << (irq - MIPS_CPU_IRQ_BASE));
+	clear_c0_cause(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
 	evpe(vpflags);
-	unmask_mips_mt_irq(irq);
-
+	unmask_mips_irq(d);
 	return 0;
 }
 
@@ -80,22 +76,22 @@
  * While we ack the interrupt interrupts are disabled and thus we don't need
  * to deal with concurrency issues.  Same for mips_cpu_irq_end.
  */
-static void mips_mt_cpu_irq_ack(unsigned int irq)
+static void mips_mt_cpu_irq_ack(struct irq_data *d)
 {
 	unsigned int vpflags = dvpe();
-	clear_c0_cause(0x100 << (irq - MIPS_CPU_IRQ_BASE));
+	clear_c0_cause(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
 	evpe(vpflags);
-	mask_mips_mt_irq(irq);
+	mask_mips_irq(d);
 }
 
 static struct irq_chip mips_mt_cpu_irq_controller = {
 	.name		= "MIPS",
-	.startup	= mips_mt_cpu_irq_startup,
-	.ack		= mips_mt_cpu_irq_ack,
-	.mask		= mask_mips_mt_irq,
-	.mask_ack	= mips_mt_cpu_irq_ack,
-	.unmask		= unmask_mips_mt_irq,
-	.eoi		= unmask_mips_mt_irq,
+	.irq_startup	= mips_mt_cpu_irq_startup,
+	.irq_ack	= mips_mt_cpu_irq_ack,
+	.irq_mask	= mask_mips_irq,
+	.irq_mask_ack	= mips_mt_cpu_irq_ack,
+	.irq_unmask	= unmask_mips_irq,
+	.irq_eoi	= unmask_mips_irq,
 };
 
 void __init mips_cpu_irq_init(void)

diff --git a/arch/mips/kernel/irq_txx9.c b/arch/mips/kernel/irq_txx9.c
index 95a96f6..526e158 100644
--- a/arch/mips/kernel/irq_txx9.c
+++ b/arch/mips/kernel/irq_txx9.c

@@ -63,9 +63,9 @@
 	unsigned char mode;
 } txx9irq[TXx9_MAX_IR] __read_mostly;
 
-static void txx9_irq_unmask(unsigned int irq)
+static void txx9_irq_unmask(struct irq_data *d)
 {
-	unsigned int irq_nr = irq - TXX9_IRQ_BASE;
+	unsigned int irq_nr = d->irq - TXX9_IRQ_BASE;
 	u32 __iomem *ilrp = &txx9_ircptr->ilr[(irq_nr % 16 ) / 2];
 	int ofs = irq_nr / 16 * 16 + (irq_nr & 1) * 8;
 
@@ -79,9 +79,9 @@
 #endif
 }
 
-static inline void txx9_irq_mask(unsigned int irq)
+static inline void txx9_irq_mask(struct irq_data *d)
 {
-	unsigned int irq_nr = irq - TXX9_IRQ_BASE;
+	unsigned int irq_nr = d->irq - TXX9_IRQ_BASE;
 	u32 __iomem *ilrp = &txx9_ircptr->ilr[(irq_nr % 16) / 2];
 	int ofs = irq_nr / 16 * 16 + (irq_nr & 1) * 8;
 
@@ -99,19 +99,19 @@
 #endif
 }
 
-static void txx9_irq_mask_ack(unsigned int irq)
+static void txx9_irq_mask_ack(struct irq_data *d)
 {
-	unsigned int irq_nr = irq - TXX9_IRQ_BASE;
+	unsigned int irq_nr = d->irq - TXX9_IRQ_BASE;
 
-	txx9_irq_mask(irq);
+	txx9_irq_mask(d);
 	/* clear edge detection */
 	if (unlikely(TXx9_IRCR_EDGE(txx9irq[irq_nr].mode)))
 		__raw_writel(TXx9_IRSCR_EIClrE | irq_nr, &txx9_ircptr->scr);
 }
 
-static int txx9_irq_set_type(unsigned int irq, unsigned int flow_type)
+static int txx9_irq_set_type(struct irq_data *d, unsigned int flow_type)
 {
-	unsigned int irq_nr = irq - TXX9_IRQ_BASE;
+	unsigned int irq_nr = d->irq - TXX9_IRQ_BASE;
 	u32 cr;
 	u32 __iomem *crp;
 	int ofs;
@@ -139,11 +139,11 @@
 
 static struct irq_chip txx9_irq_chip = {
 	.name		= "TXX9",
-	.ack		= txx9_irq_mask_ack,
-	.mask		= txx9_irq_mask,
-	.mask_ack	= txx9_irq_mask_ack,
-	.unmask		= txx9_irq_unmask,
-	.set_type	= txx9_irq_set_type,
+	.irq_ack	= txx9_irq_mask_ack,
+	.irq_mask	= txx9_irq_mask,
+	.irq_mask_ack	= txx9_irq_mask_ack,
+	.irq_unmask	= txx9_irq_unmask,
+	.irq_set_type	= txx9_irq_set_type,
 };
 
 void __init txx9_irq_init(unsigned long baseaddr)

diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index fbaabad..7f5468b 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S

@@ -586,6 +586,10 @@
 	sys	sys_fanotify_init	2
 	sys	sys_fanotify_mark	6
 	sys	sys_prlimit64		4
+	sys	sys_name_to_handle_at	5
+	sys	sys_open_by_handle_at	3	/* 4340 */
+	sys	sys_clock_adjtime	2
+	sys	sys_syncfs		1
 	.endm
 
 	/* We pre-compute the number of _instruction_ bytes needed to

diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 3f41792..a2e1fcb 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S

@@ -425,4 +425,8 @@
 	PTR	sys_fanotify_init		/* 5295 */
 	PTR	sys_fanotify_mark
 	PTR	sys_prlimit64
+	PTR	sys_name_to_handle_at
+	PTR	sys_open_by_handle_at
+	PTR	sys_clock_adjtime		/* 5300 */
+	PTR	sys_syncfs
 	.size	sys_call_table,.-sys_call_table

diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index f08ece6..b2c7624 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S

@@ -425,4 +425,8 @@
 	PTR	sys_fanotify_init		/* 6300 */
 	PTR	sys_fanotify_mark
 	PTR	sys_prlimit64
+	PTR	sys_name_to_handle_at
+	PTR	sys_open_by_handle_at
+	PTR	compat_sys_clock_adjtime	/* 6305 */
+	PTR	sys_syncfs
 	.size	sysn32_call_table,.-sysn32_call_table

diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 78d768a..049a9c8 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S

@@ -543,4 +543,8 @@
 	PTR	sys_fanotify_init
 	PTR	sys_32_fanotify_mark
 	PTR	sys_prlimit64
+	PTR	sys_name_to_handle_at
+	PTR	compat_sys_open_by_handle_at	/* 4340 */
+	PTR	compat_sys_clock_adjtime
+	PTR	sys_syncfs
 	.size	sys_call_table,.-sys_call_table

diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 39c0825..f7e2c78 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c

@@ -677,8 +677,9 @@
 	 */
 }
 
-void smtc_forward_irq(unsigned int irq)
+void smtc_forward_irq(struct irq_data *d)
 {
+	unsigned int irq = d->irq;
 	int target;
 
 	/*
@@ -692,7 +693,7 @@
 	 * and efficiency, we just pick the easiest one to find.
 	 */
 
-	target = cpumask_first(irq_desc[irq].affinity);
+	target = cpumask_first(d->affinity);
 
 	/*
 	 * We depend on the platform code to have correctly processed
@@ -707,12 +708,10 @@
 	 */
 
 	/* If no one is eligible, service locally */
-	if (target >= NR_CPUS) {
+	if (target >= NR_CPUS)
 		do_IRQ_no_affinity(irq);
-		return;
-	}
-
-	smtc_send_ipi(target, IRQ_AFFINITY_IPI, irq);
+	else
+		smtc_send_ipi(target, IRQ_AFFINITY_IPI, irq);
 }
 
 #endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */

diff --git a/arch/mips/lasat/interrupt.c b/arch/mips/lasat/interrupt.c
index 1353fb1..670e3e7 100644
--- a/arch/mips/lasat/interrupt.c
+++ b/arch/mips/lasat/interrupt.c

@@ -32,24 +32,24 @@
 static volatile int *lasat_int_mask;
 static volatile int lasat_int_mask_shift;
 
-void disable_lasat_irq(unsigned int irq_nr)
+void disable_lasat_irq(struct irq_data *d)
 {
-	irq_nr -= LASAT_IRQ_BASE;
+	unsigned int irq_nr = d->irq - LASAT_IRQ_BASE;
+
 	*lasat_int_mask &= ~(1 << irq_nr) << lasat_int_mask_shift;
 }
 
-void enable_lasat_irq(unsigned int irq_nr)
+void enable_lasat_irq(struct irq_data *d)
 {
-	irq_nr -= LASAT_IRQ_BASE;
+	unsigned int irq_nr = d->irq - LASAT_IRQ_BASE;
+
 	*lasat_int_mask |= (1 << irq_nr) << lasat_int_mask_shift;
 }
 
 static struct irq_chip lasat_irq_type = {
 	.name = "Lasat",
-	.ack = disable_lasat_irq,
-	.mask = disable_lasat_irq,
-	.mask_ack = disable_lasat_irq,
-	.unmask = enable_lasat_irq,
+	.irq_mask = disable_lasat_irq,
+	.irq_unmask = enable_lasat_irq,
 };
 
 static inline int ls1bit32(unsigned int x)

diff --git a/arch/mips/loongson/common/bonito-irq.c b/arch/mips/loongson/common/bonito-irq.c
index 2dc2a4c..1549361 100644
--- a/arch/mips/loongson/common/bonito-irq.c
+++ b/arch/mips/loongson/common/bonito-irq.c

@@ -16,24 +16,22 @@
 
 #include <loongson.h>
 
-static inline void bonito_irq_enable(unsigned int irq)
+static inline void bonito_irq_enable(struct irq_data *d)
 {
-	LOONGSON_INTENSET = (1 << (irq - LOONGSON_IRQ_BASE));
+	LOONGSON_INTENSET = (1 << (d->irq - LOONGSON_IRQ_BASE));
 	mmiowb();
 }
 
-static inline void bonito_irq_disable(unsigned int irq)
+static inline void bonito_irq_disable(struct irq_data *d)
 {
-	LOONGSON_INTENCLR = (1 << (irq - LOONGSON_IRQ_BASE));
+	LOONGSON_INTENCLR = (1 << (d->irq - LOONGSON_IRQ_BASE));
 	mmiowb();
 }
 
 static struct irq_chip bonito_irq_type = {
-	.name	= "bonito_irq",
-	.ack	= bonito_irq_disable,
-	.mask	= bonito_irq_disable,
-	.mask_ack = bonito_irq_disable,
-	.unmask	= bonito_irq_enable,
+	.name		= "bonito_irq",
+	.irq_mask	= bonito_irq_disable,
+	.irq_unmask	= bonito_irq_enable,
 };
 
 static struct irqaction __maybe_unused dma_timeout_irqaction = {

diff --git a/arch/mips/mipssim/sim_smtc.c b/arch/mips/mipssim/sim_smtc.c
index 5da30b6..30df472 100644
--- a/arch/mips/mipssim/sim_smtc.c
+++ b/arch/mips/mipssim/sim_smtc.c

@@ -27,6 +27,7 @@
 #include <asm/atomic.h>
 #include <asm/cpu.h>
 #include <asm/processor.h>
+#include <asm/smtc.h>
 #include <asm/system.h>
 #include <asm/mmu_context.h>
 #include <asm/smtc_ipi.h>
@@ -57,8 +58,6 @@
  */
 static void __cpuinit ssmtc_init_secondary(void)
 {
-	void smtc_init_secondary(void);
-
 	smtc_init_secondary();
 }
 

diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index 192cfd2..e678915 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c

@@ -34,7 +34,6 @@
  */
 static void __cpuinit msmtc_init_secondary(void)
 {
-	void smtc_init_secondary(void);
 	int myvpe;
 
 	/* Don't enable Malta I/O interrupts (IP2) for secondary VPEs */
@@ -114,7 +113,8 @@
  */
 
 
-int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
+int plat_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity,
+			  bool force)
 {
 	cpumask_t tmask;
 	int cpu = 0;
@@ -144,7 +144,7 @@
 		if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu))
 			cpu_clear(cpu, tmask);
 	}
-	cpumask_copy(irq_desc[irq].affinity, &tmask);
+	cpumask_copy(d->affinity, &tmask);
 
 	if (cpus_empty(tmask))
 		/*
@@ -155,8 +155,8 @@
 			"IRQ affinity leaves no legal CPU for IRQ %d\n", irq);
 
 	/* Do any generic SMTC IRQ affinity setup */
-	smtc_set_irq_affinity(irq, tmask);
+	smtc_set_irq_affinity(d->irq, tmask);
 
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 #endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */

diff --git a/arch/mips/pmc-sierra/Kconfig b/arch/mips/pmc-sierra/Kconfig
index 8d79849..bbd7608 100644
--- a/arch/mips/pmc-sierra/Kconfig
+++ b/arch/mips/pmc-sierra/Kconfig

@@ -23,6 +23,8 @@
 	select SYS_SUPPORTS_MULTITHREADING
 	select IRQ_MSP_CIC
 	select HW_HAS_PCI
+	select MSP_HAS_USB
+	select MSP_ETH
 
 config PMC_MSP7120_FPGA
 	bool "PMC-Sierra MSP7120 FPGA"
@@ -35,3 +37,16 @@
 config HYPERTRANSPORT
 	bool "Hypertransport Support for PMC-Sierra Yosemite"
 	depends on PMC_YOSEMITE
+
+config MSP_HAS_USB
+	boolean
+	depends on PMC_MSP
+
+config MSP_ETH
+	boolean
+	select MSP_HAS_MAC
+	depends on PMC_MSP
+
+config MSP_HAS_MAC
+	boolean
+	depends on PMC_MSP

diff --git a/arch/mips/pmc-sierra/msp71xx/Makefile b/arch/mips/pmc-sierra/msp71xx/Makefile
index e107f79..cefba77 100644
--- a/arch/mips/pmc-sierra/msp71xx/Makefile
+++ b/arch/mips/pmc-sierra/msp71xx/Makefile

@@ -6,7 +6,9 @@
 obj-$(CONFIG_HAVE_GPIO_LIB) += gpio.o gpio_extended.o
 obj-$(CONFIG_PMC_MSP7120_GW) += msp_hwbutton.o
 obj-$(CONFIG_IRQ_MSP_SLP) += msp_irq_slp.o
-obj-$(CONFIG_IRQ_MSP_CIC) += msp_irq_cic.o
+obj-$(CONFIG_IRQ_MSP_CIC) += msp_irq_cic.o msp_irq_per.o
 obj-$(CONFIG_PCI) += msp_pci.o
-obj-$(CONFIG_MSPETH) += msp_eth.o
-obj-$(CONFIG_USB_MSP71XX) += msp_usb.o
+obj-$(CONFIG_MSP_HAS_MAC) += msp_eth.o
+obj-$(CONFIG_MSP_HAS_USB) += msp_usb.o
+obj-$(CONFIG_MIPS_MT_SMP) += msp_smp.o
+obj-$(CONFIG_MIPS_MT_SMTC) += msp_smtc.o

diff --git a/arch/mips/pmc-sierra/msp71xx/msp_eth.c b/arch/mips/pmc-sierra/msp71xx/msp_eth.c
new file mode 100644
index 0000000..c584df3
--- /dev/null
+++ b/arch/mips/pmc-sierra/msp71xx/msp_eth.c

@@ -0,0 +1,187 @@
+/*
+ * The setup file for ethernet related hardware on PMC-Sierra MSP processors.
+ *
+ * Copyright 2010 PMC-Sierra, Inc.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <msp_regs.h>
+#include <msp_int.h>
+#include <msp_gpio_macros.h>
+
+
+#define MSP_ETHERNET_GPIO0	14
+#define MSP_ETHERNET_GPIO1	15
+#define MSP_ETHERNET_GPIO2	16
+
+#ifdef CONFIG_MSP_HAS_TSMAC
+#define MSP_TSMAC_SIZE	0x10020
+#define MSP_TSMAC_ID	"pmc_tsmac"
+
+static struct resource msp_tsmac0_resources[] = {
+	[0] = {
+		.start	= MSP_MAC0_BASE,
+		.end	= MSP_MAC0_BASE + MSP_TSMAC_SIZE - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= MSP_INT_MAC0,
+		.end	= MSP_INT_MAC0,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct resource msp_tsmac1_resources[] = {
+	[0] = {
+		.start	= MSP_MAC1_BASE,
+		.end	= MSP_MAC1_BASE + MSP_TSMAC_SIZE - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= MSP_INT_MAC1,
+		.end	= MSP_INT_MAC1,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+static struct resource msp_tsmac2_resources[] = {
+	[0] = {
+		.start	= MSP_MAC2_BASE,
+		.end	= MSP_MAC2_BASE + MSP_TSMAC_SIZE - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= MSP_INT_SAR,
+		.end	= MSP_INT_SAR,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+
+static struct platform_device tsmac_device[] = {
+	[0] = {
+		.name	= MSP_TSMAC_ID,
+		.id	= 0,
+		.num_resources = ARRAY_SIZE(msp_tsmac0_resources),
+		.resource = msp_tsmac0_resources,
+	},
+	[1] = {
+		.name	= MSP_TSMAC_ID,
+		.id	= 1,
+		.num_resources = ARRAY_SIZE(msp_tsmac1_resources),
+		.resource = msp_tsmac1_resources,
+	},
+	[2] = {
+		.name	= MSP_TSMAC_ID,
+		.id	= 2,
+		.num_resources = ARRAY_SIZE(msp_tsmac2_resources),
+		.resource = msp_tsmac2_resources,
+	},
+};
+#define msp_eth_devs	tsmac_device
+
+#else
+/* If it is not TSMAC assume MSP_ETH (100Mbps) */
+#define MSP_ETH_ID	"pmc_mspeth"
+#define MSP_ETH_SIZE	0xE0
+static struct resource msp_eth0_resources[] = {
+	[0] = {
+		.start	= MSP_MAC0_BASE,
+		.end	= MSP_MAC0_BASE + MSP_ETH_SIZE - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= MSP_INT_MAC0,
+		.end	= MSP_INT_MAC0,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct resource msp_eth1_resources[] = {
+	[0] = {
+		.start	= MSP_MAC1_BASE,
+		.end	= MSP_MAC1_BASE + MSP_ETH_SIZE - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= MSP_INT_MAC1,
+		.end	= MSP_INT_MAC1,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+
+
+static struct platform_device mspeth_device[] = {
+	[0] = {
+		.name	= MSP_ETH_ID,
+		.id	= 0,
+		.num_resources = ARRAY_SIZE(msp_eth0_resources),
+		.resource = msp_eth0_resources,
+	},
+	[1] = {
+		.name	= MSP_ETH_ID,
+		.id	= 1,
+		.num_resources = ARRAY_SIZE(msp_eth1_resources),
+		.resource = msp_eth1_resources,
+	},
+
+};
+#define msp_eth_devs	mspeth_device
+
+#endif
+int __init msp_eth_setup(void)
+{
+	int i, ret = 0;
+
+	/* Configure the GPIO and take the ethernet PHY out of reset */
+	msp_gpio_pin_mode(MSP_GPIO_OUTPUT, MSP_ETHERNET_GPIO0);
+	msp_gpio_pin_hi(MSP_ETHERNET_GPIO0);
+
+#ifdef CONFIG_MSP_HAS_TSMAC
+	/* 3 phys on boards with TSMAC */
+	msp_gpio_pin_mode(MSP_GPIO_OUTPUT, MSP_ETHERNET_GPIO1);
+	msp_gpio_pin_hi(MSP_ETHERNET_GPIO1);
+
+	msp_gpio_pin_mode(MSP_GPIO_OUTPUT, MSP_ETHERNET_GPIO2);
+	msp_gpio_pin_hi(MSP_ETHERNET_GPIO2);
+#endif
+	for (i = 0; i < ARRAY_SIZE(msp_eth_devs); i++) {
+		ret = platform_device_register(&msp_eth_devs[i]);
+		printk(KERN_INFO "device: %d, return value = %d\n", i, ret);
+		if (ret) {
+			platform_device_unregister(&msp_eth_devs[i]);
+			break;
+		}
+	}
+
+	if (ret)
+		printk(KERN_WARNING "Could not initialize "
+						"MSPETH device structures.\n");
+
+	return ret;
+}
+subsys_initcall(msp_eth_setup);

diff --git a/arch/mips/pmc-sierra/msp71xx/msp_irq.c b/arch/mips/pmc-sierra/msp71xx/msp_irq.c
index 734d598..4531c4a 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_irq.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_irq.c

@@ -19,8 +19,6 @@
 
 #include <msp_int.h>
 
-extern void msp_int_handle(void);
-
 /* SLP bases systems */
 extern void msp_slp_irq_init(void);
 extern void msp_slp_irq_dispatch(void);
@@ -29,6 +27,18 @@
 extern void msp_cic_irq_init(void);
 extern void msp_cic_irq_dispatch(void);
 
+/* VSMP support init */
+extern void msp_vsmp_int_init(void);
+
+/* vectored interrupt implementation */
+
+/* SW0/1 interrupts are used for SMP/SMTC */
+static inline void mac0_int_dispatch(void) { do_IRQ(MSP_INT_MAC0); }
+static inline void mac1_int_dispatch(void) { do_IRQ(MSP_INT_MAC1); }
+static inline void mac2_int_dispatch(void) { do_IRQ(MSP_INT_SAR); }
+static inline void usb_int_dispatch(void)  { do_IRQ(MSP_INT_USB);  }
+static inline void sec_int_dispatch(void)  { do_IRQ(MSP_INT_SEC);  }
+
 /*
  * The PMC-Sierra MSP interrupts are arranged in a 3 level cascaded
  * hierarchical system.  The first level are the direct MIPS interrupts
@@ -96,29 +106,57 @@
 		do_IRQ(MSP_INT_SW1);
 }
 
-static struct irqaction cascade_msp = {
+static struct irqaction cic_cascade_msp = {
 	.handler = no_action,
-	.name	 = "MSP cascade"
+	.name	 = "MSP CIC cascade"
 };
 
+static struct irqaction per_cascade_msp = {
+	.handler = no_action,
+	.name	 = "MSP PER cascade"
+};
 
 void __init arch_init_irq(void)
 {
+	/* assume we'll be using vectored interrupt mode except in UP mode*/
+#ifdef CONFIG_MIPS_MT
+	BUG_ON(!cpu_has_vint);
+#endif
 	/* initialize the 1st-level CPU based interrupt controller */
 	mips_cpu_irq_init();
 
 #ifdef CONFIG_IRQ_MSP_CIC
 	msp_cic_irq_init();
+#ifdef CONFIG_MIPS_MT
+	set_vi_handler(MSP_INT_CIC, msp_cic_irq_dispatch);
+	set_vi_handler(MSP_INT_MAC0, mac0_int_dispatch);
+	set_vi_handler(MSP_INT_MAC1, mac1_int_dispatch);
+	set_vi_handler(MSP_INT_SAR, mac2_int_dispatch);
+	set_vi_handler(MSP_INT_USB, usb_int_dispatch);
+	set_vi_handler(MSP_INT_SEC, sec_int_dispatch);
+#ifdef CONFIG_MIPS_MT_SMP
+	msp_vsmp_int_init();
+#elif defined CONFIG_MIPS_MT_SMTC
+	/*Set hwmask for all platform devices */
+	irq_hwmask[MSP_INT_MAC0] = C_IRQ0;
+	irq_hwmask[MSP_INT_MAC1] = C_IRQ1;
+	irq_hwmask[MSP_INT_USB] = C_IRQ2;
+	irq_hwmask[MSP_INT_SAR] = C_IRQ3;
+	irq_hwmask[MSP_INT_SEC] = C_IRQ5;
 
+#endif	/* CONFIG_MIPS_MT_SMP */
+#endif	/* CONFIG_MIPS_MT */
 	/* setup the cascaded interrupts */
-	setup_irq(MSP_INT_CIC, &cascade_msp);
-	setup_irq(MSP_INT_PER, &cascade_msp);
+	setup_irq(MSP_INT_CIC, &cic_cascade_msp);
+	setup_irq(MSP_INT_PER, &per_cascade_msp);
+
 #else
 	/* setup the 2nd-level SLP register based interrupt controller */
+	/* VSMP /SMTC support support is not enabled for SLP */
 	msp_slp_irq_init();
 
 	/* setup the cascaded SLP/PER interrupts */
-	setup_irq(MSP_INT_SLP, &cascade_msp);
-	setup_irq(MSP_INT_PER, &cascade_msp);
+	setup_irq(MSP_INT_SLP, &cic_cascade_msp);
+	setup_irq(MSP_INT_PER, &per_cascade_msp);
 #endif
 }

diff --git a/arch/mips/pmc-sierra/msp71xx/msp_irq_cic.c b/arch/mips/pmc-sierra/msp71xx/msp_irq_cic.c
index 07e71ff..352f29d 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_irq_cic.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_irq_cic.c

@@ -1,8 +1,7 @@
 /*
- * This file define the irq handler for MSP SLM subsystem interrupts.
+ * Copyright 2010 PMC-Sierra, Inc, derived from irq_cpu.c
  *
- * Copyright 2005-2007 PMC-Sierra, Inc, derived from irq_cpu.c
- * Author: Andrew Hughes, Andrew_Hughes@pmc-sierra.com
+ * This file define the irq handler for MSP CIC subsystem interrupts.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -16,119 +15,203 @@
 #include <linux/bitops.h>
 #include <linux/irq.h>
 
+#include <asm/mipsregs.h>
 #include <asm/system.h>
 
 #include <msp_cic_int.h>
 #include <msp_regs.h>
 
 /*
- * NOTE: We are only enabling support for VPE0 right now.
+ * External API
  */
+extern void msp_per_irq_init(void);
+extern void msp_per_irq_dispatch(void);
 
-static inline void unmask_msp_cic_irq(unsigned int irq)
-{
-
-	/* check for PER interrupt range */
-	if (irq < MSP_PER_INTBASE)
-		*CIC_VPE0_MSK_REG |= (1 << (irq - MSP_CIC_INTBASE));
-	else
-		*PER_INT_MSK_REG |= (1 << (irq - MSP_PER_INTBASE));
-}
-
-static inline void mask_msp_cic_irq(unsigned int irq)
-{
-	/* check for PER interrupt range */
-	if (irq < MSP_PER_INTBASE)
-		*CIC_VPE0_MSK_REG &= ~(1 << (irq - MSP_CIC_INTBASE));
-	else
-		*PER_INT_MSK_REG &= ~(1 << (irq - MSP_PER_INTBASE));
-}
 
 /*
- * While we ack the interrupt interrupts are disabled and thus we don't need
- * to deal with concurrency issues.  Same for msp_cic_irq_end.
+ * Convenience Macro.  Should be somewhere generic.
  */
-static inline void ack_msp_cic_irq(unsigned int irq)
+#define get_current_vpe()   \
+	((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE)
+
+#ifdef CONFIG_SMP
+
+#define LOCK_VPE(flags, mtflags) \
+do {				\
+	local_irq_save(flags);	\
+	mtflags = dmt();	\
+} while (0)
+
+#define UNLOCK_VPE(flags, mtflags) \
+do {				\
+	emt(mtflags);		\
+	local_irq_restore(flags);\
+} while (0)
+
+#define LOCK_CORE(flags, mtflags) \
+do {				\
+	local_irq_save(flags);	\
+	mtflags = dvpe();	\
+} while (0)
+
+#define UNLOCK_CORE(flags, mtflags)		\
+do {				\
+	evpe(mtflags);		\
+	local_irq_restore(flags);\
+} while (0)
+
+#else
+
+#define LOCK_VPE(flags, mtflags)
+#define UNLOCK_VPE(flags, mtflags)
+#endif
+
+/* ensure writes to cic are completed */
+static inline void cic_wmb(void)
 {
-	mask_msp_cic_irq(irq);
+	const volatile void __iomem *cic_mem = CIC_VPE0_MSK_REG;
+	volatile u32 dummy_read;
+
+	wmb();
+	dummy_read = __raw_readl(cic_mem);
+	dummy_read++;
+}
+
+static void unmask_cic_irq(struct irq_data *d)
+{
+	volatile u32   *cic_msk_reg = CIC_VPE0_MSK_REG;
+	int vpe;
+#ifdef CONFIG_SMP
+	unsigned int mtflags;
+	unsigned long  flags;
 
 	/*
-	 * only really necessary for 18, 16-14 and sometimes 3:0 (since
-	 * these can be edge sensitive) but it doesn't hurt for the others.
-	 */
+	* Make sure we have IRQ affinity.  It may have changed while
+	* we were processing the IRQ.
+	*/
+	if (!cpumask_test_cpu(smp_processor_id(), d->affinity))
+		return;
+#endif
 
-	/* check for PER interrupt range */
-	if (irq < MSP_PER_INTBASE)
-		*CIC_STS_REG = (1 << (irq - MSP_CIC_INTBASE));
-	else
-		*PER_INT_STS_REG = (1 << (irq - MSP_PER_INTBASE));
+	vpe = get_current_vpe();
+	LOCK_VPE(flags, mtflags);
+	cic_msk_reg[vpe] |= (1 << (d->irq - MSP_CIC_INTBASE));
+	UNLOCK_VPE(flags, mtflags);
+	cic_wmb();
 }
 
+static void mask_cic_irq(struct irq_data *d)
+{
+	volatile u32 *cic_msk_reg = CIC_VPE0_MSK_REG;
+	int	vpe = get_current_vpe();
+#ifdef CONFIG_SMP
+	unsigned long flags, mtflags;
+#endif
+	LOCK_VPE(flags, mtflags);
+	cic_msk_reg[vpe] &= ~(1 << (d->irq - MSP_CIC_INTBASE));
+	UNLOCK_VPE(flags, mtflags);
+	cic_wmb();
+}
+static void msp_cic_irq_ack(struct irq_data *d)
+{
+	mask_cic_irq(d);
+	/*
+	* Only really necessary for 18, 16-14 and sometimes 3:0
+	* (since these can be edge sensitive) but it doesn't
+	* hurt for the others
+	*/
+	*CIC_STS_REG = (1 << (d->irq - MSP_CIC_INTBASE));
+	smtc_im_ack_irq(d->irq);
+}
+
+/*Note: Limiting to VSMP . Not tested in SMTC */
+
+#ifdef CONFIG_MIPS_MT_SMP
+static int msp_cic_irq_set_affinity(struct irq_data *d,
+				    const struct cpumask *cpumask, bool force)
+{
+	int cpu;
+	unsigned long flags;
+	unsigned int  mtflags;
+	unsigned long imask = (1 << (irq - MSP_CIC_INTBASE));
+	volatile u32 *cic_mask = (volatile u32 *)CIC_VPE0_MSK_REG;
+
+	/* timer balancing should be disabled in kernel code */
+	BUG_ON(irq == MSP_INT_VPE0_TIMER || irq == MSP_INT_VPE1_TIMER);
+
+	LOCK_CORE(flags, mtflags);
+	/* enable if any of each VPE's TCs require this IRQ */
+	for_each_online_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, cpumask))
+			cic_mask[cpu] |= imask;
+		else
+			cic_mask[cpu] &= ~imask;
+
+	}
+
+	UNLOCK_CORE(flags, mtflags);
+	return 0;
+
+}
+#endif
+
 static struct irq_chip msp_cic_irq_controller = {
 	.name = "MSP_CIC",
-	.ack = ack_msp_cic_irq,
-	.mask = ack_msp_cic_irq,
-	.mask_ack = ack_msp_cic_irq,
-	.unmask = unmask_msp_cic_irq,
+	.irq_mask = mask_cic_irq,
+	.irq_mask_ack = msp_cic_irq_ack,
+	.irq_unmask = unmask_cic_irq,
+	.irq_ack = msp_cic_irq_ack,
+#ifdef CONFIG_MIPS_MT_SMP
+	.irq_set_affinity = msp_cic_irq_set_affinity,
+#endif
 };
 
-
 void __init msp_cic_irq_init(void)
 {
 	int i;
-
 	/* Mask/clear interrupts. */
 	*CIC_VPE0_MSK_REG = 0x00000000;
-	*PER_INT_MSK_REG  = 0x00000000;
+	*CIC_VPE1_MSK_REG = 0x00000000;
 	*CIC_STS_REG      = 0xFFFFFFFF;
-	*PER_INT_STS_REG  = 0xFFFFFFFF;
-
-#if defined(CONFIG_PMC_MSP7120_GW) || \
-    defined(CONFIG_PMC_MSP7120_EVAL)
 	/*
-	 * The MSP7120 RG and EVBD boards use IRQ[6:4] for PCI.
-	 * These inputs map to EXT_INT_POL[6:4] inside the CIC.
-	 * They are to be active low, level sensitive.
-	 */
+	* The MSP7120 RG and EVBD boards use IRQ[6:4] for PCI.
+	* These inputs map to EXT_INT_POL[6:4] inside the CIC.
+	* They are to be active low, level sensitive.
+	*/
 	*CIC_EXT_CFG_REG &= 0xFFFF8F8F;
-#endif
 
 	/* initialize all the IRQ descriptors */
-	for (i = MSP_CIC_INTBASE; i < MSP_PER_INTBASE + 32; i++)
+	for (i = MSP_CIC_INTBASE ; i < MSP_CIC_INTBASE + 32 ; i++) {
 		set_irq_chip_and_handler(i, &msp_cic_irq_controller,
 					 handle_level_irq);
+#ifdef CONFIG_MIPS_MT_SMTC
+		/* Mask of CIC interrupt */
+		irq_hwmask[i] = C_IRQ4;
+#endif
+	}
+
+	/* Initialize the PER interrupt sub-system */
+	 msp_per_irq_init();
 }
 
+/* CIC masked by CIC vector processing before dispatch called */
 void msp_cic_irq_dispatch(void)
 {
-	u32 pending;
-	int intbase;
-
-	intbase = MSP_CIC_INTBASE;
-	pending = *CIC_STS_REG & *CIC_VPE0_MSK_REG;
-
-	/* check for PER interrupt */
-	if (pending == (1 << (MSP_INT_PER - MSP_CIC_INTBASE))) {
-		intbase = MSP_PER_INTBASE;
-		pending = *PER_INT_STS_REG & *PER_INT_MSK_REG;
-	}
-
-	/* check for spurious interrupt */
-	if (pending == 0x00000000) {
-		printk(KERN_ERR
-			"Spurious %s interrupt? status %08x, mask %08x\n",
-			(intbase == MSP_CIC_INTBASE) ? "CIC" : "PER",
-			(intbase == MSP_CIC_INTBASE) ?
-				*CIC_STS_REG : *PER_INT_STS_REG,
-			(intbase == MSP_CIC_INTBASE) ?
-				*CIC_VPE0_MSK_REG : *PER_INT_MSK_REG);
-		return;
-	}
-
-	/* check for the timer and dispatch it first */
-	if ((intbase == MSP_CIC_INTBASE) &&
-	    (pending & (1 << (MSP_INT_VPE0_TIMER - MSP_CIC_INTBASE))))
+	volatile u32	*cic_msk_reg = (volatile u32 *)CIC_VPE0_MSK_REG;
+	u32	cic_mask;
+	u32	 pending;
+	int	cic_status = *CIC_STS_REG;
+	cic_mask = cic_msk_reg[get_current_vpe()];
+	pending = cic_status & cic_mask;
+	if (pending & (1 << (MSP_INT_VPE0_TIMER - MSP_CIC_INTBASE))) {
 		do_IRQ(MSP_INT_VPE0_TIMER);
-	else
-		do_IRQ(ffs(pending) + intbase - 1);
+	} else if (pending & (1 << (MSP_INT_VPE1_TIMER - MSP_CIC_INTBASE))) {
+		do_IRQ(MSP_INT_VPE1_TIMER);
+	} else if (pending & (1 << (MSP_INT_PER - MSP_CIC_INTBASE))) {
+		msp_per_irq_dispatch();
+	} else if (pending) {
+		do_IRQ(ffs(pending) + MSP_CIC_INTBASE - 1);
+	} else{
+		spurious_interrupt();
+	}
 }

diff --git a/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c b/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c
new file mode 100644
index 0000000..f9b9dcd
--- /dev/null
+++ b/arch/mips/pmc-sierra/msp71xx/msp_irq_per.c

@@ -0,0 +1,135 @@
+/*
+ * Copyright 2010 PMC-Sierra, Inc, derived from irq_cpu.c
+ *
+ * This file define the irq handler for MSP PER subsystem interrupts.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/bitops.h>
+
+#include <asm/mipsregs.h>
+#include <asm/system.h>
+
+#include <msp_cic_int.h>
+#include <msp_regs.h>
+
+
+/*
+ * Convenience Macro.  Should be somewhere generic.
+ */
+#define get_current_vpe()	\
+	((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE)
+
+#ifdef CONFIG_SMP
+/*
+ * The PER registers must be protected from concurrent access.
+ */
+
+static DEFINE_SPINLOCK(per_lock);
+#endif
+
+/* ensure writes to per are completed */
+
+static inline void per_wmb(void)
+{
+	const volatile void __iomem *per_mem = PER_INT_MSK_REG;
+	volatile u32 dummy_read;
+
+	wmb();
+	dummy_read = __raw_readl(per_mem);
+	dummy_read++;
+}
+
+static inline void unmask_per_irq(struct irq_data *d)
+{
+#ifdef CONFIG_SMP
+	unsigned long flags;
+	spin_lock_irqsave(&per_lock, flags);
+	*PER_INT_MSK_REG |= (1 << (d->irq - MSP_PER_INTBASE));
+	spin_unlock_irqrestore(&per_lock, flags);
+#else
+	*PER_INT_MSK_REG |= (1 << (d->irq - MSP_PER_INTBASE));
+#endif
+	per_wmb();
+}
+
+static inline void mask_per_irq(struct irq_data *d)
+{
+#ifdef CONFIG_SMP
+	unsigned long flags;
+	spin_lock_irqsave(&per_lock, flags);
+	*PER_INT_MSK_REG &= ~(1 << (d->irq - MSP_PER_INTBASE));
+	spin_unlock_irqrestore(&per_lock, flags);
+#else
+	*PER_INT_MSK_REG &= ~(1 << (d->irq - MSP_PER_INTBASE));
+#endif
+	per_wmb();
+}
+
+static inline void msp_per_irq_ack(struct irq_data *d)
+{
+	mask_per_irq(d);
+	/*
+	 * In the PER interrupt controller, only bits 11 and 10
+	 * are write-to-clear, (SPI TX complete, SPI RX complete).
+	 * It does nothing for any others.
+	 */
+	*PER_INT_STS_REG = (1 << (d->irq - MSP_PER_INTBASE));
+}
+
+#ifdef CONFIG_SMP
+static int msp_per_irq_set_affinity(struct irq_data *d,
+				    const struct cpumask *affinity, bool force)
+{
+	/* WTF is this doing ????? */
+	unmask_per_irq(d);
+	return 0;
+}
+#endif
+
+static struct irq_chip msp_per_irq_controller = {
+	.name = "MSP_PER",
+	.irq_enable = unmask_per_irq.
+	.irq_disable = mask_per_irq,
+	.irq_ack = msp_per_irq_ack,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = msp_per_irq_set_affinity,
+#endif
+};
+
+void __init msp_per_irq_init(void)
+{
+	int i;
+	/* Mask/clear interrupts. */
+	*PER_INT_MSK_REG  = 0x00000000;
+	*PER_INT_STS_REG  = 0xFFFFFFFF;
+	/* initialize all the IRQ descriptors */
+	for (i = MSP_PER_INTBASE; i < MSP_PER_INTBASE + 32; i++) {
+		irq_set_chip(i, &msp_per_irq_controller);
+#ifdef CONFIG_MIPS_MT_SMTC
+		irq_hwmask[i] = C_IRQ4;
+#endif
+	}
+}
+
+void msp_per_irq_dispatch(void)
+{
+	u32	per_mask = *PER_INT_MSK_REG;
+	u32	per_status = *PER_INT_STS_REG;
+	u32	pending;
+
+	pending = per_status & per_mask;
+	if (pending) {
+		do_IRQ(ffs(pending) + MSP_PER_INTBASE - 1);
+	} else {
+		spurious_interrupt();
+	}
+}

diff --git a/arch/mips/pmc-sierra/msp71xx/msp_irq_slp.c b/arch/mips/pmc-sierra/msp71xx/msp_irq_slp.c
index 61f3902..8f51e4a 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_irq_slp.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_irq_slp.c

@@ -21,8 +21,10 @@
 #include <msp_slp_int.h>
 #include <msp_regs.h>
 
-static inline void unmask_msp_slp_irq(unsigned int irq)
+static inline void unmask_msp_slp_irq(struct irq_data *d)
 {
+	unsigned int irq = d->irq;
+
 	/* check for PER interrupt range */
 	if (irq < MSP_PER_INTBASE)
 		*SLP_INT_MSK_REG |= (1 << (irq - MSP_SLP_INTBASE));
@@ -30,8 +32,10 @@
 		*PER_INT_MSK_REG |= (1 << (irq - MSP_PER_INTBASE));
 }
 
-static inline void mask_msp_slp_irq(unsigned int irq)
+static inline void mask_msp_slp_irq(struct irq_data *d)
 {
+	unsigned int irq = d->irq;
+
 	/* check for PER interrupt range */
 	if (irq < MSP_PER_INTBASE)
 		*SLP_INT_MSK_REG &= ~(1 << (irq - MSP_SLP_INTBASE));
@@ -43,8 +47,10 @@
  * While we ack the interrupt interrupts are disabled and thus we don't need
  * to deal with concurrency issues.  Same for msp_slp_irq_end.
  */
-static inline void ack_msp_slp_irq(unsigned int irq)
+static inline void ack_msp_slp_irq(struct irq_data *d)
 {
+	unsigned int irq = d->irq;
+
 	/* check for PER interrupt range */
 	if (irq < MSP_PER_INTBASE)
 		*SLP_INT_STS_REG = (1 << (irq - MSP_SLP_INTBASE));
@@ -54,9 +60,9 @@
 
 static struct irq_chip msp_slp_irq_controller = {
 	.name = "MSP_SLP",
-	.ack = ack_msp_slp_irq,
-	.mask = mask_msp_slp_irq,
-	.unmask = unmask_msp_slp_irq,
+	.irq_ack = ack_msp_slp_irq,
+	.irq_mask = mask_msp_slp_irq,
+	.irq_unmask = unmask_msp_slp_irq,
 };
 
 void __init msp_slp_irq_init(void)

diff --git a/arch/mips/pmc-sierra/msp71xx/msp_setup.c b/arch/mips/pmc-sierra/msp71xx/msp_setup.c
index a54e85b..fb37a10 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_setup.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_setup.c

@@ -146,6 +146,8 @@
 	pm_power_off = msp_power_off;
 }
 
+extern struct plat_smp_ops msp_smtc_smp_ops;
+
 void __init prom_init(void)
 {
 	unsigned long family;
@@ -226,6 +228,14 @@
 	 */
 	msp_serial_setup();
 
+#ifdef CONFIG_MIPS_MT_SMP
+	register_smp_ops(&vsmp_smp_ops);
+#endif
+
+#ifdef CONFIG_MIPS_MT_SMTC
+	register_smp_ops(&msp_smtc_smp_ops);
+#endif
+
 #ifdef CONFIG_PMCTWILED
 	/*
 	 * Setup LED states before the subsys_initcall loads other

diff --git a/arch/mips/pmc-sierra/msp71xx/msp_smp.c b/arch/mips/pmc-sierra/msp71xx/msp_smp.c
new file mode 100644
index 0000000..43a9e26
--- /dev/null
+++ b/arch/mips/pmc-sierra/msp71xx/msp_smp.c

@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2000, 2001, 2004 MIPS Technologies, Inc.
+ * Copyright (C) 2001 Ralf Baechle
+ * Copyright (C) 2010 PMC-Sierra, Inc.
+ *
+ *  VSMP support for MSP platforms . Derived from malta vsmp support.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ */
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+
+#ifdef CONFIG_MIPS_MT_SMP
+#define MIPS_CPU_IPI_RESCHED_IRQ 0	/* SW int 0 for resched */
+#define MIPS_CPU_IPI_CALL_IRQ 1		/* SW int 1 for call */
+
+
+static void ipi_resched_dispatch(void)
+{
+	do_IRQ(MIPS_CPU_IPI_RESCHED_IRQ);
+}
+
+static void ipi_call_dispatch(void)
+{
+	do_IRQ(MIPS_CPU_IPI_CALL_IRQ);
+}
+
+static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
+{
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
+{
+	smp_call_function_interrupt();
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction irq_resched = {
+	.handler	= ipi_resched_interrupt,
+	.flags		= IRQF_DISABLED | IRQF_PERCPU,
+	.name		= "IPI_resched"
+};
+
+static struct irqaction irq_call = {
+	.handler	= ipi_call_interrupt,
+	.flags		= IRQF_DISABLED | IRQF_PERCPU,
+	.name		= "IPI_call"
+};
+
+void __init arch_init_ipiirq(int irq, struct irqaction *action)
+{
+	setup_irq(irq, action);
+	set_irq_handler(irq, handle_percpu_irq);
+}
+
+void __init msp_vsmp_int_init(void)
+{
+	set_vi_handler(MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch);
+	set_vi_handler(MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch);
+	arch_init_ipiirq(MIPS_CPU_IPI_RESCHED_IRQ, &irq_resched);
+	arch_init_ipiirq(MIPS_CPU_IPI_CALL_IRQ, &irq_call);
+}
+#endif /* CONFIG_MIPS_MT_SMP */

diff --git a/arch/mips/pmc-sierra/msp71xx/msp_smtc.c b/arch/mips/pmc-sierra/msp71xx/msp_smtc.c
new file mode 100644
index 0000000..c8dcc1c
--- /dev/null
+++ b/arch/mips/pmc-sierra/msp71xx/msp_smtc.c

@@ -0,0 +1,105 @@
+/*
+ * MSP71xx Platform-specific hooks for SMP operation
+ */
+#include <linux/irq.h>
+#include <linux/init.h>
+
+#include <asm/mipsmtregs.h>
+#include <asm/mipsregs.h>
+#include <asm/smtc.h>
+#include <asm/smtc_ipi.h>
+
+/* VPE/SMP Prototype implements platform interfaces directly */
+
+/*
+ * Cause the specified action to be performed on a targeted "CPU"
+ */
+
+static void msp_smtc_send_ipi_single(int cpu, unsigned int action)
+{
+	/* "CPU" may be TC of same VPE, VPE of same CPU, or different CPU */
+	smtc_send_ipi(cpu, LINUX_SMP_IPI, action);
+}
+
+static void msp_smtc_send_ipi_mask(const struct cpumask *mask,
+						unsigned int action)
+{
+	unsigned int i;
+
+	for_each_cpu(i, mask)
+		msp_smtc_send_ipi_single(i, action);
+}
+
+/*
+ * Post-config but pre-boot cleanup entry point
+ */
+static void __cpuinit msp_smtc_init_secondary(void)
+{
+	int myvpe;
+
+	/* Don't enable Malta I/O interrupts (IP2) for secondary VPEs */
+	myvpe = read_c0_tcbind() & TCBIND_CURVPE;
+	if (myvpe > 0)
+		change_c0_status(ST0_IM, STATUSF_IP0 | STATUSF_IP1 |
+				STATUSF_IP6 | STATUSF_IP7);
+	smtc_init_secondary();
+}
+
+/*
+ * Platform "CPU" startup hook
+ */
+static void __cpuinit msp_smtc_boot_secondary(int cpu,
+					struct task_struct *idle)
+{
+	smtc_boot_secondary(cpu, idle);
+}
+
+/*
+ * SMP initialization finalization entry point
+ */
+static void __cpuinit msp_smtc_smp_finish(void)
+{
+	smtc_smp_finish();
+}
+
+/*
+ * Hook for after all CPUs are online
+ */
+
+static void msp_smtc_cpus_done(void)
+{
+}
+
+/*
+ * Platform SMP pre-initialization
+ *
+ * As noted above, we can assume a single CPU for now
+ * but it may be multithreaded.
+ */
+
+static void __init msp_smtc_smp_setup(void)
+{
+	/*
+	 * we won't get the definitive value until
+	 * we've run smtc_prepare_cpus later, but
+	 */
+
+	if (read_c0_config3() & (1 << 2))
+		smp_num_siblings = smtc_build_cpu_map(0);
+}
+
+static void __init msp_smtc_prepare_cpus(unsigned int max_cpus)
+{
+	smtc_prepare_cpus(max_cpus);
+}
+
+struct plat_smp_ops msp_smtc_smp_ops = {
+	.send_ipi_single	= msp_smtc_send_ipi_single,
+	.send_ipi_mask		= msp_smtc_send_ipi_mask,
+	.init_secondary		= msp_smtc_init_secondary,
+	.smp_finish		= msp_smtc_smp_finish,
+	.cpus_done		= msp_smtc_cpus_done,
+	.boot_secondary		= msp_smtc_boot_secondary,
+	.smp_setup		= msp_smtc_smp_setup,
+	.prepare_cpus		= msp_smtc_prepare_cpus,
+};

diff --git a/arch/mips/pmc-sierra/msp71xx/msp_time.c b/arch/mips/pmc-sierra/msp71xx/msp_time.c
index 01df84c..8b42f30 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_time.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_time.c

@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/ptrace.h>
 
+#include <asm/cevt-r4k.h>
 #include <asm/mipsregs.h>
 #include <asm/time.h>
 
@@ -36,6 +37,12 @@
 #include <msp_int.h>
 #include <msp_regs.h>
 
+#define get_current_vpe()   \
+	((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE)
+
+static struct irqaction timer_vpe1;
+static int tim_installed;
+
 void __init plat_time_init(void)
 {
 	char    *endp, *s;
@@ -83,5 +90,12 @@
 
 unsigned int __cpuinit get_c0_compare_int(void)
 {
-	return MSP_INT_VPE0_TIMER;
+	/* MIPS_MT modes may want timer for second VPE */
+	if ((get_current_vpe()) && !tim_installed) {
+		memcpy(&timer_vpe1, &c0_compare_irqaction, sizeof(timer_vpe1));
+		setup_irq(MSP_INT_VPE1_TIMER, &timer_vpe1);
+		tim_installed++;
+	}
+
+	return get_current_vpe() ? MSP_INT_VPE1_TIMER : MSP_INT_VPE0_TIMER;
 }

diff --git a/arch/mips/pmc-sierra/msp71xx/msp_usb.c b/arch/mips/pmc-sierra/msp71xx/msp_usb.c
index 0ee01e3..9a1aef8 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_usb.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_usb.c

@@ -1,7 +1,7 @@
 /*
  * The setup file for USB related hardware on PMC-Sierra MSP processors.
  *
- * Copyright 2006-2007 PMC-Sierra, Inc.
+ * Copyright 2006 PMC-Sierra, Inc.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -23,8 +23,8 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+#if defined(CONFIG_USB_EHCI_HCD) || defined(CONFIG_USB_GADGET)
 
-#include <linux/dma-mapping.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
@@ -34,40 +34,56 @@
 #include <msp_regs.h>
 #include <msp_int.h>
 #include <msp_prom.h>
+#include <msp_usb.h>
+
 
 #if defined(CONFIG_USB_EHCI_HCD)
-static struct resource msp_usbhost_resources [] = {
-	[0] = {
-		.start	= MSP_USB_BASE_START,
-		.end	= MSP_USB_BASE_END,
-		.flags 	= IORESOURCE_MEM,
+static struct resource msp_usbhost0_resources[] = {
+	[0] = { /* EHCI-HS operational and capabilities registers */
+		.start  = MSP_USB0_HS_START,
+		.end    = MSP_USB0_HS_END,
+		.flags  = IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= MSP_INT_USB,
-		.end	= MSP_INT_USB,
-		.flags	= IORESOURCE_IRQ,
+		.start  = MSP_INT_USB,
+		.end    = MSP_INT_USB,
+		.flags  = IORESOURCE_IRQ,
+	},
+	[2] = { /* MSBus-to-AMBA bridge register space */
+		.start	= MSP_USB0_MAB_START,
+		.end	= MSP_USB0_MAB_END,
+		.flags	= IORESOURCE_MEM,
+	},
+	[3] = { /* Identification and general hardware parameters */
+		.start	= MSP_USB0_ID_START,
+		.end	= MSP_USB0_ID_END,
+		.flags	= IORESOURCE_MEM,
 	},
 };
 
-static u64 msp_usbhost_dma_mask = DMA_BIT_MASK(32);
+static u64 msp_usbhost0_dma_mask = 0xffffffffUL;
 
-static struct platform_device msp_usbhost_device = {
-	.name	= "pmcmsp-ehci",
-	.id	= 0,
+static struct mspusb_device msp_usbhost0_device = {
 	.dev	= {
-		.dma_mask = &msp_usbhost_dma_mask,
-		.coherent_dma_mask = DMA_BIT_MASK(32),
+		.name	= "pmcmsp-ehci",
+		.id	= 0,
+		.dev	= {
+			.dma_mask = &msp_usbhost0_dma_mask,
+			.coherent_dma_mask = 0xffffffffUL,
+		},
+		.num_resources  = ARRAY_SIZE(msp_usbhost0_resources),
+		.resource       = msp_usbhost0_resources,
 	},
-	.num_resources 	= ARRAY_SIZE(msp_usbhost_resources),
-	.resource	= msp_usbhost_resources,
 };
-#endif /* CONFIG_USB_EHCI_HCD */
 
-#if defined(CONFIG_USB_GADGET)
-static struct resource msp_usbdev_resources [] = {
-	[0] = {
-		.start	= MSP_USB_BASE,
-		.end	= MSP_USB_BASE_END,
+/* MSP7140/MSP82XX has two USB2 hosts. */
+#ifdef CONFIG_MSP_HAS_DUAL_USB
+static u64 msp_usbhost1_dma_mask = 0xffffffffUL;
+
+static struct resource msp_usbhost1_resources[] = {
+	[0] = { /* EHCI-HS operational and capabilities registers */
+		.start	= MSP_USB1_HS_START,
+		.end	= MSP_USB1_HS_END,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
@@ -75,76 +91,173 @@
 		.end	= MSP_INT_USB,
 		.flags	= IORESOURCE_IRQ,
 	},
-};
-
-static u64 msp_usbdev_dma_mask = DMA_BIT_MASK(32);
-
-static struct platform_device msp_usbdev_device = {
-	.name	= "msp71xx_udc",
-	.id	= 0,
-	.dev	= {
-		.dma_mask = &msp_usbdev_dma_mask,
-		.coherent_dma_mask = DMA_BIT_MASK(32),
+	[2] = { /* MSBus-to-AMBA bridge register space */
+		.start	= MSP_USB1_MAB_START,
+		.end	= MSP_USB1_MAB_END,
+		.flags	= IORESOURCE_MEM,
 	},
-	.num_resources	= ARRAY_SIZE(msp_usbdev_resources),
-	.resource	= msp_usbdev_resources,
+	[3] = { /* Identification and general hardware parameters */
+		.start	= MSP_USB1_ID_START,
+		.end	= MSP_USB1_ID_END,
+		.flags	= IORESOURCE_MEM,
+	},
 };
+
+static struct mspusb_device msp_usbhost1_device = {
+	.dev	= {
+		.name	= "pmcmsp-ehci",
+		.id	= 1,
+		.dev	= {
+			.dma_mask = &msp_usbhost1_dma_mask,
+			.coherent_dma_mask = 0xffffffffUL,
+		},
+		.num_resources	= ARRAY_SIZE(msp_usbhost1_resources),
+		.resource	= msp_usbhost1_resources,
+	},
+};
+#endif /* CONFIG_MSP_HAS_DUAL_USB */
+#endif /* CONFIG_USB_EHCI_HCD */
+
+#if defined(CONFIG_USB_GADGET)
+static struct resource msp_usbdev0_resources[] = {
+	[0] = { /* EHCI-HS operational and capabilities registers */
+		.start  = MSP_USB0_HS_START,
+		.end    = MSP_USB0_HS_END,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = MSP_INT_USB,
+		.end    = MSP_INT_USB,
+		.flags  = IORESOURCE_IRQ,
+	},
+	[2] = { /* MSBus-to-AMBA bridge register space */
+		.start	= MSP_USB0_MAB_START,
+		.end	= MSP_USB0_MAB_END,
+		.flags	= IORESOURCE_MEM,
+	},
+	[3] = { /* Identification and general hardware parameters */
+		.start	= MSP_USB0_ID_START,
+		.end	= MSP_USB0_ID_END,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static u64 msp_usbdev_dma_mask = 0xffffffffUL;
+
+/* This may need to be converted to a mspusb_device, too. */
+static struct mspusb_device msp_usbdev0_device = {
+	.dev	= {
+		.name	= "msp71xx_udc",
+		.id	= 0,
+		.dev	= {
+			.dma_mask = &msp_usbdev_dma_mask,
+			.coherent_dma_mask = 0xffffffffUL,
+		},
+		.num_resources  = ARRAY_SIZE(msp_usbdev0_resources),
+		.resource       = msp_usbdev0_resources,
+	},
+};
+
+#ifdef CONFIG_MSP_HAS_DUAL_USB
+static struct resource msp_usbdev1_resources[] = {
+	[0] = { /* EHCI-HS operational and capabilities registers */
+		.start  = MSP_USB1_HS_START,
+		.end    = MSP_USB1_HS_END,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = MSP_INT_USB,
+		.end    = MSP_INT_USB,
+		.flags  = IORESOURCE_IRQ,
+	},
+	[2] = { /* MSBus-to-AMBA bridge register space */
+		.start	= MSP_USB1_MAB_START,
+		.end	= MSP_USB1_MAB_END,
+		.flags	= IORESOURCE_MEM,
+	},
+	[3] = { /* Identification and general hardware parameters */
+		.start	= MSP_USB1_ID_START,
+		.end	= MSP_USB1_ID_END,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+/* This may need to be converted to a mspusb_device, too. */
+static struct mspusb_device msp_usbdev1_device = {
+	.dev	= {
+		.name	= "msp71xx_udc",
+		.id	= 0,
+		.dev	= {
+			.dma_mask = &msp_usbdev_dma_mask,
+			.coherent_dma_mask = 0xffffffffUL,
+		},
+		.num_resources  = ARRAY_SIZE(msp_usbdev1_resources),
+		.resource       = msp_usbdev1_resources,
+	},
+};
+
+#endif /* CONFIG_MSP_HAS_DUAL_USB */
 #endif /* CONFIG_USB_GADGET */
 
-#if defined(CONFIG_USB_EHCI_HCD) || defined(CONFIG_USB_GADGET)
-static struct platform_device *msp_devs[1];
-#endif
-
-
 static int __init msp_usb_setup(void)
 {
-#if defined(CONFIG_USB_EHCI_HCD) || defined(CONFIG_USB_GADGET)
-	char *strp;
-	char envstr[32];
-	unsigned int val = 0;
-	int result = 0;
+	char		*strp;
+	char		envstr[32];
+	struct platform_device *msp_devs[NUM_USB_DEVS];
+	unsigned int val;
 
+	/* construct environment name usbmode */
+	/* set usbmode <host/device> as pmon environment var */
 	/*
-	 * construct environment name usbmode
-	 * set usbmode <host/device> as pmon environment var
+	 * Could this perhaps be integrated into the "features" env var?
+	 * Use the features key "U", and follow with "H" for host-mode,
+	 * "D" for device-mode.  If it works for Ethernet, why not USB...
+	 *  -- hammtrev, 2007/03/22
 	 */
 	snprintf((char *)&envstr[0], sizeof(envstr), "usbmode");
 
-#if defined(CONFIG_USB_EHCI_HCD)
-	/* default to host mode */
+	/* set default host mode */
 	val = 1;
-#endif
 
 	/* get environment string */
 	strp = prom_getenv((char *)&envstr[0]);
 	if (strp) {
+		/* compare string */
 		if (!strcmp(strp, "device"))
 			val = 0;
 	}
 
 	if (val) {
 #if defined(CONFIG_USB_EHCI_HCD)
-		/* get host mode device */
-		msp_devs[0] = &msp_usbhost_device;
-		ppfinit("platform add USB HOST done %s.\n",
-			    msp_devs[0]->name);
-
-		result = platform_add_devices(msp_devs, ARRAY_SIZE(msp_devs));
-#endif /* CONFIG_USB_EHCI_HCD */
-	}
+		msp_devs[0] = &msp_usbhost0_device.dev;
+		ppfinit("platform add USB HOST done %s.\n", msp_devs[0]->name);
+#ifdef CONFIG_MSP_HAS_DUAL_USB
+		msp_devs[1] = &msp_usbhost1_device.dev;
+		ppfinit("platform add USB HOST done %s.\n", msp_devs[1]->name);
+#endif
+#else
+		ppfinit("%s: echi_hcd not supported\n", __FILE__);
+#endif  /* CONFIG_USB_EHCI_HCD */
+	} else {
 #if defined(CONFIG_USB_GADGET)
-	else {
 		/* get device mode structure */
-		msp_devs[0] = &msp_usbdev_device;
-		ppfinit("platform add USB DEVICE done %s.\n",
-			    msp_devs[0]->name);
-
-		result = platform_add_devices(msp_devs, ARRAY_SIZE(msp_devs));
+		msp_devs[0] = &msp_usbdev0_device.dev;
+		ppfinit("platform add USB DEVICE done %s.\n"
+					, msp_devs[0]->name);
+#ifdef CONFIG_MSP_HAS_DUAL_USB
+		msp_devs[1] = &msp_usbdev1_device.dev;
+		ppfinit("platform add USB DEVICE done %s.\n"
+					, msp_devs[1]->name);
+#endif
+#else
+		ppfinit("%s: usb_gadget not supported\n", __FILE__);
+#endif  /* CONFIG_USB_GADGET */
 	}
-#endif /* CONFIG_USB_GADGET */
-#endif /* CONFIG_USB_EHCI_HCD || CONFIG_USB_GADGET */
+	/* add device */
+	platform_add_devices(msp_devs, ARRAY_SIZE(msp_devs));
 
-	return result;
+	return 0;
 }
 
 subsys_initcall(msp_usb_setup);
+#endif /* CONFIG_USB_EHCI_HCD || CONFIG_USB_GADGET */

diff --git a/arch/mips/pnx833x/common/interrupts.c b/arch/mips/pnx833x/common/interrupts.c
index 941916f..b226bcb 100644
--- a/arch/mips/pnx833x/common/interrupts.c
+++ b/arch/mips/pnx833x/common/interrupts.c

@@ -152,10 +152,6 @@
 	PNX833X_PIC_INT_REG(irq) = 0;
 }
 
-static int irqflags[PNX833X_PIC_NUM_IRQ];	/* initialized by zeroes */
-#define IRQFLAG_STARTED		1
-#define IRQFLAG_DISABLED	2
-
 static DEFINE_RAW_SPINLOCK(pnx833x_irq_lock);
 
 static unsigned int pnx833x_startup_pic_irq(unsigned int irq)
@@ -164,108 +160,54 @@
 	unsigned int pic_irq = irq - PNX833X_PIC_IRQ_BASE;
 
 	raw_spin_lock_irqsave(&pnx833x_irq_lock, flags);
-
-	irqflags[pic_irq] = IRQFLAG_STARTED;	/* started, not disabled */
 	pnx833x_hard_enable_pic_irq(pic_irq);
-
 	raw_spin_unlock_irqrestore(&pnx833x_irq_lock, flags);
 	return 0;
 }
 
-static void pnx833x_shutdown_pic_irq(unsigned int irq)
+static void pnx833x_enable_pic_irq(struct irq_data *d)
 {
 	unsigned long flags;
-	unsigned int pic_irq = irq - PNX833X_PIC_IRQ_BASE;
+	unsigned int pic_irq = d->irq - PNX833X_PIC_IRQ_BASE;
 
 	raw_spin_lock_irqsave(&pnx833x_irq_lock, flags);
+	pnx833x_hard_enable_pic_irq(pic_irq);
+	raw_spin_unlock_irqrestore(&pnx833x_irq_lock, flags);
+}
 
-	irqflags[pic_irq] = 0;			/* not started */
+static void pnx833x_disable_pic_irq(struct irq_data *d)
+{
+	unsigned long flags;
+	unsigned int pic_irq = d->irq - PNX833X_PIC_IRQ_BASE;
+
+	raw_spin_lock_irqsave(&pnx833x_irq_lock, flags);
 	pnx833x_hard_disable_pic_irq(pic_irq);
-
 	raw_spin_unlock_irqrestore(&pnx833x_irq_lock, flags);
 }
 
-static void pnx833x_enable_pic_irq(unsigned int irq)
-{
-	unsigned long flags;
-	unsigned int pic_irq = irq - PNX833X_PIC_IRQ_BASE;
-
-	raw_spin_lock_irqsave(&pnx833x_irq_lock, flags);
-
-	irqflags[pic_irq] &= ~IRQFLAG_DISABLED;
-	if (irqflags[pic_irq] == IRQFLAG_STARTED)
-		pnx833x_hard_enable_pic_irq(pic_irq);
-
-	raw_spin_unlock_irqrestore(&pnx833x_irq_lock, flags);
-}
-
-static void pnx833x_disable_pic_irq(unsigned int irq)
-{
-	unsigned long flags;
-	unsigned int pic_irq = irq - PNX833X_PIC_IRQ_BASE;
-
-	raw_spin_lock_irqsave(&pnx833x_irq_lock, flags);
-
-	irqflags[pic_irq] |= IRQFLAG_DISABLED;
-	pnx833x_hard_disable_pic_irq(pic_irq);
-
-	raw_spin_unlock_irqrestore(&pnx833x_irq_lock, flags);
-}
-
-static void pnx833x_ack_pic_irq(unsigned int irq)
-{
-}
-
-static void pnx833x_end_pic_irq(unsigned int irq)
-{
-}
-
 static DEFINE_RAW_SPINLOCK(pnx833x_gpio_pnx833x_irq_lock);
 
-static unsigned int pnx833x_startup_gpio_irq(unsigned int irq)
+static void pnx833x_enable_gpio_irq(struct irq_data *d)
 {
-	int pin = irq - PNX833X_GPIO_IRQ_BASE;
-	unsigned long flags;
-	raw_spin_lock_irqsave(&pnx833x_gpio_pnx833x_irq_lock, flags);
-	pnx833x_gpio_enable_irq(pin);
-	raw_spin_unlock_irqrestore(&pnx833x_gpio_pnx833x_irq_lock, flags);
-	return 0;
-}
-
-static void pnx833x_enable_gpio_irq(unsigned int irq)
-{
-	int pin = irq - PNX833X_GPIO_IRQ_BASE;
+	int pin = d->irq - PNX833X_GPIO_IRQ_BASE;
 	unsigned long flags;
 	raw_spin_lock_irqsave(&pnx833x_gpio_pnx833x_irq_lock, flags);
 	pnx833x_gpio_enable_irq(pin);
 	raw_spin_unlock_irqrestore(&pnx833x_gpio_pnx833x_irq_lock, flags);
 }
 
-static void pnx833x_disable_gpio_irq(unsigned int irq)
+static void pnx833x_disable_gpio_irq(struct irq_data *d)
 {
-	int pin = irq - PNX833X_GPIO_IRQ_BASE;
+	int pin = d->irq - PNX833X_GPIO_IRQ_BASE;
 	unsigned long flags;
 	raw_spin_lock_irqsave(&pnx833x_gpio_pnx833x_irq_lock, flags);
 	pnx833x_gpio_disable_irq(pin);
 	raw_spin_unlock_irqrestore(&pnx833x_gpio_pnx833x_irq_lock, flags);
 }
 
-static void pnx833x_ack_gpio_irq(unsigned int irq)
+static int pnx833x_set_type_gpio_irq(struct irq_data *d, unsigned int flow_type)
 {
-}
-
-static void pnx833x_end_gpio_irq(unsigned int irq)
-{
-	int pin = irq - PNX833X_GPIO_IRQ_BASE;
-	unsigned long flags;
-	raw_spin_lock_irqsave(&pnx833x_gpio_pnx833x_irq_lock, flags);
-	pnx833x_gpio_clear_irq(pin);
-	raw_spin_unlock_irqrestore(&pnx833x_gpio_pnx833x_irq_lock, flags);
-}
-
-static int pnx833x_set_type_gpio_irq(unsigned int irq, unsigned int flow_type)
-{
-	int pin = irq - PNX833X_GPIO_IRQ_BASE;
+	int pin = d->irq - PNX833X_GPIO_IRQ_BASE;
 	int gpio_mode;
 
 	switch (flow_type) {
@@ -296,23 +238,15 @@
 
 static struct irq_chip pnx833x_pic_irq_type = {
 	.name = "PNX-PIC",
-	.startup = pnx833x_startup_pic_irq,
-	.shutdown = pnx833x_shutdown_pic_irq,
-	.enable = pnx833x_enable_pic_irq,
-	.disable = pnx833x_disable_pic_irq,
-	.ack = pnx833x_ack_pic_irq,
-	.end = pnx833x_end_pic_irq
+	.irq_enable = pnx833x_enable_pic_irq,
+	.irq_disable = pnx833x_disable_pic_irq,
 };
 
 static struct irq_chip pnx833x_gpio_irq_type = {
 	.name = "PNX-GPIO",
-	.startup = pnx833x_startup_gpio_irq,
-	.shutdown = pnx833x_disable_gpio_irq,
-	.enable = pnx833x_enable_gpio_irq,
-	.disable = pnx833x_disable_gpio_irq,
-	.ack = pnx833x_ack_gpio_irq,
-	.end = pnx833x_end_gpio_irq,
-	.set_type = pnx833x_set_type_gpio_irq
+	.irq_enable = pnx833x_enable_gpio_irq,
+	.irq_disable = pnx833x_disable_gpio_irq,
+	.irq_set_type = pnx833x_set_type_gpio_irq,
 };
 
 void __init arch_init_irq(void)

diff --git a/arch/mips/pnx8550/common/int.c b/arch/mips/pnx8550/common/int.c
index cfed505..dbdc35c 100644
--- a/arch/mips/pnx8550/common/int.c
+++ b/arch/mips/pnx8550/common/int.c

@@ -114,8 +114,10 @@
 	PNX8550_GIC_REQ(irq_nr) = (1<<26 | 1<<16) | (1<<28) | gic_prio[irq_nr];
 }
 
-static inline void mask_irq(unsigned int irq_nr)
+static inline void mask_irq(struct irq_data *d)
 {
+	unsigned int irq_nr = d->irq;
+
 	if ((PNX8550_INT_CP0_MIN <= irq_nr) && (irq_nr <= PNX8550_INT_CP0_MAX)) {
 		modify_cp0_intmask(1 << irq_nr, 0);
 	} else if ((PNX8550_INT_GIC_MIN <= irq_nr) &&
@@ -129,8 +131,10 @@
 	}
 }
 
-static inline void unmask_irq(unsigned int irq_nr)
+static inline void unmask_irq(struct irq_data *d)
 {
+	unsigned int irq_nr = d->irq;
+
 	if ((PNX8550_INT_CP0_MIN <= irq_nr) && (irq_nr <= PNX8550_INT_CP0_MAX)) {
 		modify_cp0_intmask(0, 1 << irq_nr);
 	} else if ((PNX8550_INT_GIC_MIN <= irq_nr) &&
@@ -157,10 +161,8 @@
 
 static struct irq_chip level_irq_type = {
 	.name =		"PNX Level IRQ",
-	.ack =		mask_irq,
-	.mask =		mask_irq,
-	.mask_ack =	mask_irq,
-	.unmask =	unmask_irq,
+	.irq_mask =	mask_irq,
+	.irq_unmask =	unmask_irq,
 };
 
 static struct irqaction gic_action = {
@@ -180,10 +182,8 @@
 	int i;
 	int configPR;
 
-	for (i = 0; i < PNX8550_INT_CP0_TOTINT; i++) {
+	for (i = 0; i < PNX8550_INT_CP0_TOTINT; i++)
 		set_irq_chip_and_handler(i, &level_irq_type, handle_level_irq);
-		mask_irq(i);	/* mask the irq just in case  */
-	}
 
 	/* init of GIC/IPC interrupts */
 	/* should be done before cp0 since cp0 init enables the GIC int */

diff --git a/arch/mips/powertv/asic/irq_asic.c b/arch/mips/powertv/asic/irq_asic.c
index e553824..6f1c8ef 100644
--- a/arch/mips/powertv/asic/irq_asic.c
+++ b/arch/mips/powertv/asic/irq_asic.c

@@ -21,9 +21,10 @@
 
 #include <asm/mach-powertv/asic_regs.h>
 
-static inline void unmask_asic_irq(unsigned int irq)
+static inline void unmask_asic_irq(struct irq_data *d)
 {
 	unsigned long enable_bit;
+	unsigned int irq = d->irq;
 
 	enable_bit = (1 << (irq & 0x1f));
 
@@ -45,9 +46,10 @@
 	}
 }
 
-static inline void mask_asic_irq(unsigned int irq)
+static inline void mask_asic_irq(struct irq_data *d)
 {
 	unsigned long disable_mask;
+	unsigned int irq = d->irq;
 
 	disable_mask = ~(1 << (irq & 0x1f));
 
@@ -71,11 +73,8 @@
 
 static struct irq_chip asic_irq_chip = {
 	.name = "ASIC Level",
-	.ack = mask_asic_irq,
-	.mask = mask_asic_irq,
-	.mask_ack = mask_asic_irq,
-	.unmask = unmask_asic_irq,
-	.eoi = unmask_asic_irq,
+	.irq_mask = mask_asic_irq,
+	.irq_unmask = unmask_asic_irq,
 };
 
 void __init asic_irq_init(void)

diff --git a/arch/mips/rb532/irq.c b/arch/mips/rb532/irq.c
index ea6cec3..b32a768 100644
--- a/arch/mips/rb532/irq.c
+++ b/arch/mips/rb532/irq.c

@@ -111,10 +111,10 @@
 	clear_c0_cause(ipnum);
 }
 
-static void rb532_enable_irq(unsigned int irq_nr)
+static void rb532_enable_irq(struct irq_data *d)
 {
+	unsigned int group, intr_bit, irq_nr = d->irq;
 	int ip = irq_nr - GROUP0_IRQ_BASE;
-	unsigned int group, intr_bit;
 	volatile unsigned int *addr;
 
 	if (ip < 0)
@@ -132,10 +132,10 @@
 	}
 }
 
-static void rb532_disable_irq(unsigned int irq_nr)
+static void rb532_disable_irq(struct irq_data *d)
 {
+	unsigned int group, intr_bit, mask, irq_nr = d->irq;
 	int ip = irq_nr - GROUP0_IRQ_BASE;
-	unsigned int group, intr_bit, mask;
 	volatile unsigned int *addr;
 
 	if (ip < 0) {
@@ -163,18 +163,18 @@
 	}
 }
 
-static void rb532_mask_and_ack_irq(unsigned int irq_nr)
+static void rb532_mask_and_ack_irq(struct irq_data *d)
 {
-	rb532_disable_irq(irq_nr);
-	ack_local_irq(group_to_ip(irq_to_group(irq_nr)));
+	rb532_disable_irq(d);
+	ack_local_irq(group_to_ip(irq_to_group(d->irq)));
 }
 
-static int rb532_set_type(unsigned int irq_nr, unsigned type)
+static int rb532_set_type(struct irq_data *d,  unsigned type)
 {
-	int gpio = irq_nr - GPIO_MAPPED_IRQ_BASE;
-	int group = irq_to_group(irq_nr);
+	int gpio = d->irq - GPIO_MAPPED_IRQ_BASE;
+	int group = irq_to_group(d->irq);
 
-	if (group != GPIO_MAPPED_IRQ_GROUP || irq_nr > (GROUP4_IRQ_BASE + 13))
+	if (group != GPIO_MAPPED_IRQ_GROUP || d->irq > (GROUP4_IRQ_BASE + 13))
 		return (type == IRQ_TYPE_LEVEL_HIGH) ? 0 : -EINVAL;
 
 	switch (type) {
@@ -193,11 +193,11 @@
 
 static struct irq_chip rc32434_irq_type = {
 	.name		= "RB532",
-	.ack		= rb532_disable_irq,
-	.mask		= rb532_disable_irq,
-	.mask_ack	= rb532_mask_and_ack_irq,
-	.unmask		= rb532_enable_irq,
-	.set_type	= rb532_set_type,
+	.irq_ack	= rb532_disable_irq,
+	.irq_mask	= rb532_disable_irq,
+	.irq_mask_ack	= rb532_mask_and_ack_irq,
+	.irq_unmask	= rb532_enable_irq,
+	.irq_set_type	= rb532_set_type,
 };
 
 void __init arch_init_irq(void)

diff --git a/arch/mips/sgi-ip22/ip22-int.c b/arch/mips/sgi-ip22/ip22-int.c
index 383f11d..e6e6475 100644
--- a/arch/mips/sgi-ip22/ip22-int.c
+++ b/arch/mips/sgi-ip22/ip22-int.c

@@ -31,88 +31,80 @@
 
 extern int ip22_eisa_init(void);
 
-static void enable_local0_irq(unsigned int irq)
+static void enable_local0_irq(struct irq_data *d)
 {
 	/* don't allow mappable interrupt to be enabled from setup_irq,
 	 * we have our own way to do so */
-	if (irq != SGI_MAP_0_IRQ)
-		sgint->imask0 |= (1 << (irq - SGINT_LOCAL0));
+	if (d->irq != SGI_MAP_0_IRQ)
+		sgint->imask0 |= (1 << (d->irq - SGINT_LOCAL0));
 }
 
-static void disable_local0_irq(unsigned int irq)
+static void disable_local0_irq(struct irq_data *d)
 {
-	sgint->imask0 &= ~(1 << (irq - SGINT_LOCAL0));
+	sgint->imask0 &= ~(1 << (d->irq - SGINT_LOCAL0));
 }
 
 static struct irq_chip ip22_local0_irq_type = {
 	.name		= "IP22 local 0",
-	.ack		= disable_local0_irq,
-	.mask		= disable_local0_irq,
-	.mask_ack	= disable_local0_irq,
-	.unmask		= enable_local0_irq,
+	.irq_mask	= disable_local0_irq,
+	.irq_unmask	= enable_local0_irq,
 };
 
-static void enable_local1_irq(unsigned int irq)
+static void enable_local1_irq(struct irq_data *d)
 {
 	/* don't allow mappable interrupt to be enabled from setup_irq,
 	 * we have our own way to do so */
-	if (irq != SGI_MAP_1_IRQ)
-		sgint->imask1 |= (1 << (irq - SGINT_LOCAL1));
+	if (d->irq != SGI_MAP_1_IRQ)
+		sgint->imask1 |= (1 << (d->irq - SGINT_LOCAL1));
 }
 
-static void disable_local1_irq(unsigned int irq)
+static void disable_local1_irq(struct irq_data *d)
 {
-	sgint->imask1 &= ~(1 << (irq - SGINT_LOCAL1));
+	sgint->imask1 &= ~(1 << (d->irq - SGINT_LOCAL1));
 }
 
 static struct irq_chip ip22_local1_irq_type = {
 	.name		= "IP22 local 1",
-	.ack		= disable_local1_irq,
-	.mask		= disable_local1_irq,
-	.mask_ack	= disable_local1_irq,
-	.unmask		= enable_local1_irq,
+	.irq_mask	= disable_local1_irq,
+	.irq_unmask	= enable_local1_irq,
 };
 
-static void enable_local2_irq(unsigned int irq)
+static void enable_local2_irq(struct irq_data *d)
 {
 	sgint->imask0 |= (1 << (SGI_MAP_0_IRQ - SGINT_LOCAL0));
-	sgint->cmeimask0 |= (1 << (irq - SGINT_LOCAL2));
+	sgint->cmeimask0 |= (1 << (d->irq - SGINT_LOCAL2));
 }
 
-static void disable_local2_irq(unsigned int irq)
+static void disable_local2_irq(struct irq_data *d)
 {
-	sgint->cmeimask0 &= ~(1 << (irq - SGINT_LOCAL2));
+	sgint->cmeimask0 &= ~(1 << (d->irq - SGINT_LOCAL2));
 	if (!sgint->cmeimask0)
 		sgint->imask0 &= ~(1 << (SGI_MAP_0_IRQ - SGINT_LOCAL0));
 }
 
 static struct irq_chip ip22_local2_irq_type = {
 	.name		= "IP22 local 2",
-	.ack		= disable_local2_irq,
-	.mask		= disable_local2_irq,
-	.mask_ack	= disable_local2_irq,
-	.unmask		= enable_local2_irq,
+	.irq_mask	= disable_local2_irq,
+	.irq_unmask	= enable_local2_irq,
 };
 
-static void enable_local3_irq(unsigned int irq)
+static void enable_local3_irq(struct irq_data *d)
 {
 	sgint->imask1 |= (1 << (SGI_MAP_1_IRQ - SGINT_LOCAL1));
-	sgint->cmeimask1 |= (1 << (irq - SGINT_LOCAL3));
+	sgint->cmeimask1 |= (1 << (d->irq - SGINT_LOCAL3));
 }
 
-static void disable_local3_irq(unsigned int irq)
+static void disable_local3_irq(struct irq_data *d)
 {
-	sgint->cmeimask1 &= ~(1 << (irq - SGINT_LOCAL3));
+	sgint->cmeimask1 &= ~(1 << (d->irq - SGINT_LOCAL3));
 	if (!sgint->cmeimask1)
 		sgint->imask1 &= ~(1 << (SGI_MAP_1_IRQ - SGINT_LOCAL1));
 }
 
 static struct irq_chip ip22_local3_irq_type = {
 	.name		= "IP22 local 3",
-	.ack		= disable_local3_irq,
-	.mask		= disable_local3_irq,
-	.mask_ack	= disable_local3_irq,
-	.unmask		= enable_local3_irq,
+	.irq_mask	= disable_local3_irq,
+	.irq_unmask	= enable_local3_irq,
 };
 
 static void indy_local0_irqdispatch(void)

diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 6a123ea..f2d09d7 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c

@@ -240,7 +240,7 @@
 }
 
 /* Startup one of the (PCI ...) IRQs routes over a bridge.  */
-static unsigned int startup_bridge_irq(unsigned int irq)
+static unsigned int startup_bridge_irq(struct irq_data *d)
 {
 	struct bridge_controller *bc;
 	bridgereg_t device;
@@ -248,16 +248,16 @@
 	int pin, swlevel;
 	cpuid_t cpu;
 
-	pin = SLOT_FROM_PCI_IRQ(irq);
-	bc = IRQ_TO_BRIDGE(irq);
+	pin = SLOT_FROM_PCI_IRQ(d->irq);
+	bc = IRQ_TO_BRIDGE(d->irq);
 	bridge = bc->base;
 
-	pr_debug("bridge_startup(): irq= 0x%x  pin=%d\n", irq, pin);
+	pr_debug("bridge_startup(): irq= 0x%x  pin=%d\n", d->irq, pin);
 	/*
 	 * "map" irq to a swlevel greater than 6 since the first 6 bits
 	 * of INT_PEND0 are taken
 	 */
-	swlevel = find_level(&cpu, irq);
+	swlevel = find_level(&cpu, d->irq);
 	bridge->b_int_addr[pin].addr = (0x20000 | swlevel | (bc->nasid << 8));
 	bridge->b_int_enable |= (1 << pin);
 	bridge->b_int_enable |= 0x7ffffe00;	/* more stuff in int_enable */
@@ -288,53 +288,51 @@
 }
 
 /* Shutdown one of the (PCI ...) IRQs routes over a bridge.  */
-static void shutdown_bridge_irq(unsigned int irq)
+static void shutdown_bridge_irq(struct irq_data *d)
 {
-	struct bridge_controller *bc = IRQ_TO_BRIDGE(irq);
+	struct bridge_controller *bc = IRQ_TO_BRIDGE(d->irq);
 	bridge_t *bridge = bc->base;
 	int pin, swlevel;
 	cpuid_t cpu;
 
-	pr_debug("bridge_shutdown: irq 0x%x\n", irq);
-	pin = SLOT_FROM_PCI_IRQ(irq);
+	pr_debug("bridge_shutdown: irq 0x%x\n", d->irq);
+	pin = SLOT_FROM_PCI_IRQ(d->irq);
 
 	/*
 	 * map irq to a swlevel greater than 6 since the first 6 bits
 	 * of INT_PEND0 are taken
 	 */
-	swlevel = find_level(&cpu, irq);
+	swlevel = find_level(&cpu, d->irq);
 	intr_disconnect_level(cpu, swlevel);
 
 	bridge->b_int_enable &= ~(1 << pin);
 	bridge->b_wid_tflush;
 }
 
-static inline void enable_bridge_irq(unsigned int irq)
+static inline void enable_bridge_irq(struct irq_data *d)
 {
 	cpuid_t cpu;
 	int swlevel;
 
-	swlevel = find_level(&cpu, irq);	/* Criminal offence */
+	swlevel = find_level(&cpu, d->irq);	/* Criminal offence */
 	intr_connect_level(cpu, swlevel);
 }
 
-static inline void disable_bridge_irq(unsigned int irq)
+static inline void disable_bridge_irq(struct irq_data *d)
 {
 	cpuid_t cpu;
 	int swlevel;
 
-	swlevel = find_level(&cpu, irq);	/* Criminal offence */
+	swlevel = find_level(&cpu, d->irq);	/* Criminal offence */
 	intr_disconnect_level(cpu, swlevel);
 }
 
 static struct irq_chip bridge_irq_type = {
 	.name		= "bridge",
-	.startup	= startup_bridge_irq,
-	.shutdown	= shutdown_bridge_irq,
-	.ack		= disable_bridge_irq,
-	.mask		= disable_bridge_irq,
-	.mask_ack	= disable_bridge_irq,
-	.unmask		= enable_bridge_irq,
+	.irq_startup	= startup_bridge_irq,
+	.irq_shutdown	= shutdown_bridge_irq,
+	.irq_mask	= disable_bridge_irq,
+	.irq_unmask	= enable_bridge_irq,
 };
 
 void __devinit register_bridge_irq(unsigned int irq)

diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index d6802d6..c01f558 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c

@@ -36,21 +36,18 @@
 #include <asm/sn/sn0/hubio.h>
 #include <asm/pci/bridge.h>
 
-static void enable_rt_irq(unsigned int irq)
+static void enable_rt_irq(struct irq_data *d)
 {
 }
 
-static void disable_rt_irq(unsigned int irq)
+static void disable_rt_irq(struct irq_data *d)
 {
 }
 
 static struct irq_chip rt_irq_type = {
 	.name		= "SN HUB RT timer",
-	.ack		= disable_rt_irq,
-	.mask		= disable_rt_irq,
-	.mask_ack	= disable_rt_irq,
-	.unmask		= enable_rt_irq,
-	.eoi		= enable_rt_irq,
+	.irq_mask	= disable_rt_irq,
+	.irq_unmask	= enable_rt_irq,
 };
 
 static int rt_next_event(unsigned long delta, struct clock_event_device *evt)

diff --git a/arch/mips/sgi-ip32/ip32-irq.c b/arch/mips/sgi-ip32/ip32-irq.c
index eb40824..e0a3ce4 100644
--- a/arch/mips/sgi-ip32/ip32-irq.c
+++ b/arch/mips/sgi-ip32/ip32-irq.c

@@ -130,70 +130,48 @@
 
 static uint64_t crime_mask;
 
-static inline void crime_enable_irq(unsigned int irq)
+static inline void crime_enable_irq(struct irq_data *d)
 {
-	unsigned int bit = irq - CRIME_IRQ_BASE;
+	unsigned int bit = d->irq - CRIME_IRQ_BASE;
 
 	crime_mask |= 1 << bit;
 	crime->imask = crime_mask;
 }
 
-static inline void crime_disable_irq(unsigned int irq)
+static inline void crime_disable_irq(struct irq_data *d)
 {
-	unsigned int bit = irq - CRIME_IRQ_BASE;
+	unsigned int bit = d->irq - CRIME_IRQ_BASE;
 
 	crime_mask &= ~(1 << bit);
 	crime->imask = crime_mask;
 	flush_crime_bus();
 }
 
-static void crime_level_mask_and_ack_irq(unsigned int irq)
-{
-	crime_disable_irq(irq);
-}
-
-static void crime_level_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
-		crime_enable_irq(irq);
-}
-
 static struct irq_chip crime_level_interrupt = {
 	.name		= "IP32 CRIME",
-	.ack		= crime_level_mask_and_ack_irq,
-	.mask		= crime_disable_irq,
-	.mask_ack	= crime_level_mask_and_ack_irq,
-	.unmask		= crime_enable_irq,
-	.end		= crime_level_end_irq,
+	.irq_mask	= crime_disable_irq,
+	.irq_unmask	= crime_enable_irq,
 };
 
-static void crime_edge_mask_and_ack_irq(unsigned int irq)
+static void crime_edge_mask_and_ack_irq(struct irq_data *d)
 {
-	unsigned int bit = irq - CRIME_IRQ_BASE;
+	unsigned int bit = d->irq - CRIME_IRQ_BASE;
 	uint64_t crime_int;
 
 	/* Edge triggered interrupts must be cleared. */
-
 	crime_int = crime->hard_int;
 	crime_int &= ~(1 << bit);
 	crime->hard_int = crime_int;
 
-	crime_disable_irq(irq);
-}
-
-static void crime_edge_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
-		crime_enable_irq(irq);
+	crime_disable_irq(d);
 }
 
 static struct irq_chip crime_edge_interrupt = {
 	.name		= "IP32 CRIME",
-	.ack		= crime_edge_mask_and_ack_irq,
-	.mask		= crime_disable_irq,
-	.mask_ack	= crime_edge_mask_and_ack_irq,
-	.unmask		= crime_enable_irq,
-	.end		= crime_edge_end_irq,
+	.irq_ack	= crime_edge_mask_and_ack_irq,
+	.irq_mask	= crime_disable_irq,
+	.irq_mask_ack	= crime_edge_mask_and_ack_irq,
+	.irq_unmask	= crime_enable_irq,
 };
 
 /*
@@ -204,37 +182,28 @@
 
 static unsigned long macepci_mask;
 
-static void enable_macepci_irq(unsigned int irq)
+static void enable_macepci_irq(struct irq_data *d)
 {
-	macepci_mask |= MACEPCI_CONTROL_INT(irq - MACEPCI_SCSI0_IRQ);
+	macepci_mask |= MACEPCI_CONTROL_INT(d->irq - MACEPCI_SCSI0_IRQ);
 	mace->pci.control = macepci_mask;
-	crime_mask |= 1 << (irq - CRIME_IRQ_BASE);
+	crime_mask |= 1 << (d->irq - CRIME_IRQ_BASE);
 	crime->imask = crime_mask;
 }
 
-static void disable_macepci_irq(unsigned int irq)
+static void disable_macepci_irq(struct irq_data *d)
 {
-	crime_mask &= ~(1 << (irq - CRIME_IRQ_BASE));
+	crime_mask &= ~(1 << (d->irq - CRIME_IRQ_BASE));
 	crime->imask = crime_mask;
 	flush_crime_bus();
-	macepci_mask &= ~MACEPCI_CONTROL_INT(irq - MACEPCI_SCSI0_IRQ);
+	macepci_mask &= ~MACEPCI_CONTROL_INT(d->irq - MACEPCI_SCSI0_IRQ);
 	mace->pci.control = macepci_mask;
 	flush_mace_bus();
 }
 
-static void end_macepci_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		enable_macepci_irq(irq);
-}
-
 static struct irq_chip ip32_macepci_interrupt = {
 	.name = "IP32 MACE PCI",
-	.ack = disable_macepci_irq,
-	.mask = disable_macepci_irq,
-	.mask_ack = disable_macepci_irq,
-	.unmask = enable_macepci_irq,
-	.end = end_macepci_irq,
+	.irq_mask = disable_macepci_irq,
+	.irq_unmask = enable_macepci_irq,
 };
 
 /* This is used for MACE ISA interrupts.  That means bits 4-6 in the
@@ -276,13 +245,13 @@
 
 static unsigned long maceisa_mask;
 
-static void enable_maceisa_irq(unsigned int irq)
+static void enable_maceisa_irq(struct irq_data *d)
 {
 	unsigned int crime_int = 0;
 
-	pr_debug("maceisa enable: %u\n", irq);
+	pr_debug("maceisa enable: %u\n", d->irq);
 
-	switch (irq) {
+	switch (d->irq) {
 	case MACEISA_AUDIO_SW_IRQ ... MACEISA_AUDIO3_MERR_IRQ:
 		crime_int = MACE_AUDIO_INT;
 		break;
@@ -296,15 +265,15 @@
 	pr_debug("crime_int %08x enabled\n", crime_int);
 	crime_mask |= crime_int;
 	crime->imask = crime_mask;
-	maceisa_mask |= 1 << (irq - MACEISA_AUDIO_SW_IRQ);
+	maceisa_mask |= 1 << (d->irq - MACEISA_AUDIO_SW_IRQ);
 	mace->perif.ctrl.imask = maceisa_mask;
 }
 
-static void disable_maceisa_irq(unsigned int irq)
+static void disable_maceisa_irq(struct irq_data *d)
 {
 	unsigned int crime_int = 0;
 
-	maceisa_mask &= ~(1 << (irq - MACEISA_AUDIO_SW_IRQ));
+	maceisa_mask &= ~(1 << (d->irq - MACEISA_AUDIO_SW_IRQ));
         if (!(maceisa_mask & MACEISA_AUDIO_INT))
 		crime_int |= MACE_AUDIO_INT;
         if (!(maceisa_mask & MACEISA_MISC_INT))
@@ -318,76 +287,57 @@
 	flush_mace_bus();
 }
 
-static void mask_and_ack_maceisa_irq(unsigned int irq)
+static void mask_and_ack_maceisa_irq(struct irq_data *d)
 {
 	unsigned long mace_int;
 
 	/* edge triggered */
 	mace_int = mace->perif.ctrl.istat;
-	mace_int &= ~(1 << (irq - MACEISA_AUDIO_SW_IRQ));
+	mace_int &= ~(1 << (d->irq - MACEISA_AUDIO_SW_IRQ));
 	mace->perif.ctrl.istat = mace_int;
 
-	disable_maceisa_irq(irq);
-}
-
-static void end_maceisa_irq(unsigned irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
-		enable_maceisa_irq(irq);
+	disable_maceisa_irq(d);
 }
 
 static struct irq_chip ip32_maceisa_level_interrupt = {
 	.name		= "IP32 MACE ISA",
-	.ack		= disable_maceisa_irq,
-	.mask		= disable_maceisa_irq,
-	.mask_ack	= disable_maceisa_irq,
-	.unmask		= enable_maceisa_irq,
-	.end		= end_maceisa_irq,
+	.irq_mask	= disable_maceisa_irq,
+	.irq_unmask	= enable_maceisa_irq,
 };
 
 static struct irq_chip ip32_maceisa_edge_interrupt = {
 	.name		= "IP32 MACE ISA",
-	.ack		= mask_and_ack_maceisa_irq,
-	.mask		= disable_maceisa_irq,
-	.mask_ack	= mask_and_ack_maceisa_irq,
-	.unmask		= enable_maceisa_irq,
-	.end		= end_maceisa_irq,
+	.irq_ack	= mask_and_ack_maceisa_irq,
+	.irq_mask	= disable_maceisa_irq,
+	.irq_mask_ack	= mask_and_ack_maceisa_irq,
+	.irq_unmask	= enable_maceisa_irq,
 };
 
 /* This is used for regular non-ISA, non-PCI MACE interrupts.  That means
  * bits 0-3 and 7 in the CRIME register.
  */
 
-static void enable_mace_irq(unsigned int irq)
+static void enable_mace_irq(struct irq_data *d)
 {
-	unsigned int bit = irq - CRIME_IRQ_BASE;
+	unsigned int bit = d->irq - CRIME_IRQ_BASE;
 
 	crime_mask |= (1 << bit);
 	crime->imask = crime_mask;
 }
 
-static void disable_mace_irq(unsigned int irq)
+static void disable_mace_irq(struct irq_data *d)
 {
-	unsigned int bit = irq - CRIME_IRQ_BASE;
+	unsigned int bit = d->irq - CRIME_IRQ_BASE;
 
 	crime_mask &= ~(1 << bit);
 	crime->imask = crime_mask;
 	flush_crime_bus();
 }
 
-static void end_mace_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		enable_mace_irq(irq);
-}
-
 static struct irq_chip ip32_mace_interrupt = {
 	.name = "IP32 MACE",
-	.ack = disable_mace_irq,
-	.mask = disable_mace_irq,
-	.mask_ack = disable_mace_irq,
-	.unmask = enable_mace_irq,
-	.end = end_mace_irq,
+	.irq_mask = disable_mace_irq,
+	.irq_unmask = enable_mace_irq,
 };
 
 static void ip32_unknown_interrupt(void)

diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index 044bbe4..89e8188 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c

@@ -44,31 +44,10 @@
  * for interrupt lines
  */
 
-
-static void end_bcm1480_irq(unsigned int irq);
-static void enable_bcm1480_irq(unsigned int irq);
-static void disable_bcm1480_irq(unsigned int irq);
-static void ack_bcm1480_irq(unsigned int irq);
-#ifdef CONFIG_SMP
-static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
-#endif
-
 #ifdef CONFIG_PCI
 extern unsigned long ht_eoi_space;
 #endif
 
-static struct irq_chip bcm1480_irq_type = {
-	.name = "BCM1480-IMR",
-	.ack = ack_bcm1480_irq,
-	.mask = disable_bcm1480_irq,
-	.mask_ack = ack_bcm1480_irq,
-	.unmask = enable_bcm1480_irq,
-	.end = end_bcm1480_irq,
-#ifdef CONFIG_SMP
-	.set_affinity = bcm1480_set_affinity
-#endif
-};
-
 /* Store the CPU id (not the logical number) */
 int bcm1480_irq_owner[BCM1480_NR_IRQS];
 
@@ -109,12 +88,13 @@
 }
 
 #ifdef CONFIG_SMP
-static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int bcm1480_set_affinity(struct irq_data *d, const struct cpumask *mask,
+				bool force)
 {
+	unsigned int irq_dirty, irq = d->irq;
 	int i = 0, old_cpu, cpu, int_on, k;
 	u64 cur_ints;
 	unsigned long flags;
-	unsigned int irq_dirty;
 
 	i = cpumask_first(mask);
 
@@ -156,21 +136,25 @@
 
 /*****************************************************************************/
 
-static void disable_bcm1480_irq(unsigned int irq)
+static void disable_bcm1480_irq(struct irq_data *d)
 {
+	unsigned int irq = d->irq;
+
 	bcm1480_mask_irq(bcm1480_irq_owner[irq], irq);
 }
 
-static void enable_bcm1480_irq(unsigned int irq)
+static void enable_bcm1480_irq(struct irq_data *d)
 {
+	unsigned int irq = d->irq;
+
 	bcm1480_unmask_irq(bcm1480_irq_owner[irq], irq);
 }
 
 
-static void ack_bcm1480_irq(unsigned int irq)
+static void ack_bcm1480_irq(struct irq_data *d)
 {
+	unsigned int irq_dirty, irq = d->irq;
 	u64 pending;
-	unsigned int irq_dirty;
 	int k;
 
 	/*
@@ -217,14 +201,15 @@
 	bcm1480_mask_irq(bcm1480_irq_owner[irq], irq);
 }
 
-
-static void end_bcm1480_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
-		bcm1480_unmask_irq(bcm1480_irq_owner[irq], irq);
-	}
-}
-
+static struct irq_chip bcm1480_irq_type = {
+	.name = "BCM1480-IMR",
+	.irq_mask_ack = ack_bcm1480_irq,
+	.irq_mask = disable_bcm1480_irq,
+	.irq_unmask = enable_bcm1480_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = bcm1480_set_affinity
+#endif
+};
 
 void __init init_bcm1480_irqs(void)
 {

diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index 12ac04a..fd269ea 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c

@@ -43,31 +43,10 @@
  * for interrupt lines
  */
 
-
-static void end_sb1250_irq(unsigned int irq);
-static void enable_sb1250_irq(unsigned int irq);
-static void disable_sb1250_irq(unsigned int irq);
-static void ack_sb1250_irq(unsigned int irq);
-#ifdef CONFIG_SMP
-static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
-#endif
-
 #ifdef CONFIG_SIBYTE_HAS_LDT
 extern unsigned long ldt_eoi_space;
 #endif
 
-static struct irq_chip sb1250_irq_type = {
-	.name = "SB1250-IMR",
-	.ack = ack_sb1250_irq,
-	.mask = disable_sb1250_irq,
-	.mask_ack = ack_sb1250_irq,
-	.unmask = enable_sb1250_irq,
-	.end = end_sb1250_irq,
-#ifdef CONFIG_SMP
-	.set_affinity = sb1250_set_affinity
-#endif
-};
-
 /* Store the CPU id (not the logical number) */
 int sb1250_irq_owner[SB1250_NR_IRQS];
 
@@ -102,9 +81,11 @@
 }
 
 #ifdef CONFIG_SMP
-static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int sb1250_set_affinity(struct irq_data *d, const struct cpumask *mask,
+			       bool force)
 {
 	int i = 0, old_cpu, cpu, int_on;
+	unsigned int irq = d->irq;
 	u64 cur_ints;
 	unsigned long flags;
 
@@ -142,21 +123,17 @@
 }
 #endif
 
-/*****************************************************************************/
-
-static void disable_sb1250_irq(unsigned int irq)
+static void enable_sb1250_irq(struct irq_data *d)
 {
-	sb1250_mask_irq(sb1250_irq_owner[irq], irq);
-}
+	unsigned int irq = d->irq;
 
-static void enable_sb1250_irq(unsigned int irq)
-{
 	sb1250_unmask_irq(sb1250_irq_owner[irq], irq);
 }
 
 
-static void ack_sb1250_irq(unsigned int irq)
+static void ack_sb1250_irq(struct irq_data *d)
 {
+	unsigned int irq = d->irq;
 #ifdef CONFIG_SIBYTE_HAS_LDT
 	u64 pending;
 
@@ -199,14 +176,14 @@
 	sb1250_mask_irq(sb1250_irq_owner[irq], irq);
 }
 
-
-static void end_sb1250_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
-		sb1250_unmask_irq(sb1250_irq_owner[irq], irq);
-	}
-}
-
+static struct irq_chip sb1250_irq_type = {
+	.name = "SB1250-IMR",
+	.irq_mask_ack = ack_sb1250_irq,
+	.irq_unmask = enable_sb1250_irq,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = sb1250_set_affinity
+#endif
+};
 
 void __init init_sb1250_irqs(void)
 {

diff --git a/arch/mips/sni/a20r.c b/arch/mips/sni/a20r.c
index bbe7187..72b9415 100644
--- a/arch/mips/sni/a20r.c
+++ b/arch/mips/sni/a20r.c

@@ -168,33 +168,22 @@
 	return status;
 }
 
-static inline void unmask_a20r_irq(unsigned int irq)
+static inline void unmask_a20r_irq(struct irq_data *d)
 {
-	set_c0_status(0x100 << (irq - SNI_A20R_IRQ_BASE));
+	set_c0_status(0x100 << (d->irq - SNI_A20R_IRQ_BASE));
 	irq_enable_hazard();
 }
 
-static inline void mask_a20r_irq(unsigned int irq)
+static inline void mask_a20r_irq(struct irq_data *d)
 {
-	clear_c0_status(0x100 << (irq - SNI_A20R_IRQ_BASE));
+	clear_c0_status(0x100 << (d->irq - SNI_A20R_IRQ_BASE));
 	irq_disable_hazard();
 }
 
-static void end_a20r_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
-		a20r_ack_hwint();
-		unmask_a20r_irq(irq);
-	}
-}
-
 static struct irq_chip a20r_irq_type = {
 	.name		= "A20R",
-	.ack		= mask_a20r_irq,
-	.mask		= mask_a20r_irq,
-	.mask_ack	= mask_a20r_irq,
-	.unmask		= unmask_a20r_irq,
-	.end		= end_a20r_irq,
+	.irq_mask	= mask_a20r_irq,
+	.irq_unmask	= unmask_a20r_irq,
 };
 
 /*

diff --git a/arch/mips/sni/pcimt.c b/arch/mips/sni/pcimt.c
index 8c92c73..cfcc68a 100644
--- a/arch/mips/sni/pcimt.c
+++ b/arch/mips/sni/pcimt.c

@@ -194,33 +194,24 @@
 	.io_map_base    = SNI_PORT_BASE
 };
 
-static void enable_pcimt_irq(unsigned int irq)
+static void enable_pcimt_irq(struct irq_data *d)
 {
-	unsigned int mask = 1 << (irq - PCIMT_IRQ_INT2);
+	unsigned int mask = 1 << (d->irq - PCIMT_IRQ_INT2);
 
 	*(volatile u8 *) PCIMT_IRQSEL |= mask;
 }
 
-void disable_pcimt_irq(unsigned int irq)
+void disable_pcimt_irq(struct irq_data *d)
 {
-	unsigned int mask = ~(1 << (irq - PCIMT_IRQ_INT2));
+	unsigned int mask = ~(1 << (d->irq - PCIMT_IRQ_INT2));
 
 	*(volatile u8 *) PCIMT_IRQSEL &= mask;
 }
 
-static void end_pcimt_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		enable_pcimt_irq(irq);
-}
-
 static struct irq_chip pcimt_irq_type = {
 	.name = "PCIMT",
-	.ack = disable_pcimt_irq,
-	.mask = disable_pcimt_irq,
-	.mask_ack = disable_pcimt_irq,
-	.unmask = enable_pcimt_irq,
-	.end = end_pcimt_irq,
+	.irq_mask = disable_pcimt_irq,
+	.irq_unmask = enable_pcimt_irq,
 };
 
 /*

diff --git a/arch/mips/sni/pcit.c b/arch/mips/sni/pcit.c
index dc98745..0846e99 100644
--- a/arch/mips/sni/pcit.c
+++ b/arch/mips/sni/pcit.c

@@ -156,33 +156,24 @@
 	.io_map_base    = SNI_PORT_BASE
 };
 
-static void enable_pcit_irq(unsigned int irq)
+static void enable_pcit_irq(struct irq_data *d)
 {
-	u32 mask = 1 << (irq - SNI_PCIT_INT_START + 24);
+	u32 mask = 1 << (d->irq - SNI_PCIT_INT_START + 24);
 
 	*(volatile u32 *)SNI_PCIT_INT_REG |= mask;
 }
 
-void disable_pcit_irq(unsigned int irq)
+void disable_pcit_irq(struct irq_data *d)
 {
-	u32 mask = 1 << (irq - SNI_PCIT_INT_START + 24);
+	u32 mask = 1 << (d->irq - SNI_PCIT_INT_START + 24);
 
 	*(volatile u32 *)SNI_PCIT_INT_REG &= ~mask;
 }
 
-void end_pcit_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		enable_pcit_irq(irq);
-}
-
 static struct irq_chip pcit_irq_type = {
 	.name = "PCIT",
-	.ack = disable_pcit_irq,
-	.mask = disable_pcit_irq,
-	.mask_ack = disable_pcit_irq,
-	.unmask = enable_pcit_irq,
-	.end = end_pcit_irq,
+	.irq_mask = disable_pcit_irq,
+	.irq_unmask = enable_pcit_irq,
 };
 
 static void pcit_hwint1(void)

diff --git a/arch/mips/sni/rm200.c b/arch/mips/sni/rm200.c
index 0e6f42c..f05d8e5 100644
--- a/arch/mips/sni/rm200.c
+++ b/arch/mips/sni/rm200.c

@@ -155,12 +155,11 @@
 #define cached_master_mask	(rm200_cached_irq_mask)
 #define cached_slave_mask	(rm200_cached_irq_mask >> 8)
 
-static void sni_rm200_disable_8259A_irq(unsigned int irq)
+static void sni_rm200_disable_8259A_irq(struct irq_data *d)
 {
-	unsigned int mask;
+	unsigned int mask, irq = d->irq - RM200_I8259A_IRQ_BASE;
 	unsigned long flags;
 
-	irq -= RM200_I8259A_IRQ_BASE;
 	mask = 1 << irq;
 	raw_spin_lock_irqsave(&sni_rm200_i8259A_lock, flags);
 	rm200_cached_irq_mask |= mask;
@@ -171,12 +170,11 @@
 	raw_spin_unlock_irqrestore(&sni_rm200_i8259A_lock, flags);
 }
 
-static void sni_rm200_enable_8259A_irq(unsigned int irq)
+static void sni_rm200_enable_8259A_irq(struct irq_data *d)
 {
-	unsigned int mask;
+	unsigned int mask, irq = d->irq - RM200_I8259A_IRQ_BASE;
 	unsigned long flags;
 
-	irq -= RM200_I8259A_IRQ_BASE;
 	mask = ~(1 << irq);
 	raw_spin_lock_irqsave(&sni_rm200_i8259A_lock, flags);
 	rm200_cached_irq_mask &= mask;
@@ -210,12 +208,11 @@
  * first, _then_ send the EOI, and the order of EOI
  * to the two 8259s is important!
  */
-void sni_rm200_mask_and_ack_8259A(unsigned int irq)
+void sni_rm200_mask_and_ack_8259A(struct irq_data *d)
 {
-	unsigned int irqmask;
+	unsigned int irqmask, irq = d->irq - RM200_I8259A_IRQ_BASE;
 	unsigned long flags;
 
-	irq -= RM200_I8259A_IRQ_BASE;
 	irqmask = 1 << irq;
 	raw_spin_lock_irqsave(&sni_rm200_i8259A_lock, flags);
 	/*
@@ -285,9 +282,9 @@
 
 static struct irq_chip sni_rm200_i8259A_chip = {
 	.name		= "RM200-XT-PIC",
-	.mask		= sni_rm200_disable_8259A_irq,
-	.unmask		= sni_rm200_enable_8259A_irq,
-	.mask_ack	= sni_rm200_mask_and_ack_8259A,
+	.irq_mask	= sni_rm200_disable_8259A_irq,
+	.irq_unmask	= sni_rm200_enable_8259A_irq,
+	.irq_mask_ack	= sni_rm200_mask_and_ack_8259A,
 };
 
 /*
@@ -429,33 +426,24 @@
 #define SNI_RM200_INT_START  24
 #define SNI_RM200_INT_END    28
 
-static void enable_rm200_irq(unsigned int irq)
+static void enable_rm200_irq(struct irq_data *d)
 {
-	unsigned int mask = 1 << (irq - SNI_RM200_INT_START);
+	unsigned int mask = 1 << (d->irq - SNI_RM200_INT_START);
 
 	*(volatile u8 *)SNI_RM200_INT_ENA_REG &= ~mask;
 }
 
-void disable_rm200_irq(unsigned int irq)
+void disable_rm200_irq(struct irq_data *d)
 {
-	unsigned int mask = 1 << (irq - SNI_RM200_INT_START);
+	unsigned int mask = 1 << (d->irq - SNI_RM200_INT_START);
 
 	*(volatile u8 *)SNI_RM200_INT_ENA_REG |= mask;
 }
 
-void end_rm200_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		enable_rm200_irq(irq);
-}
-
 static struct irq_chip rm200_irq_type = {
 	.name = "RM200",
-	.ack = disable_rm200_irq,
-	.mask = disable_rm200_irq,
-	.mask_ack = disable_rm200_irq,
-	.unmask = enable_rm200_irq,
-	.end = end_rm200_irq,
+	.irq_mask = disable_rm200_irq,
+	.irq_unmask = enable_rm200_irq,
 };
 
 static void sni_rm200_hwint(void)

diff --git a/arch/mips/txx9/generic/irq_tx4939.c b/arch/mips/txx9/generic/irq_tx4939.c
index 3886ad7..93b6edb 100644
--- a/arch/mips/txx9/generic/irq_tx4939.c
+++ b/arch/mips/txx9/generic/irq_tx4939.c

@@ -50,9 +50,9 @@
 	unsigned char mode;
 } tx4939irq[TX4939_NUM_IR] __read_mostly;
 
-static void tx4939_irq_unmask(unsigned int irq)
+static void tx4939_irq_unmask(struct irq_data *d)
 {
-	unsigned int irq_nr = irq - TXX9_IRQ_BASE;
+	unsigned int irq_nr = d->irq - TXX9_IRQ_BASE;
 	u32 __iomem *lvlp;
 	int ofs;
 	if (irq_nr < 32) {
@@ -68,9 +68,9 @@
 		     lvlp);
 }
 
-static inline void tx4939_irq_mask(unsigned int irq)
+static inline void tx4939_irq_mask(struct irq_data *d)
 {
-	unsigned int irq_nr = irq - TXX9_IRQ_BASE;
+	unsigned int irq_nr = d->irq - TXX9_IRQ_BASE;
 	u32 __iomem *lvlp;
 	int ofs;
 	if (irq_nr < 32) {
@@ -87,11 +87,11 @@
 	mmiowb();
 }
 
-static void tx4939_irq_mask_ack(unsigned int irq)
+static void tx4939_irq_mask_ack(struct irq_data *d)
 {
-	unsigned int irq_nr = irq - TXX9_IRQ_BASE;
+	unsigned int irq_nr = d->irq - TXX9_IRQ_BASE;
 
-	tx4939_irq_mask(irq);
+	tx4939_irq_mask(d);
 	if (TXx9_IRCR_EDGE(tx4939irq[irq_nr].mode)) {
 		irq_nr--;
 		/* clear edge detection */
@@ -101,9 +101,9 @@
 	}
 }
 
-static int tx4939_irq_set_type(unsigned int irq, unsigned int flow_type)
+static int tx4939_irq_set_type(struct irq_data *d, unsigned int flow_type)
 {
-	unsigned int irq_nr = irq - TXX9_IRQ_BASE;
+	unsigned int irq_nr = d->irq - TXX9_IRQ_BASE;
 	u32 cr;
 	u32 __iomem *crp;
 	int ofs;
@@ -145,11 +145,11 @@
 
 static struct irq_chip tx4939_irq_chip = {
 	.name		= "TX4939",
-	.ack		= tx4939_irq_mask_ack,
-	.mask		= tx4939_irq_mask,
-	.mask_ack	= tx4939_irq_mask_ack,
-	.unmask		= tx4939_irq_unmask,
-	.set_type	= tx4939_irq_set_type,
+	.irq_ack	= tx4939_irq_mask_ack,
+	.irq_mask	= tx4939_irq_mask,
+	.irq_mask_ack	= tx4939_irq_mask_ack,
+	.irq_unmask	= tx4939_irq_unmask,
+	.irq_set_type	= tx4939_irq_set_type,
 };
 
 static int tx4939_irq_set_pri(int irc_irq, int new_pri)

diff --git a/arch/mips/txx9/jmr3927/irq.c b/arch/mips/txx9/jmr3927/irq.c
index 0a7f8e3..92a5c1b 100644
--- a/arch/mips/txx9/jmr3927/irq.c
+++ b/arch/mips/txx9/jmr3927/irq.c

@@ -47,20 +47,20 @@
  * CP0_STATUS is a thread's resource (saved/restored on context switch).
  * So disable_irq/enable_irq MUST handle IOC/IRC registers.
  */
-static void mask_irq_ioc(unsigned int irq)
+static void mask_irq_ioc(struct irq_data *d)
 {
 	/* 0: mask */
-	unsigned int irq_nr = irq - JMR3927_IRQ_IOC;
+	unsigned int irq_nr = d->irq - JMR3927_IRQ_IOC;
 	unsigned char imask = jmr3927_ioc_reg_in(JMR3927_IOC_INTM_ADDR);
 	unsigned int bit = 1 << irq_nr;
 	jmr3927_ioc_reg_out(imask & ~bit, JMR3927_IOC_INTM_ADDR);
 	/* flush write buffer */
 	(void)jmr3927_ioc_reg_in(JMR3927_IOC_REV_ADDR);
 }
-static void unmask_irq_ioc(unsigned int irq)
+static void unmask_irq_ioc(struct irq_data *d)
 {
 	/* 0: mask */
-	unsigned int irq_nr = irq - JMR3927_IRQ_IOC;
+	unsigned int irq_nr = d->irq - JMR3927_IRQ_IOC;
 	unsigned char imask = jmr3927_ioc_reg_in(JMR3927_IOC_INTM_ADDR);
 	unsigned int bit = 1 << irq_nr;
 	jmr3927_ioc_reg_out(imask | bit, JMR3927_IOC_INTM_ADDR);
@@ -95,10 +95,8 @@
 
 static struct irq_chip jmr3927_irq_ioc = {
 	.name = "jmr3927_ioc",
-	.ack = mask_irq_ioc,
-	.mask = mask_irq_ioc,
-	.mask_ack = mask_irq_ioc,
-	.unmask = unmask_irq_ioc,
+	.irq_mask = mask_irq_ioc,
+	.irq_unmask = unmask_irq_ioc,
 };
 
 void __init jmr3927_irq_setup(void)

diff --git a/arch/mips/txx9/rbtx4927/irq.c b/arch/mips/txx9/rbtx4927/irq.c
index c4b54d2..7c0a048 100644
--- a/arch/mips/txx9/rbtx4927/irq.c
+++ b/arch/mips/txx9/rbtx4927/irq.c

@@ -117,18 +117,6 @@
 #include <asm/txx9/generic.h>
 #include <asm/txx9/rbtx4927.h>
 
-static void toshiba_rbtx4927_irq_ioc_enable(unsigned int irq);
-static void toshiba_rbtx4927_irq_ioc_disable(unsigned int irq);
-
-#define TOSHIBA_RBTX4927_IOC_NAME "RBTX4927-IOC"
-static struct irq_chip toshiba_rbtx4927_irq_ioc_type = {
-	.name = TOSHIBA_RBTX4927_IOC_NAME,
-	.ack = toshiba_rbtx4927_irq_ioc_disable,
-	.mask = toshiba_rbtx4927_irq_ioc_disable,
-	.mask_ack = toshiba_rbtx4927_irq_ioc_disable,
-	.unmask = toshiba_rbtx4927_irq_ioc_enable,
-};
-
 static int toshiba_rbtx4927_irq_nested(int sw_irq)
 {
 	u8 level3;
@@ -139,6 +127,32 @@
 	return RBTX4927_IRQ_IOC + __fls8(level3);
 }
 
+static void toshiba_rbtx4927_irq_ioc_enable(struct irq_data *d)
+{
+	unsigned char v;
+
+	v = readb(rbtx4927_imask_addr);
+	v |= (1 << (d->irq - RBTX4927_IRQ_IOC));
+	writeb(v, rbtx4927_imask_addr);
+}
+
+static void toshiba_rbtx4927_irq_ioc_disable(struct irq_data *d)
+{
+	unsigned char v;
+
+	v = readb(rbtx4927_imask_addr);
+	v &= ~(1 << (d->irq - RBTX4927_IRQ_IOC));
+	writeb(v, rbtx4927_imask_addr);
+	mmiowb();
+}
+
+#define TOSHIBA_RBTX4927_IOC_NAME "RBTX4927-IOC"
+static struct irq_chip toshiba_rbtx4927_irq_ioc_type = {
+	.name = TOSHIBA_RBTX4927_IOC_NAME,
+	.irq_mask = toshiba_rbtx4927_irq_ioc_disable,
+	.irq_unmask = toshiba_rbtx4927_irq_ioc_enable,
+};
+
 static void __init toshiba_rbtx4927_irq_ioc_init(void)
 {
 	int i;
@@ -155,26 +169,6 @@
 	set_irq_chained_handler(RBTX4927_IRQ_IOCINT, handle_simple_irq);
 }
 
-static void toshiba_rbtx4927_irq_ioc_enable(unsigned int irq)
-{
-	unsigned char v;
-
-	v = readb(rbtx4927_imask_addr);
-	v |= (1 << (irq - RBTX4927_IRQ_IOC));
-	writeb(v, rbtx4927_imask_addr);
-}
-
-static void toshiba_rbtx4927_irq_ioc_disable(unsigned int irq)
-{
-	unsigned char v;
-
-	v = readb(rbtx4927_imask_addr);
-	v &= ~(1 << (irq - RBTX4927_IRQ_IOC));
-	writeb(v, rbtx4927_imask_addr);
-	mmiowb();
-}
-
-
 static int rbtx4927_irq_dispatch(int pending)
 {
 	int irq;

diff --git a/arch/mips/txx9/rbtx4938/irq.c b/arch/mips/txx9/rbtx4938/irq.c
index 67a73a8..2ec4fe1 100644
--- a/arch/mips/txx9/rbtx4938/irq.c
+++ b/arch/mips/txx9/rbtx4938/irq.c

@@ -69,18 +69,6 @@
 #include <asm/txx9/generic.h>
 #include <asm/txx9/rbtx4938.h>
 
-static void toshiba_rbtx4938_irq_ioc_enable(unsigned int irq);
-static void toshiba_rbtx4938_irq_ioc_disable(unsigned int irq);
-
-#define TOSHIBA_RBTX4938_IOC_NAME "RBTX4938-IOC"
-static struct irq_chip toshiba_rbtx4938_irq_ioc_type = {
-	.name = TOSHIBA_RBTX4938_IOC_NAME,
-	.ack = toshiba_rbtx4938_irq_ioc_disable,
-	.mask = toshiba_rbtx4938_irq_ioc_disable,
-	.mask_ack = toshiba_rbtx4938_irq_ioc_disable,
-	.unmask = toshiba_rbtx4938_irq_ioc_enable,
-};
-
 static int toshiba_rbtx4938_irq_nested(int sw_irq)
 {
 	u8 level3;
@@ -92,41 +80,33 @@
 	return RBTX4938_IRQ_IOC + __fls8(level3);
 }
 
-static void __init
-toshiba_rbtx4938_irq_ioc_init(void)
-{
-	int i;
-
-	for (i = RBTX4938_IRQ_IOC;
-	     i < RBTX4938_IRQ_IOC + RBTX4938_NR_IRQ_IOC; i++)
-		set_irq_chip_and_handler(i, &toshiba_rbtx4938_irq_ioc_type,
-					 handle_level_irq);
-
-	set_irq_chained_handler(RBTX4938_IRQ_IOCINT, handle_simple_irq);
-}
-
-static void
-toshiba_rbtx4938_irq_ioc_enable(unsigned int irq)
+static void toshiba_rbtx4938_irq_ioc_enable(struct irq_data *d)
 {
 	unsigned char v;
 
 	v = readb(rbtx4938_imask_addr);
-	v |= (1 << (irq - RBTX4938_IRQ_IOC));
+	v |= (1 << (d->irq - RBTX4938_IRQ_IOC));
 	writeb(v, rbtx4938_imask_addr);
 	mmiowb();
 }
 
-static void
-toshiba_rbtx4938_irq_ioc_disable(unsigned int irq)
+static void toshiba_rbtx4938_irq_ioc_disable(struct irq_data *d)
 {
 	unsigned char v;
 
 	v = readb(rbtx4938_imask_addr);
-	v &= ~(1 << (irq - RBTX4938_IRQ_IOC));
+	v &= ~(1 << (d->irq - RBTX4938_IRQ_IOC));
 	writeb(v, rbtx4938_imask_addr);
 	mmiowb();
 }
 
+#define TOSHIBA_RBTX4938_IOC_NAME "RBTX4938-IOC"
+static struct irq_chip toshiba_rbtx4938_irq_ioc_type = {
+	.name = TOSHIBA_RBTX4938_IOC_NAME,
+	.irq_mask = toshiba_rbtx4938_irq_ioc_disable,
+	.irq_unmask = toshiba_rbtx4938_irq_ioc_enable,
+};
+
 static int rbtx4938_irq_dispatch(int pending)
 {
 	int irq;
@@ -146,6 +126,18 @@
 	return irq;
 }
 
+static void __init toshiba_rbtx4938_irq_ioc_init(void)
+{
+	int i;
+
+	for (i = RBTX4938_IRQ_IOC;
+	     i < RBTX4938_IRQ_IOC + RBTX4938_NR_IRQ_IOC; i++)
+		set_irq_chip_and_handler(i, &toshiba_rbtx4938_irq_ioc_type,
+					 handle_level_irq);
+
+	set_irq_chained_handler(RBTX4938_IRQ_IOCINT, handle_simple_irq);
+}
+
 void __init rbtx4938_irq_setup(void)
 {
 	txx9_irq_dispatch = rbtx4938_irq_dispatch;

diff --git a/arch/mips/txx9/rbtx4939/irq.c b/arch/mips/txx9/rbtx4939/irq.c
index 57fa740..7007463 100644
--- a/arch/mips/txx9/rbtx4939/irq.c
+++ b/arch/mips/txx9/rbtx4939/irq.c

@@ -19,16 +19,16 @@
  * RBTX4939 IOC controller definition
  */
 
-static void rbtx4939_ioc_irq_unmask(unsigned int irq)
+static void rbtx4939_ioc_irq_unmask(struct irq_data *d)
 {
-	int ioc_nr = irq - RBTX4939_IRQ_IOC;
+	int ioc_nr = d->irq - RBTX4939_IRQ_IOC;
 
 	writeb(readb(rbtx4939_ien_addr) | (1 << ioc_nr), rbtx4939_ien_addr);
 }
 
-static void rbtx4939_ioc_irq_mask(unsigned int irq)
+static void rbtx4939_ioc_irq_mask(struct irq_data *d)
 {
-	int ioc_nr = irq - RBTX4939_IRQ_IOC;
+	int ioc_nr = d->irq - RBTX4939_IRQ_IOC;
 
 	writeb(readb(rbtx4939_ien_addr) & ~(1 << ioc_nr), rbtx4939_ien_addr);
 	mmiowb();
@@ -36,10 +36,8 @@
 
 static struct irq_chip rbtx4939_ioc_irq_chip = {
 	.name		= "IOC",
-	.ack		= rbtx4939_ioc_irq_mask,
-	.mask		= rbtx4939_ioc_irq_mask,
-	.mask_ack	= rbtx4939_ioc_irq_mask,
-	.unmask		= rbtx4939_ioc_irq_unmask,
+	.irq_mask	= rbtx4939_ioc_irq_mask,
+	.irq_unmask	= rbtx4939_ioc_irq_unmask,
 };
 
 

diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c
index 6153b6a..f53156b 100644
--- a/arch/mips/vr41xx/common/icu.c
+++ b/arch/mips/vr41xx/common/icu.c

@@ -154,7 +154,7 @@
 
 void vr41xx_enable_piuint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + PIU_IRQ;
+	struct irq_desc *desc = irq_to_desc(PIU_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4111 ||
@@ -169,7 +169,7 @@
 
 void vr41xx_disable_piuint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + PIU_IRQ;
+	struct irq_desc *desc = irq_to_desc(PIU_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4111 ||
@@ -184,7 +184,7 @@
 
 void vr41xx_enable_aiuint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + AIU_IRQ;
+	struct irq_desc *desc = irq_to_desc(AIU_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4111 ||
@@ -199,7 +199,7 @@
 
 void vr41xx_disable_aiuint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + AIU_IRQ;
+	struct irq_desc *desc = irq_to_desc(AIU_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4111 ||
@@ -214,7 +214,7 @@
 
 void vr41xx_enable_kiuint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + KIU_IRQ;
+	struct irq_desc *desc = irq_to_desc(KIU_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4111 ||
@@ -229,7 +229,7 @@
 
 void vr41xx_disable_kiuint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + KIU_IRQ;
+	struct irq_desc *desc = irq_to_desc(KIU_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4111 ||
@@ -244,7 +244,7 @@
 
 void vr41xx_enable_macint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + ETHERNET_IRQ;
+	struct irq_desc *desc = irq_to_desc(ETHERNET_IRQ);
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
@@ -256,7 +256,7 @@
 
 void vr41xx_disable_macint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + ETHERNET_IRQ;
+	struct irq_desc *desc = irq_to_desc(ETHERNET_IRQ);
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
@@ -268,7 +268,7 @@
 
 void vr41xx_enable_dsiuint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + DSIU_IRQ;
+	struct irq_desc *desc = irq_to_desc(DSIU_IRQ);
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
@@ -280,7 +280,7 @@
 
 void vr41xx_disable_dsiuint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + DSIU_IRQ;
+	struct irq_desc *desc = irq_to_desc(DSIU_IRQ);
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
@@ -292,7 +292,7 @@
 
 void vr41xx_enable_firint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + FIR_IRQ;
+	struct irq_desc *desc = irq_to_desc(FIR_IRQ);
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
@@ -304,7 +304,7 @@
 
 void vr41xx_disable_firint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + FIR_IRQ;
+	struct irq_desc *desc = irq_to_desc(FIR_IRQ);
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
@@ -316,7 +316,7 @@
 
 void vr41xx_enable_pciint(void)
 {
-	struct irq_desc *desc = irq_desc + PCI_IRQ;
+	struct irq_desc *desc = irq_to_desc(PCI_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4122 ||
@@ -332,7 +332,7 @@
 
 void vr41xx_disable_pciint(void)
 {
-	struct irq_desc *desc = irq_desc + PCI_IRQ;
+	struct irq_desc *desc = irq_to_desc(PCI_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4122 ||
@@ -348,7 +348,7 @@
 
 void vr41xx_enable_scuint(void)
 {
-	struct irq_desc *desc = irq_desc + SCU_IRQ;
+	struct irq_desc *desc = irq_to_desc(SCU_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4122 ||
@@ -364,7 +364,7 @@
 
 void vr41xx_disable_scuint(void)
 {
-	struct irq_desc *desc = irq_desc + SCU_IRQ;
+	struct irq_desc *desc = irq_to_desc(SCU_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4122 ||
@@ -380,7 +380,7 @@
 
 void vr41xx_enable_csiint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + CSI_IRQ;
+	struct irq_desc *desc = irq_to_desc(CSI_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4122 ||
@@ -396,7 +396,7 @@
 
 void vr41xx_disable_csiint(uint16_t mask)
 {
-	struct irq_desc *desc = irq_desc + CSI_IRQ;
+	struct irq_desc *desc = irq_to_desc(CSI_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4122 ||
@@ -412,7 +412,7 @@
 
 void vr41xx_enable_bcuint(void)
 {
-	struct irq_desc *desc = irq_desc + BCU_IRQ;
+	struct irq_desc *desc = irq_to_desc(BCU_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4122 ||
@@ -428,7 +428,7 @@
 
 void vr41xx_disable_bcuint(void)
 {
-	struct irq_desc *desc = irq_desc + BCU_IRQ;
+	struct irq_desc *desc = irq_to_desc(BCU_IRQ);
 	unsigned long flags;
 
 	if (current_cpu_type() == CPU_VR4122 ||
@@ -442,45 +442,41 @@
 
 EXPORT_SYMBOL(vr41xx_disable_bcuint);
 
-static void disable_sysint1_irq(unsigned int irq)
+static void disable_sysint1_irq(struct irq_data *d)
 {
-	icu1_clear(MSYSINT1REG, 1 << SYSINT1_IRQ_TO_PIN(irq));
+	icu1_clear(MSYSINT1REG, 1 << SYSINT1_IRQ_TO_PIN(d->irq));
 }
 
-static void enable_sysint1_irq(unsigned int irq)
+static void enable_sysint1_irq(struct irq_data *d)
 {
-	icu1_set(MSYSINT1REG, 1 << SYSINT1_IRQ_TO_PIN(irq));
+	icu1_set(MSYSINT1REG, 1 << SYSINT1_IRQ_TO_PIN(d->irq));
 }
 
 static struct irq_chip sysint1_irq_type = {
 	.name		= "SYSINT1",
-	.ack		= disable_sysint1_irq,
-	.mask		= disable_sysint1_irq,
-	.mask_ack	= disable_sysint1_irq,
-	.unmask		= enable_sysint1_irq,
+	.irq_mask	= disable_sysint1_irq,
+	.irq_unmask	= enable_sysint1_irq,
 };
 
-static void disable_sysint2_irq(unsigned int irq)
+static void disable_sysint2_irq(struct irq_data *d)
 {
-	icu2_clear(MSYSINT2REG, 1 << SYSINT2_IRQ_TO_PIN(irq));
+	icu2_clear(MSYSINT2REG, 1 << SYSINT2_IRQ_TO_PIN(d->irq));
 }
 
-static void enable_sysint2_irq(unsigned int irq)
+static void enable_sysint2_irq(struct irq_data *d)
 {
-	icu2_set(MSYSINT2REG, 1 << SYSINT2_IRQ_TO_PIN(irq));
+	icu2_set(MSYSINT2REG, 1 << SYSINT2_IRQ_TO_PIN(d->irq));
 }
 
 static struct irq_chip sysint2_irq_type = {
 	.name		= "SYSINT2",
-	.ack		= disable_sysint2_irq,
-	.mask		= disable_sysint2_irq,
-	.mask_ack	= disable_sysint2_irq,
-	.unmask		= enable_sysint2_irq,
+	.irq_mask	= disable_sysint2_irq,
+	.irq_unmask	= enable_sysint2_irq,
 };
 
 static inline int set_sysint1_assign(unsigned int irq, unsigned char assign)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	uint16_t intassign0, intassign1;
 	unsigned int pin;
 
@@ -540,7 +536,7 @@
 
 static inline int set_sysint2_assign(unsigned int irq, unsigned char assign)
 {
-	struct irq_desc *desc = irq_desc + irq;
+	struct irq_desc *desc = irq_to_desc(irq);
 	uint16_t intassign2, intassign3;
 	unsigned int pin;
 

diff --git a/arch/mips/vr41xx/common/irq.c b/arch/mips/vr41xx/common/irq.c
index 0975eb7..9ff7f39 100644
--- a/arch/mips/vr41xx/common/irq.c
+++ b/arch/mips/vr41xx/common/irq.c

@@ -62,7 +62,6 @@
 static void irq_dispatch(unsigned int irq)
 {
 	irq_cascade_t *cascade;
-	struct irq_desc *desc;
 
 	if (irq >= NR_IRQS) {
 		atomic_inc(&irq_err_count);
@@ -71,14 +70,16 @@
 
 	cascade = irq_cascade + irq;
 	if (cascade->get_irq != NULL) {
-		unsigned int source_irq = irq;
+		struct irq_desc *desc = irq_to_desc(irq);
+		struct irq_data *idata = irq_desc_get_irq_data(desc);
+		struct irq_chip *chip = irq_desc_get_chip(desc);
 		int ret;
-		desc = irq_desc + source_irq;
-		if (desc->chip->mask_ack)
-			desc->chip->mask_ack(source_irq);
+
+		if (chip->irq_mask_ack)
+			chip->irq_mask_ack(idata);
 		else {
-			desc->chip->mask(source_irq);
-			desc->chip->ack(source_irq);
+			chip->irq_mask(idata);
+			chip->irq_ack(idata);
 		}
 		ret = cascade->get_irq(irq);
 		irq = ret;
@@ -86,8 +87,8 @@
 			atomic_inc(&irq_err_count);
 		else
 			irq_dispatch(irq);
-		if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
-			desc->chip->unmask(source_irq);
+		if (!(desc->status & IRQ_DISABLED) && chip->irq_unmask)
+			chip->irq_unmask(idata);
 	} else
 		do_IRQ(irq);
 }

diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index f4f4d70..b7ed8d7 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c

@@ -544,7 +544,7 @@
 unsigned long *empty_zero_page __read_mostly;
 EXPORT_SYMBOL(empty_zero_page);
 
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	int i,free = 0,total = 0,reserved = 0;
 	int shared = 0, cached = 0;

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index d17d04c..33794c1 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c

@@ -821,7 +821,7 @@
 				memzcan();
 				break;
 			case 'i':
-				show_mem();
+				show_mem(0);
 				break;
 			default:
 				termch = cmd;

diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index ff6f62e..623f2fb 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h

@@ -112,7 +112,6 @@
 
 /**
  * struct ccw driver - device driver for channel attached devices
- * @owner: owning module
  * @ids: ids supported by this driver
  * @probe: function called on probe
  * @remove: function called on remove
@@ -128,10 +127,8 @@
  * @restore: callback for restoring after hibernation
  * @uc_handler: callback for unit check handler
  * @driver: embedded device driver structure
- * @name: device driver name
  */
 struct ccw_driver {
-	struct module *owner;
 	struct ccw_device_id *ids;
 	int (*probe) (struct ccw_device *);
 	void (*remove) (struct ccw_device *);
@@ -147,7 +144,6 @@
 	int (*restore)(struct ccw_device *);
 	enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *);
 	struct device_driver driver;
-	char *name;
 };
 
 extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv,

diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index c79c1e7..f2ea2c5 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h

@@ -29,8 +29,6 @@
 
 /**
  * struct ccwgroup_driver - driver for ccw group devices
- * @owner: driver owner
- * @name: driver name
  * @max_slaves: maximum number of slave devices
  * @driver_id: unique id
  * @probe: function called on probe
@@ -46,8 +44,6 @@
  * @driver: embedded driver structure
  */
 struct ccwgroup_driver {
-	struct module *owner;
-	char *name;
 	int max_slaves;
 	unsigned long driver_id;
 

diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
new file mode 100644
index 0000000..7488e52
--- /dev/null
+++ b/arch/s390/include/asm/cmpxchg.h

@@ -0,0 +1,225 @@
+/*
+ * Copyright IBM Corp. 1999, 2011
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#ifndef __ASM_CMPXCHG_H
+#define __ASM_CMPXCHG_H
+
+#include <linux/types.h>
+
+extern void __xchg_called_with_bad_pointer(void);
+
+static inline unsigned long __xchg(unsigned long x, void *ptr, int size)
+{
+	unsigned long addr, old;
+	int shift;
+
+	switch (size) {
+	case 1:
+		addr = (unsigned long) ptr;
+		shift = (3 ^ (addr & 3)) << 3;
+		addr ^= addr & 3;
+		asm volatile(
+			"	l	%0,%4\n"
+			"0:	lr	0,%0\n"
+			"	nr	0,%3\n"
+			"	or	0,%2\n"
+			"	cs	%0,0,%4\n"
+			"	jl	0b\n"
+			: "=&d" (old), "=Q" (*(int *) addr)
+			: "d" (x << shift), "d" (~(255 << shift)),
+			  "Q" (*(int *) addr) : "memory", "cc", "0");
+		return old >> shift;
+	case 2:
+		addr = (unsigned long) ptr;
+		shift = (2 ^ (addr & 2)) << 3;
+		addr ^= addr & 2;
+		asm volatile(
+			"	l	%0,%4\n"
+			"0:	lr	0,%0\n"
+			"	nr	0,%3\n"
+			"	or	0,%2\n"
+			"	cs	%0,0,%4\n"
+			"	jl	0b\n"
+			: "=&d" (old), "=Q" (*(int *) addr)
+			: "d" (x << shift), "d" (~(65535 << shift)),
+			  "Q" (*(int *) addr) : "memory", "cc", "0");
+		return old >> shift;
+	case 4:
+		asm volatile(
+			"	l	%0,%3\n"
+			"0:	cs	%0,%2,%3\n"
+			"	jl	0b\n"
+			: "=&d" (old), "=Q" (*(int *) ptr)
+			: "d" (x), "Q" (*(int *) ptr)
+			: "memory", "cc");
+		return old;
+#ifdef CONFIG_64BIT
+	case 8:
+		asm volatile(
+			"	lg	%0,%3\n"
+			"0:	csg	%0,%2,%3\n"
+			"	jl	0b\n"
+			: "=&d" (old), "=m" (*(long *) ptr)
+			: "d" (x), "Q" (*(long *) ptr)
+			: "memory", "cc");
+		return old;
+#endif /* CONFIG_64BIT */
+	}
+	__xchg_called_with_bad_pointer();
+	return x;
+}
+
+#define xchg(ptr, x)							  \
+({									  \
+	__typeof__(*(ptr)) __ret;					  \
+	__ret = (__typeof__(*(ptr)))					  \
+		__xchg((unsigned long)(x), (void *)(ptr), sizeof(*(ptr)));\
+	__ret;								  \
+})
+
+/*
+ * Atomic compare and exchange.	 Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.	Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#define __HAVE_ARCH_CMPXCHG
+
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+static inline unsigned long __cmpxchg(void *ptr, unsigned long old,
+				      unsigned long new, int size)
+{
+	unsigned long addr, prev, tmp;
+	int shift;
+
+	switch (size) {
+	case 1:
+		addr = (unsigned long) ptr;
+		shift = (3 ^ (addr & 3)) << 3;
+		addr ^= addr & 3;
+		asm volatile(
+			"	l	%0,%2\n"
+			"0:	nr	%0,%5\n"
+			"	lr	%1,%0\n"
+			"	or	%0,%3\n"
+			"	or	%1,%4\n"
+			"	cs	%0,%1,%2\n"
+			"	jnl	1f\n"
+			"	xr	%1,%0\n"
+			"	nr	%1,%5\n"
+			"	jnz	0b\n"
+			"1:"
+			: "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr)
+			: "d" (old << shift), "d" (new << shift),
+			  "d" (~(255 << shift)), "Q" (*(int *) ptr)
+			: "memory", "cc");
+		return prev >> shift;
+	case 2:
+		addr = (unsigned long) ptr;
+		shift = (2 ^ (addr & 2)) << 3;
+		addr ^= addr & 2;
+		asm volatile(
+			"	l	%0,%2\n"
+			"0:	nr	%0,%5\n"
+			"	lr	%1,%0\n"
+			"	or	%0,%3\n"
+			"	or	%1,%4\n"
+			"	cs	%0,%1,%2\n"
+			"	jnl	1f\n"
+			"	xr	%1,%0\n"
+			"	nr	%1,%5\n"
+			"	jnz	0b\n"
+			"1:"
+			: "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr)
+			: "d" (old << shift), "d" (new << shift),
+			  "d" (~(65535 << shift)), "Q" (*(int *) ptr)
+			: "memory", "cc");
+		return prev >> shift;
+	case 4:
+		asm volatile(
+			"	cs	%0,%3,%1\n"
+			: "=&d" (prev), "=Q" (*(int *) ptr)
+			: "0" (old), "d" (new), "Q" (*(int *) ptr)
+			: "memory", "cc");
+		return prev;
+#ifdef CONFIG_64BIT
+	case 8:
+		asm volatile(
+			"	csg	%0,%3,%1\n"
+			: "=&d" (prev), "=Q" (*(long *) ptr)
+			: "0" (old), "d" (new), "Q" (*(long *) ptr)
+			: "memory", "cc");
+		return prev;
+#endif /* CONFIG_64BIT */
+	}
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+#define cmpxchg(ptr, o, n)						\
+	((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o),	\
+				       (unsigned long)(n), sizeof(*(ptr))))
+
+#ifdef CONFIG_64BIT
+#define cmpxchg64(ptr, o, n)						\
+({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	cmpxchg((ptr), (o), (n));					\
+})
+#else /* CONFIG_64BIT */
+static inline unsigned long long __cmpxchg64(void *ptr,
+					     unsigned long long old,
+					     unsigned long long new)
+{
+	register_pair rp_old = {.pair = old};
+	register_pair rp_new = {.pair = new};
+
+	asm volatile(
+		"	cds	%0,%2,%1"
+		: "+&d" (rp_old), "=Q" (ptr)
+		: "d" (rp_new), "Q" (ptr)
+		: "cc");
+	return rp_old.pair;
+}
+#define cmpxchg64(ptr, o, n)						\
+	((__typeof__(*(ptr)))__cmpxchg64((ptr),				\
+					 (unsigned long long)(o),	\
+					 (unsigned long long)(n)))
+#endif /* CONFIG_64BIT */
+
+#include <asm-generic/cmpxchg-local.h>
+
+static inline unsigned long __cmpxchg_local(void *ptr,
+					    unsigned long old,
+					    unsigned long new, int size)
+{
+	switch (size) {
+	case 1:
+	case 2:
+	case 4:
+#ifdef CONFIG_64BIT
+	case 8:
+#endif
+		return __cmpxchg(ptr, old, new, size);
+	default:
+		return __cmpxchg_local_generic(ptr, old, new, size);
+	}
+
+	return old;
+}
+
+/*
+ * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
+ * them available.
+ */
+#define cmpxchg_local(ptr, o, n)					\
+	((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o),	\
+			(unsigned long)(n), sizeof(*(ptr))))
+
+#define cmpxchg64_local(ptr, o, n)	cmpxchg64((ptr), (o), (n))
+
+#endif /* __ASM_CMPXCHG_H */

diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 8f8d759..d382629 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h

@@ -14,6 +14,7 @@
 #include <asm/setup.h>
 #include <asm/processor.h>
 #include <asm/lowcore.h>
+#include <asm/cmpxchg.h>
 
 #ifdef __KERNEL__
 
@@ -120,161 +121,6 @@
 
 #define nop() asm volatile("nop")
 
-#define xchg(ptr,x)							  \
-({									  \
-	__typeof__(*(ptr)) __ret;					  \
-	__ret = (__typeof__(*(ptr)))					  \
-		__xchg((unsigned long)(x), (void *)(ptr),sizeof(*(ptr))); \
-	__ret;								  \
-})
-
-extern void __xchg_called_with_bad_pointer(void);
-
-static inline unsigned long __xchg(unsigned long x, void * ptr, int size)
-{
-	unsigned long addr, old;
-	int shift;
-
-        switch (size) {
-	case 1:
-		addr = (unsigned long) ptr;
-		shift = (3 ^ (addr & 3)) << 3;
-		addr ^= addr & 3;
-		asm volatile(
-			"	l	%0,%4\n"
-			"0:	lr	0,%0\n"
-			"	nr	0,%3\n"
-			"	or	0,%2\n"
-			"	cs	%0,0,%4\n"
-			"	jl	0b\n"
-			: "=&d" (old), "=Q" (*(int *) addr)
-			: "d" (x << shift), "d" (~(255 << shift)),
-			  "Q" (*(int *) addr) : "memory", "cc", "0");
-		return old >> shift;
-	case 2:
-		addr = (unsigned long) ptr;
-		shift = (2 ^ (addr & 2)) << 3;
-		addr ^= addr & 2;
-		asm volatile(
-			"	l	%0,%4\n"
-			"0:	lr	0,%0\n"
-			"	nr	0,%3\n"
-			"	or	0,%2\n"
-			"	cs	%0,0,%4\n"
-			"	jl	0b\n"
-			: "=&d" (old), "=Q" (*(int *) addr)
-			: "d" (x << shift), "d" (~(65535 << shift)),
-			  "Q" (*(int *) addr) : "memory", "cc", "0");
-		return old >> shift;
-	case 4:
-		asm volatile(
-			"	l	%0,%3\n"
-			"0:	cs	%0,%2,%3\n"
-			"	jl	0b\n"
-			: "=&d" (old), "=Q" (*(int *) ptr)
-			: "d" (x), "Q" (*(int *) ptr)
-			: "memory", "cc");
-		return old;
-#ifdef __s390x__
-	case 8:
-		asm volatile(
-			"	lg	%0,%3\n"
-			"0:	csg	%0,%2,%3\n"
-			"	jl	0b\n"
-			: "=&d" (old), "=m" (*(long *) ptr)
-			: "d" (x), "Q" (*(long *) ptr)
-			: "memory", "cc");
-		return old;
-#endif /* __s390x__ */
-	}
-	__xchg_called_with_bad_pointer();
-	return x;
-}
-
-/*
- * Atomic compare and exchange.  Compare OLD with MEM, if identical,
- * store NEW in MEM.  Return the initial value in MEM.  Success is
- * indicated by comparing RETURN with OLD.
- */
-
-#define __HAVE_ARCH_CMPXCHG 1
-
-#define cmpxchg(ptr, o, n)						\
-	((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o),	\
-					(unsigned long)(n), sizeof(*(ptr))))
-
-extern void __cmpxchg_called_with_bad_pointer(void);
-
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
-{
-	unsigned long addr, prev, tmp;
-	int shift;
-
-        switch (size) {
-	case 1:
-		addr = (unsigned long) ptr;
-		shift = (3 ^ (addr & 3)) << 3;
-		addr ^= addr & 3;
-		asm volatile(
-			"	l	%0,%2\n"
-			"0:	nr	%0,%5\n"
-			"	lr	%1,%0\n"
-			"	or	%0,%3\n"
-			"	or	%1,%4\n"
-			"	cs	%0,%1,%2\n"
-			"	jnl	1f\n"
-			"	xr	%1,%0\n"
-			"	nr	%1,%5\n"
-			"	jnz	0b\n"
-			"1:"
-			: "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr)
-			: "d" (old << shift), "d" (new << shift),
-			  "d" (~(255 << shift)), "Q" (*(int *) ptr)
-			: "memory", "cc");
-		return prev >> shift;
-	case 2:
-		addr = (unsigned long) ptr;
-		shift = (2 ^ (addr & 2)) << 3;
-		addr ^= addr & 2;
-		asm volatile(
-			"	l	%0,%2\n"
-			"0:	nr	%0,%5\n"
-			"	lr	%1,%0\n"
-			"	or	%0,%3\n"
-			"	or	%1,%4\n"
-			"	cs	%0,%1,%2\n"
-			"	jnl	1f\n"
-			"	xr	%1,%0\n"
-			"	nr	%1,%5\n"
-			"	jnz	0b\n"
-			"1:"
-			: "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr)
-			: "d" (old << shift), "d" (new << shift),
-			  "d" (~(65535 << shift)), "Q" (*(int *) ptr)
-			: "memory", "cc");
-		return prev >> shift;
-	case 4:
-		asm volatile(
-			"	cs	%0,%3,%1\n"
-			: "=&d" (prev), "=Q" (*(int *) ptr)
-			: "0" (old), "d" (new), "Q" (*(int *) ptr)
-			: "memory", "cc");
-		return prev;
-#ifdef __s390x__
-	case 8:
-		asm volatile(
-			"	csg	%0,%3,%1\n"
-			: "=&d" (prev), "=Q" (*(long *) ptr)
-			: "0" (old), "d" (new), "Q" (*(long *) ptr)
-			: "memory", "cc");
-		return prev;
-#endif /* __s390x__ */
-        }
-	__cmpxchg_called_with_bad_pointer();
-	return old;
-}
-
 /*
  * Force strict CPU ordering.
  * And yes, this is required on UP too when we're talking
@@ -353,46 +199,6 @@
 	__ctl_load(__dummy, cr, cr);	\
 })
 
-#include <linux/irqflags.h>
-
-#include <asm-generic/cmpxchg-local.h>
-
-static inline unsigned long __cmpxchg_local(volatile void *ptr,
-				      unsigned long old,
-				      unsigned long new, int size)
-{
-	switch (size) {
-	case 1:
-	case 2:
-	case 4:
-#ifdef __s390x__
-	case 8:
-#endif
-		return __cmpxchg(ptr, old, new, size);
-	default:
-		return __cmpxchg_local_generic(ptr, old, new, size);
-	}
-
-	return old;
-}
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n)					\
-	((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o),	\
-			(unsigned long)(n), sizeof(*(ptr))))
-#ifdef __s390x__
-#define cmpxchg64_local(ptr, o, n)					\
-  ({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_local((ptr), (o), (n));					\
-  })
-#else
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
-#endif
-
 /*
  * Use to set psw mask except for the first byte which
  * won't be changed by this function.

diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 1049ef2..e821525 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h

@@ -272,7 +272,11 @@
 #define __NR_fanotify_init	332
 #define __NR_fanotify_mark	333
 #define __NR_prlimit64		334
-#define NR_syscalls 335
+#define __NR_name_to_handle_at	335
+#define __NR_open_by_handle_at	336
+#define __NR_clock_adjtime	337
+#define __NR_syncfs		338
+#define NR_syscalls 339
 
 /* 
  * There are some system calls that are not present on 64 bit, some

diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 8e60fb2..1dc96ea 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S

@@ -1877,3 +1877,30 @@
 	llgtr	%r4,%r4			# const struct rlimit64 __user *
 	llgtr	%r5,%r5			# struct rlimit64 __user *
 	jg	sys_prlimit64		# branch to system call
+
+	.globl	sys_name_to_handle_at_wrapper
+sys_name_to_handle_at_wrapper:
+	lgfr	%r2,%r2			# int
+	llgtr	%r3,%r3			# const char __user *
+	llgtr	%r4,%r4			# struct file_handle __user *
+	llgtr	%r5,%r5			# int __user *
+	lgfr	%r6,%r6			# int
+	jg	sys_name_to_handle_at
+
+	.globl	compat_sys_open_by_handle_at_wrapper
+compat_sys_open_by_handle_at_wrapper:
+	lgfr	%r2,%r2			# int
+	llgtr	%r3,%r3			# struct file_handle __user *
+	lgfr	%r4,%r4			# int
+	jg	compat_sys_open_by_handle_at
+
+	.globl	compat_sys_clock_adjtime_wrapper
+compat_sys_clock_adjtime_wrapper:
+	lgfr	%r2,%r2			# clockid_t (int)
+	llgtr	%r3,%r3			# struct compat_timex __user *
+	jg	compat_sys_clock_adjtime
+
+	.globl	sys_syncfs_wrapper
+sys_syncfs_wrapper:
+	lgfr	%r2,%r2			# int
+	jg	sys_syncfs

diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 3b7e7dd..068f846 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c

@@ -94,6 +94,7 @@
 	unsigned int sinitrd_pfn, einitrd_pfn;
 #endif
 	int response;
+	int hlen;
 	size_t len;
 	char *savesys_ptr;
 	char defsys_cmd[DEFSYS_CMD_SIZE];
@@ -124,24 +125,27 @@
 	end_pfn = PFN_UP(__pa(&_end));
 	min_size = end_pfn << 2;
 
-	sprintf(defsys_cmd, "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X",
-		kernel_nss_name, stext_pfn - 1, stext_pfn, eshared_pfn - 1,
-		eshared_pfn, end_pfn);
+	hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE,
+			"DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X",
+			kernel_nss_name, stext_pfn - 1, stext_pfn,
+			eshared_pfn - 1, eshared_pfn, end_pfn);
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (INITRD_START && INITRD_SIZE) {
 		sinitrd_pfn = PFN_DOWN(__pa(INITRD_START));
 		einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE));
 		min_size = einitrd_pfn << 2;
-		sprintf(defsys_cmd, "%s EW %.5X-%.5X", defsys_cmd,
-		sinitrd_pfn, einitrd_pfn);
+		hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen,
+				 " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn);
 	}
 #endif
 
-	sprintf(defsys_cmd, "%s EW MINSIZE=%.7iK PARMREGS=0-13",
-		defsys_cmd, min_size);
-	sprintf(savesys_cmd, "SAVESYS %s \n IPL %s",
-		kernel_nss_name, kernel_nss_name);
+	snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen,
+		 " EW MINSIZE=%.7iK PARMREGS=0-13", min_size);
+	defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0';
+	snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s",
+		 kernel_nss_name, kernel_nss_name);
+	savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0';
 
 	__cpcmd(defsys_cmd, NULL, 0, &response);
 

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 6f63508..ed183c2 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c

@@ -102,16 +102,6 @@
 
 #include <asm/setup.h>
 
-static struct resource code_resource = {
-	.name  = "Kernel code",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
-};
-
-static struct resource data_resource = {
-	.name = "Kernel data",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
-};
-
 /*
  * condev= and conmode= setup parameter.
  */
@@ -436,21 +426,43 @@
 	lowcore_ptr[0] = lc;
 }
 
-static void __init
-setup_resources(void)
+static struct resource code_resource = {
+	.name  = "Kernel code",
+	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource data_resource = {
+	.name = "Kernel data",
+	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource bss_resource = {
+	.name = "Kernel bss",
+	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource __initdata *standard_resources[] = {
+	&code_resource,
+	&data_resource,
+	&bss_resource,
+};
+
+static void __init setup_resources(void)
 {
-	struct resource *res, *sub_res;
-	int i;
+	struct resource *res, *std_res, *sub_res;
+	int i, j;
 
 	code_resource.start = (unsigned long) &_text;
 	code_resource.end = (unsigned long) &_etext - 1;
 	data_resource.start = (unsigned long) &_etext;
 	data_resource.end = (unsigned long) &_edata - 1;
+	bss_resource.start = (unsigned long) &__bss_start;
+	bss_resource.end = (unsigned long) &__bss_stop - 1;
 
 	for (i = 0; i < MEMORY_CHUNKS; i++) {
 		if (!memory_chunk[i].size)
 			continue;
-		res = alloc_bootmem_low(sizeof(struct resource));
+		res = alloc_bootmem_low(sizeof(*res));
 		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
 		switch (memory_chunk[i].type) {
 		case CHUNK_READ_WRITE:
@@ -464,40 +476,24 @@
 			res->name = "reserved";
 		}
 		res->start = memory_chunk[i].addr;
-		res->end = memory_chunk[i].addr +  memory_chunk[i].size - 1;
+		res->end = res->start + memory_chunk[i].size - 1;
 		request_resource(&iomem_resource, res);
 
-		if (code_resource.start >= res->start  &&
-			code_resource.start <= res->end &&
-			code_resource.end > res->end) {
-			sub_res = alloc_bootmem_low(sizeof(struct resource));
-			memcpy(sub_res, &code_resource,
-				sizeof(struct resource));
-			sub_res->end = res->end;
-			code_resource.start = res->end + 1;
-			request_resource(res, sub_res);
+		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
+			std_res = standard_resources[j];
+			if (std_res->start < res->start ||
+			    std_res->start > res->end)
+				continue;
+			if (std_res->end > res->end) {
+				sub_res = alloc_bootmem_low(sizeof(*sub_res));
+				*sub_res = *std_res;
+				sub_res->end = res->end;
+				std_res->start = res->end + 1;
+				request_resource(res, sub_res);
+			} else {
+				request_resource(res, std_res);
+			}
 		}
-
-		if (code_resource.start >= res->start &&
-			code_resource.start <= res->end &&
-			code_resource.end <= res->end)
-			request_resource(res, &code_resource);
-
-		if (data_resource.start >= res->start &&
-			data_resource.start <= res->end &&
-			data_resource.end > res->end) {
-			sub_res = alloc_bootmem_low(sizeof(struct resource));
-			memcpy(sub_res, &data_resource,
-				sizeof(struct resource));
-			sub_res->end = res->end;
-			data_resource.start = res->end + 1;
-			request_resource(res, sub_res);
-		}
-
-		if (data_resource.start >= res->start &&
-			data_resource.start <= res->end &&
-			data_resource.end <= res->end)
-			request_resource(res, &data_resource);
 	}
 }
 

diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index a8fee1b..9c65fd4 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S

@@ -343,3 +343,7 @@
 SYSCALL(sys_fanotify_init,sys_fanotify_init,sys_fanotify_init_wrapper)
 SYSCALL(sys_fanotify_mark,sys_fanotify_mark,sys_fanotify_mark_wrapper)
 SYSCALL(sys_prlimit64,sys_prlimit64,sys_prlimit64_wrapper)
+SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrapper) /* 335 */
+SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at_wrapper)
+SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper)
+SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper)

diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile
index d698cdd..524c4b6 100644
--- a/arch/s390/oprofile/Makefile
+++ b/arch/s390/oprofile/Makefile

@@ -6,4 +6,5 @@
 		oprofilefs.o oprofile_stats.o  \
 		timer_int.o )
 
-oprofile-y :=	$(DRIVER_OBJS) init.o backtrace.o hwsampler.o
+oprofile-y :=	$(DRIVER_OBJS) init.o backtrace.o
+oprofile-$(CONFIG_64BIT)	+= hwsampler.o

diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 16c76de..c63d7e5 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c

@@ -18,6 +18,11 @@
 #include <linux/fs.h>
 
 #include "../../../drivers/oprofile/oprof.h"
+
+extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
+
+#ifdef CONFIG_64BIT
+
 #include "hwsampler.h"
 
 #define DEFAULT_INTERVAL	4096
@@ -37,8 +42,6 @@
 
 static struct oprofile_operations timer_ops;
 
-extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
-
 static int oprofile_hwsampler_start(void)
 {
 	int retval;
@@ -172,14 +175,22 @@
 	hwsampler_shutdown();
 }
 
+#endif /* CONFIG_64BIT */
+
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
 	ops->backtrace = s390_backtrace;
 
+#ifdef CONFIG_64BIT
 	return oprofile_hwsampler_init(ops);
+#else
+	return -ENODEV;
+#endif
 }
 
 void oprofile_arch_exit(void)
 {
+#ifdef CONFIG_64BIT
 	oprofile_hwsampler_exit();
+#endif
 }

diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 8237dd4..4e23639 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c

@@ -145,6 +145,10 @@
 	if (!model)
 		return -ENODEV;
 
+	/* Only the primary RTC has an address property */
+	if (!of_find_property(dp, "address", NULL))
+		return -ENODEV;
+
 	m48t59_rtc.resource = &op->resource[0];
 	if (!strcmp(model, "mk48t02")) {
 		/* Map the clock register io area read-only */

diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 6d0e02c..4c31e2b 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c

@@ -75,7 +75,7 @@
 	kmap_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE);
 }
 
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	printk("Mem-info:\n");
 	show_free_areas();

diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 1a2b36f..de7d8e2 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c

@@ -41,7 +41,7 @@
  * The normal show_free_areas() is too verbose on Tile, with dozens
  * of processors and often four NUMA zones each with high and lowmem.
  */
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	struct zone *zone;
 

diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index 1e78940..109ddc0 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common

@@ -8,6 +8,7 @@
 	default y
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_HARDIRQS_NO_DEPRECATED
+	select GENERIC_IRQ_SHOW
 
 config MMU
 	bool

diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 64cfea8..9e485c7 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c

@@ -18,52 +18,6 @@
 #include "os.h"
 
 /*
- * Generic, controller-independent functions:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
-{
-	int i = *(loff_t *) v, j;
-	struct irqaction * action;
-	unsigned long flags;
-
-	if (i == 0) {
-		seq_printf(p, "           ");
-		for_each_online_cpu(j)
-			seq_printf(p, "CPU%d       ",j);
-		seq_putc(p, '\n');
-	}
-
-	if (i < NR_IRQS) {
-		struct irq_desc *desc = irq_to_desc(i);
-
-		raw_spin_lock_irqsave(&desc->lock, flags);
-		action = desc->action;
-		if (!action)
-			goto skip;
-		seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-#endif
-		seq_printf(p, " %14s", get_irq_desc_chip(desc)->name);
-		seq_printf(p, "  %s", action->name);
-
-		for (action=action->next; action; action = action->next)
-			seq_printf(p, ", %s", action->name);
-
-		seq_putc(p, '\n');
-skip:
-		raw_spin_unlock_irqrestore(&desc->lock, flags);
-	} else if (i == NR_IRQS)
-		seq_putc(p, '\n');
-
-	return 0;
-}
-
-/*
  * This list is accessed under irq_lock, except in sigio_handler,
  * where it is safe from being modified.  IRQ handlers won't change it -
  * if an IRQ source has vanished, it will be freed by free_irqs just
@@ -390,11 +344,10 @@
 {
 	int i;
 
-	set_irq_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
+	irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
 
-	for (i = 1; i < NR_IRQS; i++) {
-		set_irq_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
-	}
+	for (i = 1; i < NR_IRQS; i++)
+		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
 }
 
 /*

diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 3dbe370..1fc0263 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c

@@ -55,7 +55,7 @@
  */
 struct meminfo meminfo;
 
-void show_mem(void)
+void show_mem(unsigned int filter)
 {
 	int free = 0, total = 0, reserved = 0;
 	int shared = 0, cached = 0, slab = 0, i;

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 140e254..cc6c53a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig

@@ -71,6 +71,7 @@
 	select GENERIC_IRQ_SHOW
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
+	select ARCH_NO_SYSDEV_OPS
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index a09e1f0..d68fca6 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h

@@ -45,7 +45,7 @@
 #include <linux/stringify.h>
 
 #ifdef CONFIG_SMP
-#define __percpu_arg(x)		"%%"__stringify(__percpu_seg)":%P" #x
+#define __percpu_prefix		"%%"__stringify(__percpu_seg)":"
 #define __my_cpu_offset		percpu_read(this_cpu_off)
 
 /*
@@ -62,9 +62,11 @@
 	(typeof(*(ptr)) __kernel __force *)tcp_ptr__;	\
 })
 #else
-#define __percpu_arg(x)		"%P" #x
+#define __percpu_prefix		""
 #endif
 
+#define __percpu_arg(x)		__percpu_prefix "%P" #x
+
 /*
  * Initialized pointers to per-cpu variables needed for the boot
  * processor need to use these macros to get the proper address
@@ -507,6 +509,11 @@
  * it in software.  The address used in the cmpxchg16 instruction must be
  * aligned to a 16 byte boundary.
  */
+#ifdef CONFIG_SMP
+#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" P6_NOP3
+#else
+#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" P6_NOP2
+#endif
 #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)			\
 ({									\
 	char __ret;							\
@@ -515,12 +522,12 @@
 	typeof(o2) __o2 = o2;						\
 	typeof(o2) __n2 = n2;						\
 	typeof(o2) __dummy;						\
-	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4,	\
-		       "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t",	\
+	alternative_io(CMPXCHG16B_EMU_CALL,				\
+		       "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t",	\
 		       X86_FEATURE_CX16,				\
 		       ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),		\
 		       "S" (&pcp1), "b"(__n1), "c"(__n2),		\
-		       "a"(__o1), "d"(__o2));				\
+		       "a"(__o1), "d"(__o2) : "memory");		\
 	__ret;								\
 })
 

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 6e11c81..246d727 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c

@@ -21,7 +21,7 @@
 #include <linux/acpi.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/msi.h>
 #include <asm/pci-direct.h>
@@ -1260,7 +1260,7 @@
  * disable suspend until real resume implemented
  */
 
-static int amd_iommu_resume(struct sys_device *dev)
+static void amd_iommu_resume(void)
 {
 	struct amd_iommu *iommu;
 
@@ -1276,11 +1276,9 @@
 	 */
 	amd_iommu_flush_all_devices();
 	amd_iommu_flush_all_domains();
-
-	return 0;
 }
 
-static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
+static int amd_iommu_suspend(void)
 {
 	/* disable IOMMUs to go out of the way for BIOS */
 	disable_iommus();
@@ -1288,17 +1286,11 @@
 	return 0;
 }
 
-static struct sysdev_class amd_iommu_sysdev_class = {
-	.name = "amd_iommu",
+static struct syscore_ops amd_iommu_syscore_ops = {
 	.suspend = amd_iommu_suspend,
 	.resume = amd_iommu_resume,
 };
 
-static struct sys_device device_amd_iommu = {
-	.id = 0,
-	.cls = &amd_iommu_sysdev_class,
-};
-
 /*
  * This is the core init function for AMD IOMMU hardware in the system.
  * This function is called from the generic x86 DMA layer initialization
@@ -1415,14 +1407,6 @@
 		goto free;
 	}
 
-	ret = sysdev_class_register(&amd_iommu_sysdev_class);
-	if (ret)
-		goto free;
-
-	ret = sysdev_register(&device_amd_iommu);
-	if (ret)
-		goto free;
-
 	ret = amd_iommu_init_devices();
 	if (ret)
 		goto free;
@@ -1441,6 +1425,8 @@
 
 	amd_iommu_init_notifier();
 
+	register_syscore_ops(&amd_iommu_syscore_ops);
+
 	if (iommu_pass_through)
 		goto out;
 

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 966673f..fabf01e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c

@@ -24,7 +24,7 @@
 #include <linux/ftrace.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/timex.h>
 #include <linux/dmar.h>
@@ -2046,7 +2046,7 @@
 	unsigned int apic_thmr;
 } apic_pm_state;
 
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
+static int lapic_suspend(void)
 {
 	unsigned long flags;
 	int maxlvt;
@@ -2084,23 +2084,21 @@
 	return 0;
 }
 
-static int lapic_resume(struct sys_device *dev)
+static void lapic_resume(void)
 {
 	unsigned int l, h;
 	unsigned long flags;
-	int maxlvt;
-	int ret = 0;
+	int maxlvt, ret;
 	struct IO_APIC_route_entry **ioapic_entries = NULL;
 
 	if (!apic_pm_state.active)
-		return 0;
+		return;
 
 	local_irq_save(flags);
 	if (intr_remapping_enabled) {
 		ioapic_entries = alloc_ioapic_entries();
 		if (!ioapic_entries) {
 			WARN(1, "Alloc ioapic_entries in lapic resume failed.");
-			ret = -ENOMEM;
 			goto restore;
 		}
 
@@ -2162,8 +2160,6 @@
 	}
 restore:
 	local_irq_restore(flags);
-
-	return ret;
 }
 
 /*
@@ -2171,17 +2167,11 @@
  * are needed on every CPU up until machine_halt/restart/poweroff.
  */
 
-static struct sysdev_class lapic_sysclass = {
-	.name		= "lapic",
+static struct syscore_ops lapic_syscore_ops = {
 	.resume		= lapic_resume,
 	.suspend	= lapic_suspend,
 };
 
-static struct sys_device device_lapic = {
-	.id	= 0,
-	.cls	= &lapic_sysclass,
-};
-
 static void __cpuinit apic_pm_activate(void)
 {
 	apic_pm_state.active = 1;
@@ -2189,16 +2179,11 @@
 
 static int __init init_lapic_sysfs(void)
 {
-	int error;
-
-	if (!cpu_has_apic)
-		return 0;
 	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
+	if (cpu_has_apic)
+		register_syscore_ops(&lapic_syscore_ops);
 
-	error = sysdev_class_register(&lapic_sysclass);
-	if (!error)
-		error = sysdev_register(&device_lapic);
-	return error;
+	return 0;
 }
 
 /* local apic needs to resume before other devices access its registers. */

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 180ca24..68df09b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c

@@ -30,7 +30,7 @@
 #include <linux/compiler.h>
 #include <linux/acpi.h>
 #include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
 #include <linux/freezer.h>
@@ -2918,89 +2918,84 @@
 
 late_initcall(io_apic_bug_finalize);
 
-struct sysfs_ioapic_data {
-	struct sys_device dev;
-	struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+static struct IO_APIC_route_entry *ioapic_saved_data[MAX_IO_APICS];
 
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
+static void suspend_ioapic(int ioapic_id)
 {
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
+	struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id];
 	int i;
 
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
-		*entry = ioapic_read_entry(dev->id, i);
+	if (!saved_data)
+		return;
+
+	for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++)
+		saved_data[i] = ioapic_read_entry(ioapic_id, i);
+}
+
+static int ioapic_suspend(void)
+{
+	int ioapic_id;
+
+	for (ioapic_id = 0; ioapic_id < nr_ioapics; ioapic_id++)
+		suspend_ioapic(ioapic_id);
 
 	return 0;
 }
 
-static int ioapic_resume(struct sys_device *dev)
+static void resume_ioapic(int ioapic_id)
 {
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
+	struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id];
 	unsigned long flags;
 	union IO_APIC_reg_00 reg_00;
 	int i;
 
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
+	if (!saved_data)
+		return;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(dev->id, 0);
-	if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
-		reg_00.bits.ID = mp_ioapics[dev->id].apicid;
-		io_apic_write(dev->id, 0, reg_00.raw);
+	reg_00.raw = io_apic_read(ioapic_id, 0);
+	if (reg_00.bits.ID != mp_ioapics[ioapic_id].apicid) {
+		reg_00.bits.ID = mp_ioapics[ioapic_id].apicid;
+		io_apic_write(ioapic_id, 0, reg_00.raw);
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-		ioapic_write_entry(dev->id, i, entry[i]);
-
-	return 0;
+	for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++)
+		ioapic_write_entry(ioapic_id, i, saved_data[i]);
 }
 
-static struct sysdev_class ioapic_sysdev_class = {
-	.name = "ioapic",
+static void ioapic_resume(void)
+{
+	int ioapic_id;
+
+	for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--)
+		resume_ioapic(ioapic_id);
+}
+
+static struct syscore_ops ioapic_syscore_ops = {
 	.suspend = ioapic_suspend,
 	.resume = ioapic_resume,
 };
 
-static int __init ioapic_init_sysfs(void)
+static int __init ioapic_init_ops(void)
 {
-	struct sys_device * dev;
-	int i, size, error;
+	int i;
 
-	error = sysdev_class_register(&ioapic_sysdev_class);
-	if (error)
-		return error;
+	for (i = 0; i < nr_ioapics; i++) {
+		unsigned int size;
 
-	for (i = 0; i < nr_ioapics; i++ ) {
-		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+		size = nr_ioapic_registers[i]
 			* sizeof(struct IO_APIC_route_entry);
-		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
-		if (!mp_ioapic_data[i]) {
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
-		dev = &mp_ioapic_data[i]->dev;
-		dev->id = i;
-		dev->cls = &ioapic_sysdev_class;
-		error = sysdev_register(dev);
-		if (error) {
-			kfree(mp_ioapic_data[i]);
-			mp_ioapic_data[i] = NULL;
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
+		ioapic_saved_data[i] = kzalloc(size, GFP_KERNEL);
+		if (!ioapic_saved_data[i])
+			pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
 	}
 
+	register_syscore_ops(&ioapic_syscore_ops);
+
 	return 0;
 }
 
-device_initcall(ioapic_init_sysfs);
+device_initcall(ioapic_init_ops);
 
 /*
  * Dynamic irq allocate and deallocation

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ab11229..5a05ef6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c

@@ -21,6 +21,7 @@
 #include <linux/percpu.h>
 #include <linux/string.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
@@ -1749,14 +1750,14 @@
 	return 0;
 }
 
-static int mce_suspend(struct sys_device *dev, pm_message_t state)
+static int mce_suspend(void)
 {
 	return mce_disable_error_reporting();
 }
 
-static int mce_shutdown(struct sys_device *dev)
+static void mce_shutdown(void)
 {
-	return mce_disable_error_reporting();
+	mce_disable_error_reporting();
 }
 
 /*
@@ -1764,14 +1765,18 @@
  * Only one CPU is active at this time, the others get re-added later using
  * CPU hotplug:
  */
-static int mce_resume(struct sys_device *dev)
+static void mce_resume(void)
 {
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
-
-	return 0;
 }
 
+static struct syscore_ops mce_syscore_ops = {
+	.suspend	= mce_suspend,
+	.shutdown	= mce_shutdown,
+	.resume		= mce_resume,
+};
+
 static void mce_cpu_restart(void *data)
 {
 	del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1808,9 +1813,6 @@
 }
 
 static struct sysdev_class mce_sysclass = {
-	.suspend	= mce_suspend,
-	.shutdown	= mce_shutdown,
-	.resume		= mce_resume,
 	.name		= "machinecheck",
 };
 
@@ -2139,6 +2141,7 @@
 			return err;
 	}
 
+	register_syscore_ops(&mce_syscore_ops);
 	register_hotcpu_notifier(&mce_cpu_notifier);
 	misc_register(&mce_log_device);
 

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index bebabec..307dfbb 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c

@@ -45,6 +45,7 @@
 #include <linux/cpu.h>
 #include <linux/pci.h>
 #include <linux/smp.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/processor.h>
 #include <asm/e820.h>
@@ -630,7 +631,7 @@
 
 static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
 
-static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
+static int mtrr_save(void)
 {
 	int i;
 
@@ -642,7 +643,7 @@
 	return 0;
 }
 
-static int mtrr_restore(struct sys_device *sysdev)
+static void mtrr_restore(void)
 {
 	int i;
 
@@ -653,12 +654,11 @@
 				    mtrr_value[i].ltype);
 		}
 	}
-	return 0;
 }
 
 
 
-static struct sysdev_driver mtrr_sysdev_driver = {
+static struct syscore_ops mtrr_syscore_ops = {
 	.suspend	= mtrr_save,
 	.resume		= mtrr_restore,
 };
@@ -839,7 +839,7 @@
 	 * TBD: is there any system with such CPU which supports
 	 * suspend/resume? If no, we should remove the code.
 	 */
-	sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver);
+	register_syscore_ops(&mtrr_syscore_ops);
 
 	return 0;
 }

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 87eab4a..eed3673a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c

@@ -500,12 +500,17 @@
 	return true;
 
 bios_fail:
-	printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
+	/*
+	 * We still allow the PMU driver to operate:
+	 */
+	printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
 	printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
-	return false;
+
+	return true;
 
 msr_fail:
 	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
+
 	return false;
 }
 
@@ -912,7 +917,7 @@
 		hwc->event_base	= 0;
 	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0;
+		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
 	} else {
 		hwc->config_base = x86_pmu_config_addr(hwc->idx);
 		hwc->event_base  = x86_pmu_event_addr(hwc->idx);

diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 0811f5e..c2520e1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c

@@ -777,6 +777,7 @@
 	 * the counter has reached zero value and continued counting before
 	 * real NMI signal was received:
 	 */
+	rdmsrl(hwc->event_base, v);
 	if (!(v & ARCH_P4_UNFLAGGED_BIT))
 		return 1;
 

diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 7a8cebc..706a9fb 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c

@@ -65,12 +65,10 @@
 		return 0;
 	ret = ih->xlate(ih, intspec, intsize, &virq, &type);
 	if (ret)
-		return ret;
+		return 0;
 	if (type == IRQ_TYPE_NONE)
 		return virq;
-	/* set the mask if it is different from current */
-	if (type == (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
-		set_irq_type(virq, type);
+	irq_set_irq_type(virq, type);
 	return virq;
 }
 EXPORT_SYMBOL_GPL(irq_create_of_mapping);

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 81ac6c7..e2a3f06 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c

@@ -27,7 +27,7 @@
 
 void printk_address(unsigned long address, int reliable)
 {
-	printk(" [<%p>] %s%pS\n", (void *) address,
+	printk(" [<%p>] %s%pB\n", (void *) address,
 			reliable ? "" : "? ", (void *) address);
 }
 

diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c
index b42ca69..8eeaa81 100644
--- a/arch/x86/kernel/i8237.c
+++ b/arch/x86/kernel/i8237.c

@@ -10,7 +10,7 @@
  */
 
 #include <linux/init.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/dma.h>
 
@@ -21,7 +21,7 @@
  * in asm/dma.h.
  */
 
-static int i8237A_resume(struct sys_device *dev)
+static void i8237A_resume(void)
 {
 	unsigned long flags;
 	int i;
@@ -41,31 +41,15 @@
 	enable_dma(4);
 
 	release_dma_lock(flags);
-
-	return 0;
 }
 
-static int i8237A_suspend(struct sys_device *dev, pm_message_t state)
-{
-	return 0;
-}
-
-static struct sysdev_class i8237_sysdev_class = {
-	.name		= "i8237",
-	.suspend	= i8237A_suspend,
+static struct syscore_ops i8237_syscore_ops = {
 	.resume		= i8237A_resume,
 };
 
-static struct sys_device device_i8237A = {
-	.id		= 0,
-	.cls		= &i8237_sysdev_class,
-};
-
-static int __init i8237A_init_sysfs(void)
+static int __init i8237A_init_ops(void)
 {
-	int error = sysdev_class_register(&i8237_sysdev_class);
-	if (!error)
-		error = sysdev_register(&device_i8237A);
-	return error;
+	register_syscore_ops(&i8237_syscore_ops);
+	return 0;
 }
-device_initcall(i8237A_init_sysfs);
+device_initcall(i8237A_init_ops);

diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index d9ca749..65b8f5c 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c

@@ -8,7 +8,7 @@
 #include <linux/random.h>
 #include <linux/init.h>
 #include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/bitops.h>
 #include <linux/acpi.h>
 #include <linux/io.h>
@@ -245,20 +245,19 @@
 	trigger[1] = inb(0x4d1) & 0xDE;
 }
 
-static int i8259A_resume(struct sys_device *dev)
+static void i8259A_resume(void)
 {
 	init_8259A(i8259A_auto_eoi);
 	restore_ELCR(irq_trigger);
-	return 0;
 }
 
-static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
+static int i8259A_suspend(void)
 {
 	save_ELCR(irq_trigger);
 	return 0;
 }
 
-static int i8259A_shutdown(struct sys_device *dev)
+static void i8259A_shutdown(void)
 {
 	/* Put the i8259A into a quiescent state that
 	 * the kernel initialization code can get it
@@ -266,21 +265,14 @@
 	 */
 	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
 	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
-	return 0;
 }
 
-static struct sysdev_class i8259_sysdev_class = {
-	.name = "i8259",
+static struct syscore_ops i8259_syscore_ops = {
 	.suspend = i8259A_suspend,
 	.resume = i8259A_resume,
 	.shutdown = i8259A_shutdown,
 };
 
-static struct sys_device device_i8259A = {
-	.id	= 0,
-	.cls	= &i8259_sysdev_class,
-};
-
 static void mask_8259A(void)
 {
 	unsigned long flags;
@@ -399,17 +391,12 @@
 
 struct legacy_pic *legacy_pic = &default_legacy_pic;
 
-static int __init i8259A_init_sysfs(void)
+static int __init i8259A_init_ops(void)
 {
-	int error;
+	if (legacy_pic == &default_legacy_pic)
+		register_syscore_ops(&i8259_syscore_ops);
 
-	if (legacy_pic != &default_legacy_pic)
-		return 0;
-
-	error = sysdev_class_register(&i8259_sysdev_class);
-	if (!error)
-		error = sysdev_register(&device_i8259A);
-	return error;
+	return 0;
 }
 
-device_initcall(i8259A_init_sysfs);
+device_initcall(i8259A_init_ops);

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index dba0b36..5f9ecff 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c

@@ -121,8 +121,8 @@
 		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
 		       dbg_reg_def[regno].size);
 
-	switch (regno) {
 #ifdef CONFIG_X86_32
+	switch (regno) {
 	case GDB_SS:
 		if (!user_mode_vm(regs))
 			*(unsigned long *)mem = __KERNEL_DS;
@@ -135,8 +135,8 @@
 	case GDB_FS:
 		*(unsigned long *)mem = 0xFFFF;
 		break;
-#endif
 	}
+#endif
 	return dbg_reg_def[regno].name;
 }
 

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 87af68e..5ed0ab5 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c

@@ -82,6 +82,7 @@
 #include <linux/cpu.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
@@ -438,33 +439,25 @@
 	return 0;
 }
 
-static int mc_sysdev_resume(struct sys_device *dev)
-{
-	int cpu = dev->id;
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	if (!cpu_online(cpu))
-		return 0;
-
-	/*
-	 * All non-bootup cpus are still disabled,
-	 * so only CPU 0 will apply ucode here.
-	 *
-	 * Moreover, there can be no concurrent
-	 * updates from any other places at this point.
-	 */
-	WARN_ON(cpu != 0);
-
-	if (uci->valid && uci->mc)
-		microcode_ops->apply_microcode(cpu);
-
-	return 0;
-}
-
 static struct sysdev_driver mc_sysdev_driver = {
 	.add			= mc_sysdev_add,
 	.remove			= mc_sysdev_remove,
-	.resume			= mc_sysdev_resume,
+};
+
+/**
+ * mc_bp_resume - Update boot CPU microcode during resume.
+ */
+static void mc_bp_resume(void)
+{
+	int cpu = smp_processor_id();
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	if (uci->valid && uci->mc)
+		microcode_ops->apply_microcode(cpu);
+}
+
+static struct syscore_ops mc_syscore_ops = {
+	.resume			= mc_bp_resume,
 };
 
 static __cpuinit int
@@ -542,6 +535,7 @@
 	if (error)
 		return error;
 
+	register_syscore_ops(&mc_syscore_ops);
 	register_hotcpu_notifier(&mc_cpu_notifier);
 
 	pr_info("Microcode Update Driver: v" MICROCODE_VERSION

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6f789a8..5a532ce 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c

@@ -714,10 +714,6 @@
 		*nr_m_spare += 1;
 	}
 }
-#else /* CONFIG_X86_IO_APIC */
-static
-inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
-#endif /* CONFIG_X86_IO_APIC */
 
 static int
 check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
@@ -731,6 +727,10 @@
 
 	return ret;
 }
+#else /* CONFIG_X86_IO_APIC */
+static
+inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
+#endif /* CONFIG_X86_IO_APIC */
 
 static int  __init replace_intsrc_all(struct mpc_table *mpc,
 					unsigned long mpc_new_phys,

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index c01ffa5..82ada01 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c

@@ -27,7 +27,7 @@
 #include <linux/kdebug.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 #include <linux/gfp.h>
 #include <asm/atomic.h>
@@ -589,7 +589,7 @@
 	aperture_alloc = aper_alloc;
 }
 
-static void gart_fixup_northbridges(struct sys_device *dev)
+static void gart_fixup_northbridges(void)
 {
 	int i;
 
@@ -613,33 +613,20 @@
 	}
 }
 
-static int gart_resume(struct sys_device *dev)
+static void gart_resume(void)
 {
 	pr_info("PCI-DMA: Resuming GART IOMMU\n");
 
-	gart_fixup_northbridges(dev);
+	gart_fixup_northbridges();
 
 	enable_gart_translations();
-
-	return 0;
 }
 
-static int gart_suspend(struct sys_device *dev, pm_message_t state)
-{
-	return 0;
-}
-
-static struct sysdev_class gart_sysdev_class = {
-	.name		= "gart",
-	.suspend	= gart_suspend,
+static struct syscore_ops gart_syscore_ops = {
 	.resume		= gart_resume,
 
 };
 
-static struct sys_device device_gart = {
-	.cls		= &gart_sysdev_class,
-};
-
 /*
  * Private Northbridge GATT initialization in case we cannot use the
  * AGP driver for some reason.
@@ -650,7 +637,7 @@
 	unsigned aper_base, new_aper_base;
 	struct pci_dev *dev;
 	void *gatt;
-	int i, error;
+	int i;
 
 	pr_info("PCI-DMA: Disabling AGP.\n");
 
@@ -685,12 +672,7 @@
 
 	agp_gatt_table = gatt;
 
-	error = sysdev_class_register(&gart_sysdev_class);
-	if (!error)
-		error = sysdev_register(&device_gart);
-	if (error)
-		panic("Could not register gart_sysdev -- "
-		      "would corrupt data on next suspend");
+	register_syscore_ops(&gart_syscore_ops);
 
 	flush_gart();
 

diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 3e8b08a..1e572c5 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S

@@ -10,6 +10,12 @@
 #include <asm/frame.h>
 #include <asm/dwarf2.h>
 
+#ifdef CONFIG_SMP
+#define SEG_PREFIX %gs:
+#else
+#define SEG_PREFIX
+#endif
+
 .text
 
 /*
@@ -37,13 +43,13 @@
 	pushf
 	cli
 
-	cmpq %gs:(%rsi), %rax
+	cmpq SEG_PREFIX(%rsi), %rax
 	jne not_same
-	cmpq %gs:8(%rsi), %rdx
+	cmpq SEG_PREFIX 8(%rsi), %rdx
 	jne not_same
 
-	movq %rbx, %gs:(%rsi)
-	movq %rcx, %gs:8(%rsi)
+	movq %rbx, SEG_PREFIX(%rsi)
+	movq %rcx, SEG_PREFIX 8(%rsi)
 
 	popf
 	mov $1, %al

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index e2b7b0c..8dace18 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c

@@ -15,7 +15,7 @@
 #include <linux/notifier.h>
 #include <linux/smp.h>
 #include <linux/oprofile.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/slab.h>
 #include <linux/moduleparam.h>
 #include <linux/kdebug.h>
@@ -536,7 +536,7 @@
 
 #ifdef CONFIG_PM
 
-static int nmi_suspend(struct sys_device *dev, pm_message_t state)
+static int nmi_suspend(void)
 {
 	/* Only one CPU left, just stop that one */
 	if (nmi_enabled == 1)
@@ -544,49 +544,31 @@
 	return 0;
 }
 
-static int nmi_resume(struct sys_device *dev)
+static void nmi_resume(void)
 {
 	if (nmi_enabled == 1)
 		nmi_cpu_start(NULL);
-	return 0;
 }
 
-static struct sysdev_class oprofile_sysclass = {
-	.name		= "oprofile",
+static struct syscore_ops oprofile_syscore_ops = {
 	.resume		= nmi_resume,
 	.suspend	= nmi_suspend,
 };
 
-static struct sys_device device_oprofile = {
-	.id	= 0,
-	.cls	= &oprofile_sysclass,
-};
-
-static int __init init_sysfs(void)
+static void __init init_suspend_resume(void)
 {
-	int error;
-
-	error = sysdev_class_register(&oprofile_sysclass);
-	if (error)
-		return error;
-
-	error = sysdev_register(&device_oprofile);
-	if (error)
-		sysdev_class_unregister(&oprofile_sysclass);
-
-	return error;
+	register_syscore_ops(&oprofile_syscore_ops);
 }
 
-static void exit_sysfs(void)
+static void exit_suspend_resume(void)
 {
-	sysdev_unregister(&device_oprofile);
-	sysdev_class_unregister(&oprofile_sysclass);
+	unregister_syscore_ops(&oprofile_syscore_ops);
 }
 
 #else
 
-static inline int  init_sysfs(void) { return 0; }
-static inline void exit_sysfs(void) { }
+static inline void init_suspend_resume(void) { }
+static inline void exit_suspend_resume(void) { }
 
 #endif /* CONFIG_PM */
 
@@ -789,9 +771,7 @@
 
 	mux_init(ops);
 
-	ret = init_sysfs();
-	if (ret)
-		return ret;
+	init_suspend_resume();
 
 	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
 	return 0;
@@ -799,5 +779,5 @@
 
 void op_nmi_exit(void)
 {
-	exit_sysfs();
+	exit_suspend_resume();
 }

diff --git a/arch/x86/platform/olpc/olpc-xo1.c b/arch/x86/platform/olpc/olpc-xo1.c
index 9951364..ab81fb2 100644
--- a/arch/x86/platform/olpc/olpc-xo1.c
+++ b/arch/x86/platform/olpc/olpc-xo1.c

@@ -72,9 +72,9 @@
 		dev_err(&pdev->dev, "can't fetch device resource info\n");
 		return -EIO;
 	}
-	if (strcmp(pdev->name, "olpc-xo1-pms") == 0)
+	if (strcmp(pdev->name, "cs5535-pms") == 0)
 		pms_base = res->start;
-	else if (strcmp(pdev->name, "olpc-xo1-ac-acpi") == 0)
+	else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0)
 		acpi_base = res->start;
 
 	/* If we have both addresses, we can override the poweroff hook */
@@ -90,9 +90,9 @@
 {
 	mfd_cell_disable(pdev);
 
-	if (strcmp(pdev->name, "olpc-xo1-pms") == 0)
+	if (strcmp(pdev->name, "cs5535-pms") == 0)
 		pms_base = 0;
-	else if (strcmp(pdev->name, "olpc-xo1-acpi") == 0)
+	else if (strcmp(pdev->name, "olpc-xo1-pm-acpi") == 0)
 		acpi_base = 0;
 
 	pm_power_off = NULL;
@@ -101,7 +101,7 @@
 
 static struct platform_driver cs5535_pms_drv = {
 	.driver = {
-		.name = "olpc-xo1-pms",
+		.name = "cs5535-pms",
 		.owner = THIS_MODULE,
 	},
 	.probe = olpc_xo1_probe,
@@ -110,7 +110,7 @@
 
 static struct platform_driver cs5535_acpi_drv = {
 	.driver = {
-		.name = "olpc-xo1-acpi",
+		.name = "olpc-xo1-pm-acpi",
 		.owner = THIS_MODULE,
 	},
 	.probe = olpc_xo1_probe,
@@ -121,22 +121,21 @@
 {
 	int r;
 
-	r = mfd_shared_platform_driver_register(&cs5535_pms_drv, "cs5535-pms");
+	r = platform_driver_register(&cs5535_pms_drv);
 	if (r)
 		return r;
 
-	r = mfd_shared_platform_driver_register(&cs5535_acpi_drv,
-			"cs5535-acpi");
+	r = platform_driver_register(&cs5535_acpi_drv);
 	if (r)
-		mfd_shared_platform_driver_unregister(&cs5535_pms_drv);
+		platform_driver_unregister(&cs5535_pms_drv);
 
 	return r;
 }
 
 static void __exit olpc_xo1_exit(void)
 {
-	mfd_shared_platform_driver_unregister(&cs5535_acpi_drv);
-	mfd_shared_platform_driver_unregister(&cs5535_pms_drv);
+	platform_driver_unregister(&cs5535_acpi_drv);
+	platform_driver_unregister(&cs5535_pms_drv);
 }
 
 MODULE_AUTHOR("Daniel Drake <dsd@laptop.org>");

diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 7283919..1d730b5 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig

@@ -7,6 +7,9 @@
 config XTENSA
 	def_bool y
 	select HAVE_IDE
+	select HAVE_GENERIC_HARDIRQS
+	select GENERIC_IRQ_SHOW
+	select GENERIC_HARDIRQS_NO_DEPRECATED
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
 	  primarily for embedded systems.  These processors are both
@@ -27,9 +30,6 @@
 config GENERIC_HWEIGHT
 	def_bool y
 
-config GENERIC_HARDIRQS
-	def_bool y
-
 config GENERIC_GPIO
 	def_bool y
 

diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 8750888..d77089d 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c

@@ -35,7 +35,6 @@
 asmlinkage void do_IRQ(int irq, struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	struct irq_desc *desc = irq_desc + irq;
 
 	if (irq >= NR_IRQS) {
 		printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
@@ -57,104 +56,69 @@
 			       sp - sizeof(struct thread_info));
 	}
 #endif
-	desc->handle_irq(irq, desc);
+	generic_handle_irq(irq);
 
 	irq_exit();
 	set_irq_regs(old_regs);
 }
 
-/*
- * Generic, controller-independent functions:
- */
-
-int show_interrupts(struct seq_file *p, void *v)
+int arch_show_interrupts(struct seq_file *p, int prec)
 {
-	int i = *(loff_t *) v, j;
-	struct irqaction * action;
-	unsigned long flags;
+	int j;
 
-	if (i == 0) {
-		seq_printf(p, "           ");
-		for_each_online_cpu(j)
-			seq_printf(p, "CPU%d       ",j);
-		seq_putc(p, '\n');
-	}
-
-	if (i < NR_IRQS) {
-		raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
-		action = irq_desc[i].action;
-		if (!action)
-			goto skip;
-		seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
-		seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-#endif
-		seq_printf(p, " %14s", irq_desc[i].chip->name);
-		seq_printf(p, "  %s", action->name);
-
-		for (action=action->next; action; action = action->next)
-			seq_printf(p, ", %s", action->name);
-
-		seq_putc(p, '\n');
-skip:
-		raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == NR_IRQS) {
-		seq_printf(p, "NMI: ");
-		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", nmi_count(j));
-		seq_putc(p, '\n');
-		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
-	}
+	seq_printf(p, "%*s: ", prec, "NMI");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", nmi_count(j));
+	seq_putc(p, '\n');
+	seq_printf(p, "%*s: ", prec, "ERR");
+	seq_printf(p, "%10u\n", atomic_read(&irq_err_count));
 	return 0;
 }
 
-static void xtensa_irq_mask(unsigned int irq)
+static void xtensa_irq_mask(struct irq_chip *d)
 {
-	cached_irq_mask &= ~(1 << irq);
+	cached_irq_mask &= ~(1 << d->irq);
 	set_sr (cached_irq_mask, INTENABLE);
 }
 
-static void xtensa_irq_unmask(unsigned int irq)
+static void xtensa_irq_unmask(struct irq_chip *d)
 {
-	cached_irq_mask |= 1 << irq;
+	cached_irq_mask |= 1 << d->irq;
 	set_sr (cached_irq_mask, INTENABLE);
 }
 
-static void xtensa_irq_enable(unsigned int irq)
+static void xtensa_irq_enable(struct irq_chip *d)
 {
-	variant_irq_enable(irq);
-	xtensa_irq_unmask(irq);
+	variant_irq_enable(d->irq);
+	xtensa_irq_unmask(d->irq);
 }
 
-static void xtensa_irq_disable(unsigned int irq)
+static void xtensa_irq_disable(struct irq_chip *d)
 {
-	xtensa_irq_mask(irq);
-	variant_irq_disable(irq);
+	xtensa_irq_mask(d->irq);
+	variant_irq_disable(d->irq);
 }
 
-static void xtensa_irq_ack(unsigned int irq)
+static void xtensa_irq_ack(struct irq_chip *d)
 {
-	set_sr(1 << irq, INTCLEAR);
+	set_sr(1 << d->irq, INTCLEAR);
 }
 
-static int xtensa_irq_retrigger(unsigned int irq)
+static int xtensa_irq_retrigger(struct irq_chip *d)
 {
-	set_sr (1 << irq, INTSET);
+	set_sr (1 << d->irq, INTSET);
 	return 1;
 }
 
 
 static struct irq_chip xtensa_irq_chip = {
 	.name		= "xtensa",
-	.enable		= xtensa_irq_enable,
-	.disable	= xtensa_irq_disable,
-	.mask		= xtensa_irq_mask,
-	.unmask		= xtensa_irq_unmask,
-	.ack		= xtensa_irq_ack,
-	.retrigger	= xtensa_irq_retrigger,
+	.irq_enable	= xtensa_irq_enable,
+	.irq_disable	= xtensa_irq_disable,
+	.irq_mask	= xtensa_irq_mask,
+	.irq_unmask	= xtensa_irq_unmask,
+	.irq_ack	= xtensa_irq_ack,
+	.irq_retrigger	= xtensa_irq_retrigger,
 };
 
 void __init init_IRQ(void)
@@ -165,25 +129,25 @@
 		int mask = 1 << index;
 
 		if (mask & XCHAL_INTTYPE_MASK_SOFTWARE)
-			set_irq_chip_and_handler(index, &xtensa_irq_chip,
+			irq_set_chip_and_handler(index, &xtensa_irq_chip,
 						 handle_simple_irq);
 
 		else if (mask & XCHAL_INTTYPE_MASK_EXTERN_EDGE)
-			set_irq_chip_and_handler(index, &xtensa_irq_chip,
+			irq_set_chip_and_handler(index, &xtensa_irq_chip,
 						 handle_edge_irq);
 
 		else if (mask & XCHAL_INTTYPE_MASK_EXTERN_LEVEL)
-			set_irq_chip_and_handler(index, &xtensa_irq_chip,
+			irq_set_chip_and_handler(index, &xtensa_irq_chip,
 						 handle_level_irq);
 
 		else if (mask & XCHAL_INTTYPE_MASK_TIMER)
-			set_irq_chip_and_handler(index, &xtensa_irq_chip,
+			irq_set_chip_and_handler(index, &xtensa_irq_chip,
 						 handle_edge_irq);
 
 		else	/* XCHAL_INTTYPE_MASK_WRITE_ERROR */
 			/* XCHAL_INTTYPE_MASK_NMI */
 
-			set_irq_chip_and_handler(index, &xtensa_irq_chip,
+			irq_set_chip_and_handler(index, &xtensa_irq_chip,
 						 handle_level_irq);
 	}
 

diff --git a/arch/xtensa/platforms/s6105/device.c b/arch/xtensa/platforms/s6105/device.c
index 65333ff..4f4fc97 100644
--- a/arch/xtensa/platforms/s6105/device.c
+++ b/arch/xtensa/platforms/s6105/device.c

@@ -120,7 +120,7 @@
 	irq = gpio_to_irq(pin);
 	if (irq < 0)
 		goto free;
-	if (set_irq_type(irq, IRQ_TYPE_LEVEL_LOW) < 0)
+	if (irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW) < 0)
 		goto free;
 	return irq;
 free:

diff --git a/arch/xtensa/variants/s6000/gpio.c b/arch/xtensa/variants/s6000/gpio.c
index 380a70f..7af0757 100644
--- a/arch/xtensa/variants/s6000/gpio.c
+++ b/arch/xtensa/variants/s6000/gpio.c

@@ -85,30 +85,29 @@
 	return gpiochip_add(&gpiochip);
 }
 
-static void ack(unsigned int irq)
+static void ack(struct irq_data *d)
 {
-	writeb(1 << (irq - IRQ_BASE), S6_REG_GPIO + S6_GPIO_IC);
+	writeb(1 << (d->irq - IRQ_BASE), S6_REG_GPIO + S6_GPIO_IC);
 }
 
-static void mask(unsigned int irq)
+static void mask(struct irq_data *d)
 {
 	u8 r = readb(S6_REG_GPIO + S6_GPIO_IE);
-	r &= ~(1 << (irq - IRQ_BASE));
+	r &= ~(1 << (d->irq - IRQ_BASE));
 	writeb(r, S6_REG_GPIO + S6_GPIO_IE);
 }
 
-static void unmask(unsigned int irq)
+static void unmask(struct irq_data *d)
 {
 	u8 m = readb(S6_REG_GPIO + S6_GPIO_IE);
-	m |= 1 << (irq - IRQ_BASE);
+	m |= 1 << (d->irq - IRQ_BASE);
 	writeb(m, S6_REG_GPIO + S6_GPIO_IE);
 }
 
-static int set_type(unsigned int irq, unsigned int type)
+static int set_type(struct irq_data *d, unsigned int type)
 {
-	const u8 m = 1 << (irq - IRQ_BASE);
+	const u8 m = 1 << (d->irq - IRQ_BASE);
 	irq_flow_handler_t handler;
-	struct irq_desc *desc;
 	u8 reg;
 
 	if (type == IRQ_TYPE_PROBE) {
@@ -129,8 +128,7 @@
 		handler = handle_edge_irq;
 	}
 	writeb(reg, S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IS);
-	desc = irq_to_desc(irq);
-	desc->handle_irq = handler;
+	__irq_set_handler_locked(irq, handler);
 
 	reg = readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_IEV);
 	if (type & (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_EDGE_RISING))
@@ -150,22 +148,23 @@
 
 static struct irq_chip gpioirqs = {
 	.name = "GPIO",
-	.ack = ack,
-	.mask = mask,
-	.unmask = unmask,
-	.set_type = set_type,
+	.irq_ack = ack,
+	.irq_mask = mask,
+	.irq_unmask = unmask,
+	.irq_set_type = set_type,
 };
 
 static u8 demux_masks[4];
 
 static void demux_irqs(unsigned int irq, struct irq_desc *desc)
 {
-	u8 *mask = get_irq_desc_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	u8 *mask = irq_desc_get_handler_data(desc);
 	u8 pending;
 	int cirq;
 
-	desc->chip->mask(irq);
-	desc->chip->ack(irq);
+	chip->irq_mask(&desc->irq_data);
+	chip->irq_ack(&desc->irq_data));
 	pending = readb(S6_REG_GPIO + S6_GPIO_BANK(0) + S6_GPIO_MIS) & *mask;
 	cirq = IRQ_BASE - 1;
 	while (pending) {
@@ -174,7 +173,7 @@
 		pending >>= n;
 		generic_handle_irq(cirq);
 	}
-	desc->chip->unmask(irq);
+	chip->irq_unmask(&desc->irq_data));
 }
 
 extern const signed char *platform_irq_mappings[XTENSA_NR_IRQS];
@@ -219,11 +218,11 @@
 				i = ffs(mask);
 				cirq += i;
 				mask >>= i;
-				set_irq_chip(cirq, &gpioirqs);
-				set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
+				irq_set_chip(cirq, &gpioirqs);
+				irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
 			} while (mask);
-			set_irq_data(irq, demux_masks + n);
-			set_irq_chained_handler(irq, demux_irqs);
+			irq_set_handler_data(irq, demux_masks + n);
+			irq_set_chained_handler(irq, demux_irqs);
 			if (++n == ARRAY_SIZE(demux_masks))
 				break;
 		}

diff --git a/block/blk-core.c b/block/blk-core.c
index 59b5c00..e0a0623 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c

@@ -271,7 +271,7 @@
  **/
 void blk_stop_queue(struct request_queue *q)
 {
-	cancel_delayed_work(&q->delay_work);
+	__cancel_delayed_work(&q->delay_work);
 	queue_flag_set(QUEUE_FLAG_STOPPED, q);
 }
 EXPORT_SYMBOL(blk_stop_queue);
@@ -2702,7 +2702,10 @@
 		/*
 		 * rq is already accounted, so use raw insert
 		 */
-		__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
+		if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
+			__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
+		else
+			__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
 	}
 
 	if (q) {

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 8524939..f911a2f 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c

@@ -32,6 +32,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/pci.h>
 #include <linux/pci-acpi.h>
+#include <linux/pci-aspm.h>
 #include <linux/acpi.h>
 #include <linux/slab.h>
 #include <acpi/acpi_bus.h>
@@ -564,7 +565,7 @@
 	/* Indicate support for various _OSC capabilities. */
 	if (pci_ext_cfg_avail(root->bus->self))
 		flags |= OSC_EXT_PCI_CONFIG_SUPPORT;
-	if (pcie_aspm_enabled())
+	if (pcie_aspm_support_enabled())
 		flags |= OSC_ACTIVE_STATE_PWR_SUPPORT |
 			OSC_CLOCK_PWR_CAPABILITY_SUPPORT;
 	if (pci_msi_enabled())
@@ -591,12 +592,16 @@
 
 		status = acpi_pci_osc_control_set(device->handle, &flags,
 					OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL);
-		if (ACPI_SUCCESS(status))
+		if (ACPI_SUCCESS(status)) {
 			dev_info(root->bus->bridge,
 				"ACPI _OSC control (0x%02x) granted\n", flags);
-		else
+		} else {
 			dev_dbg(root->bus->bridge,
 				"ACPI _OSC request failed (code %d)\n", status);
+			printk(KERN_INFO "Unable to assume _OSC PCIe control. "
+				"Disabling ASPM\n");
+			pcie_no_aspm();
+		}
 	}
 
 	pci_acpi_add_bus_pm_notifier(device, root->bus);

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index d57e8d0..e9e5238 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig

@@ -168,4 +168,11 @@
 	bool
 	default n
 
+config ARCH_NO_SYSDEV_OPS
+	bool
+	---help---
+	  To be selected by architectures that don't use sysdev class or
+	  sysdev driver power management (suspend/resume) and shutdown
+	  operations.
+
 endmenu

diff --git a/drivers/base/sys.c b/drivers/base/sys.c
index f6fb547..fbe72da 100644
--- a/drivers/base/sys.c
+++ b/drivers/base/sys.c

@@ -329,7 +329,7 @@
 }
 
 
-
+#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
 /**
  *	sysdev_shutdown - Shut down all system devices.
  *
@@ -524,6 +524,7 @@
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sysdev_resume);
+#endif /* CONFIG_ARCH_NO_SYSDEV_OPS */
 
 int __init system_bus_init(void)
 {

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 35658f4..9bf1398 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c

@@ -193,7 +193,7 @@
 	u64 *cfg_offset);
 static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
 	unsigned long *memory_bar);
-
+static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag);
 
 /* performant mode helper functions */
 static void  calc_bucket_map(int *bucket, int num_buckets, int nsgs,
@@ -231,7 +231,7 @@
  */
 static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c)
 {
-	if (likely(h->transMethod == CFGTBL_Trans_Performant))
+	if (likely(h->transMethod & CFGTBL_Trans_Performant))
 		c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
 }
 
@@ -556,6 +556,44 @@
 #define to_hba(n) container_of(n, struct ctlr_info, dev)
 #define to_drv(n) container_of(n, drive_info_struct, dev)
 
+/* List of controllers which cannot be reset on kexec with reset_devices */
+static u32 unresettable_controller[] = {
+	0x324a103C, /* Smart Array P712m */
+	0x324b103C, /* SmartArray P711m */
+	0x3223103C, /* Smart Array P800 */
+	0x3234103C, /* Smart Array P400 */
+	0x3235103C, /* Smart Array P400i */
+	0x3211103C, /* Smart Array E200i */
+	0x3212103C, /* Smart Array E200 */
+	0x3213103C, /* Smart Array E200i */
+	0x3214103C, /* Smart Array E200i */
+	0x3215103C, /* Smart Array E200i */
+	0x3237103C, /* Smart Array E500 */
+	0x323D103C, /* Smart Array P700m */
+	0x409C0E11, /* Smart Array 6400 */
+	0x409D0E11, /* Smart Array 6400 EM */
+};
+
+static int ctlr_is_resettable(struct ctlr_info *h)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
+		if (unresettable_controller[i] == h->board_id)
+			return 0;
+	return 1;
+}
+
+static ssize_t host_show_resettable(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct ctlr_info *h = to_hba(dev);
+
+	return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h));
+}
+static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL);
+
 static ssize_t host_store_rescan(struct device *dev,
 				 struct device_attribute *attr,
 				 const char *buf, size_t count)
@@ -741,6 +779,7 @@
 
 static struct attribute *cciss_host_attrs[] = {
 	&dev_attr_rescan.attr,
+	&dev_attr_resettable.attr,
 	NULL
 };
 
@@ -973,8 +1012,8 @@
 	temp64.val32.upper = c->ErrDesc.Addr.upper;
 	pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
 			    c->err_info, (dma_addr_t) temp64.val);
-	pci_free_consistent(h->pdev, sizeof(CommandList_struct),
-			    c, (dma_addr_t) c->busaddr);
+	pci_free_consistent(h->pdev, sizeof(CommandList_struct), c,
+		(dma_addr_t) cciss_tag_discard_error_bits(h, (u32) c->busaddr));
 }
 
 static inline ctlr_info_t *get_host(struct gendisk *disk)
@@ -1490,8 +1529,7 @@
 		return -EINVAL;
 	if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
-	ioc = (BIG_IOCTL_Command_struct *)
-	    kmalloc(sizeof(*ioc), GFP_KERNEL);
+	ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
 	if (!ioc) {
 		status = -ENOMEM;
 		goto cleanup1;
@@ -2653,6 +2691,10 @@
 			c->Request.CDB[0]);
 		return_status = IO_NEEDS_RETRY;
 		break;
+	case CMD_UNABORTABLE:
+		dev_warn(&h->pdev->dev, "cmd unabortable\n");
+		return_status = IO_ERROR;
+		break;
 	default:
 		dev_warn(&h->pdev->dev, "cmd 0x%02x returned "
 		       "unknown status %x\n", c->Request.CDB[0],
@@ -3103,6 +3145,13 @@
 			(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
 				DID_PASSTHROUGH : DID_ERROR);
 		break;
+	case CMD_UNABORTABLE:
+		dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd);
+		rq->errors = make_status_bytes(SAM_STAT_GOOD,
+			cmd->err_info->CommandStatus, DRIVER_OK,
+			cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC ?
+				DID_PASSTHROUGH : DID_ERROR);
+		break;
 	default:
 		dev_warn(&h->pdev->dev, "cmd %p returned "
 		       "unknown status %x\n", cmd,
@@ -3136,10 +3185,13 @@
 	return tag >> DIRECT_LOOKUP_SHIFT;
 }
 
-static inline u32 cciss_tag_discard_error_bits(u32 tag)
+static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag)
 {
-#define CCISS_ERROR_BITS 0x03
-	return tag & ~CCISS_ERROR_BITS;
+#define CCISS_PERF_ERROR_BITS ((1 << DIRECT_LOOKUP_SHIFT) - 1)
+#define CCISS_SIMPLE_ERROR_BITS 0x03
+	if (likely(h->transMethod & CFGTBL_Trans_Performant))
+		return tag & ~CCISS_PERF_ERROR_BITS;
+	return tag & ~CCISS_SIMPLE_ERROR_BITS;
 }
 
 static inline void cciss_mark_tag_indexed(u32 *tag)
@@ -3359,7 +3411,7 @@
 {
 	u32 a;
 
-	if (unlikely(h->transMethod != CFGTBL_Trans_Performant))
+	if (unlikely(!(h->transMethod & CFGTBL_Trans_Performant)))
 		return h->access.command_completed(h);
 
 	if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
@@ -3394,14 +3446,12 @@
 /* process completion of a non-indexed command */
 static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag)
 {
-	u32 tag;
 	CommandList_struct *c = NULL;
 	__u32 busaddr_masked, tag_masked;
 
-	tag = cciss_tag_discard_error_bits(raw_tag);
+	tag_masked = cciss_tag_discard_error_bits(h, raw_tag);
 	list_for_each_entry(c, &h->cmpQ, list) {
-		busaddr_masked = cciss_tag_discard_error_bits(c->busaddr);
-		tag_masked = cciss_tag_discard_error_bits(tag);
+		busaddr_masked = cciss_tag_discard_error_bits(h, c->busaddr);
 		if (busaddr_masked == tag_masked) {
 			finish_cmd(h, c, raw_tag);
 			return next_command(h);
@@ -3753,7 +3803,8 @@
 	}
 }
 
-static __devinit void cciss_enter_performant_mode(ctlr_info_t *h)
+static __devinit void cciss_enter_performant_mode(ctlr_info_t *h,
+	u32 use_short_tags)
 {
 	/* This is a bit complicated.  There are 8 registers on
 	 * the controller which we write to to tell it 8 different
@@ -3808,7 +3859,7 @@
 	writel(0, &h->transtable->RepQCtrAddrHigh32);
 	writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32);
 	writel(0, &h->transtable->RepQAddr0High32);
-	writel(CFGTBL_Trans_Performant,
+	writel(CFGTBL_Trans_Performant | use_short_tags,
 			&(h->cfgtable->HostWrite.TransportRequest));
 
 	writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
@@ -3855,7 +3906,8 @@
 	if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL))
 		goto clean_up;
 
-	cciss_enter_performant_mode(h);
+	cciss_enter_performant_mode(h,
+		trans_support & CFGTBL_Trans_use_short_tags);
 
 	/* Change the access methods to the performant access methods */
 	h->access = SA5_performant_access;

diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 579f749..554bbd9 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h

@@ -222,6 +222,7 @@
 			h->ctlr, c->busaddr);
 #endif /* CCISS_DEBUG */
          writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
+	readl(h->vaddr + SA5_REQUEST_PORT_OFFSET);
 	 h->commands_outstanding++;
 	 if ( h->commands_outstanding > h->max_outstanding)
 		h->max_outstanding = h->commands_outstanding;

diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 35463d2..cd441be 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h

@@ -56,6 +56,7 @@
 
 #define CFGTBL_Trans_Simple     0x00000002l
 #define CFGTBL_Trans_Performant 0x00000004l
+#define CFGTBL_Trans_use_short_tags 0x20000000l
 
 #define CFGTBL_BusType_Ultra2   0x00000001l
 #define CFGTBL_BusType_Ultra3   0x00000002l

diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 727d022..df79380 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c

@@ -824,13 +824,18 @@
 			break;
 			case CMD_UNSOLICITED_ABORT:
 				cmd->result = DID_ABORT << 16;
-				dev_warn(&h->pdev->dev, "%p aborted do to an "
+				dev_warn(&h->pdev->dev, "%p aborted due to an "
 					"unsolicited abort\n", c);
 			break;
 			case CMD_TIMEOUT:
 				cmd->result = DID_TIME_OUT << 16;
 				dev_warn(&h->pdev->dev, "%p timedout\n", c);
 			break;
+			case CMD_UNABORTABLE:
+				cmd->result = DID_ERROR << 16;
+				dev_warn(&h->pdev->dev, "c %p command "
+					"unabortable\n", c);
+			break;
 			default:
 				cmd->result = DID_ERROR << 16;
 				dev_warn(&h->pdev->dev,
@@ -1007,11 +1012,15 @@
 		break;
 		case CMD_UNSOLICITED_ABORT:
 			dev_warn(&h->pdev->dev,
-				"%p aborted do to an unsolicited abort\n", c);
+				"%p aborted due to an unsolicited abort\n", c);
 		break;
 		case CMD_TIMEOUT:
 			dev_warn(&h->pdev->dev, "%p timedout\n", c);
 		break;
+		case CMD_UNABORTABLE:
+			dev_warn(&h->pdev->dev,
+				"%p unabortable\n", c);
+		break;
 		default:
 			dev_warn(&h->pdev->dev,
 				"%p returned unknown status %x\n",

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index aca3024..2a1642b 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c

@@ -92,7 +92,7 @@
 	bio->bi_end_io = drbd_md_io_complete;
 	bio->bi_rw = rw;
 
-	if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
+	if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
 		bio_endio(bio, -EIO);
 	else
 		submit_bio(rw, bio);
@@ -176,13 +176,17 @@
 	struct lc_element *al_ext;
 	struct lc_element *tmp;
 	unsigned long     al_flags = 0;
+	int wake;
 
 	spin_lock_irq(&mdev->al_lock);
 	tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
 	if (unlikely(tmp != NULL)) {
 		struct bm_extent  *bm_ext = lc_entry(tmp, struct bm_extent, lce);
 		if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
+			wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
 			spin_unlock_irq(&mdev->al_lock);
+			if (wake)
+				wake_up(&mdev->al_wait);
 			return NULL;
 		}
 	}
@@ -258,6 +262,33 @@
 	spin_unlock_irqrestore(&mdev->al_lock, flags);
 }
 
+#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
+/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
+ * are still coupled, or assume too much about their relation.
+ * Code below will not work if this is violated.
+ * Will be cleaned up with some followup patch.
+ */
+# error FIXME
+#endif
+
+static unsigned int al_extent_to_bm_page(unsigned int al_enr)
+{
+	return al_enr >>
+		/* bit to page */
+		((PAGE_SHIFT + 3) -
+		/* al extent number to bit */
+		 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
+}
+
+static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
+{
+	return rs_enr >>
+		/* bit to page */
+		((PAGE_SHIFT + 3) -
+		/* al extent number to bit */
+		 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
+}
+
 int
 w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 {
@@ -285,7 +316,7 @@
 	 * For now, we must not write the transaction,
 	 * if we cannot write out the bitmap of the evicted extent. */
 	if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
-		drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT);
+		drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted));
 
 	/* The bitmap write may have failed, causing a state change. */
 	if (mdev->state.disk < D_INCONSISTENT) {
@@ -334,7 +365,7 @@
 		+ mdev->ldev->md.al_offset + mdev->al_tr_pos;
 
 	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
-		drbd_chk_io_error(mdev, 1, TRUE);
+		drbd_chk_io_error(mdev, 1, true);
 
 	if (++mdev->al_tr_pos >
 	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
@@ -511,225 +542,6 @@
 	return 1;
 }
 
-static void atodb_endio(struct bio *bio, int error)
-{
-	struct drbd_atodb_wait *wc = bio->bi_private;
-	struct drbd_conf *mdev = wc->mdev;
-	struct page *page;
-	int uptodate = bio_flagged(bio, BIO_UPTODATE);
-
-	/* strange behavior of some lower level drivers...
-	 * fail the request by clearing the uptodate flag,
-	 * but do not return any error?! */
-	if (!error && !uptodate)
-		error = -EIO;
-
-	drbd_chk_io_error(mdev, error, TRUE);
-	if (error && wc->error == 0)
-		wc->error = error;
-
-	if (atomic_dec_and_test(&wc->count))
-		complete(&wc->io_done);
-
-	page = bio->bi_io_vec[0].bv_page;
-	put_page(page);
-	bio_put(bio);
-	mdev->bm_writ_cnt++;
-	put_ldev(mdev);
-}
-
-/* sector to word */
-#define S2W(s)	((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
-
-/* activity log to on disk bitmap -- prepare bio unless that sector
- * is already covered by previously prepared bios */
-static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
-					struct bio **bios,
-					unsigned int enr,
-					struct drbd_atodb_wait *wc) __must_hold(local)
-{
-	struct bio *bio;
-	struct page *page;
-	sector_t on_disk_sector;
-	unsigned int page_offset = PAGE_SIZE;
-	int offset;
-	int i = 0;
-	int err = -ENOMEM;
-
-	/* We always write aligned, full 4k blocks,
-	 * so we can ignore the logical_block_size (for now) */
-	enr &= ~7U;
-	on_disk_sector = enr + mdev->ldev->md.md_offset
-			     + mdev->ldev->md.bm_offset;
-
-	D_ASSERT(!(on_disk_sector & 7U));
-
-	/* Check if that enr is already covered by an already created bio.
-	 * Caution, bios[] is not NULL terminated,
-	 * but only initialized to all NULL.
-	 * For completely scattered activity log,
-	 * the last invocation iterates over all bios,
-	 * and finds the last NULL entry.
-	 */
-	while ((bio = bios[i])) {
-		if (bio->bi_sector == on_disk_sector)
-			return 0;
-		i++;
-	}
-	/* bios[i] == NULL, the next not yet used slot */
-
-	/* GFP_KERNEL, we are not in the write-out path */
-	bio = bio_alloc(GFP_KERNEL, 1);
-	if (bio == NULL)
-		return -ENOMEM;
-
-	if (i > 0) {
-		const struct bio_vec *prev_bv = bios[i-1]->bi_io_vec;
-		page_offset = prev_bv->bv_offset + prev_bv->bv_len;
-		page = prev_bv->bv_page;
-	}
-	if (page_offset == PAGE_SIZE) {
-		page = alloc_page(__GFP_HIGHMEM);
-		if (page == NULL)
-			goto out_bio_put;
-		page_offset = 0;
-	} else {
-		get_page(page);
-	}
-
-	offset = S2W(enr);
-	drbd_bm_get_lel(mdev, offset,
-			min_t(size_t, S2W(8), drbd_bm_words(mdev) - offset),
-			kmap(page) + page_offset);
-	kunmap(page);
-
-	bio->bi_private = wc;
-	bio->bi_end_io = atodb_endio;
-	bio->bi_bdev = mdev->ldev->md_bdev;
-	bio->bi_sector = on_disk_sector;
-
-	if (bio_add_page(bio, page, 4096, page_offset) != 4096)
-		goto out_put_page;
-
-	atomic_inc(&wc->count);
-	/* we already know that we may do this...
-	 * get_ldev_if_state(mdev,D_ATTACHING);
-	 * just get the extra reference, so that the local_cnt reflects
-	 * the number of pending IO requests DRBD at its backing device.
-	 */
-	atomic_inc(&mdev->local_cnt);
-
-	bios[i] = bio;
-
-	return 0;
-
-out_put_page:
-	err = -EINVAL;
-	put_page(page);
-out_bio_put:
-	bio_put(bio);
-	return err;
-}
-
-/**
- * drbd_al_to_on_disk_bm() -  * Writes bitmap parts covered by active AL extents
- * @mdev:	DRBD device.
- *
- * Called when we detach (unconfigure) local storage,
- * or when we go from R_PRIMARY to R_SECONDARY role.
- */
-void drbd_al_to_on_disk_bm(struct drbd_conf *mdev)
-{
-	int i, nr_elements;
-	unsigned int enr;
-	struct bio **bios;
-	struct drbd_atodb_wait wc;
-
-	ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING))
-		return; /* sorry, I don't have any act_log etc... */
-
-	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
-
-	nr_elements = mdev->act_log->nr_elements;
-
-	/* GFP_KERNEL, we are not in anyone's write-out path */
-	bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL);
-	if (!bios)
-		goto submit_one_by_one;
-
-	atomic_set(&wc.count, 0);
-	init_completion(&wc.io_done);
-	wc.mdev = mdev;
-	wc.error = 0;
-
-	for (i = 0; i < nr_elements; i++) {
-		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
-		if (enr == LC_FREE)
-			continue;
-		/* next statement also does atomic_inc wc.count and local_cnt */
-		if (atodb_prepare_unless_covered(mdev, bios,
-						enr/AL_EXT_PER_BM_SECT,
-						&wc))
-			goto free_bios_submit_one_by_one;
-	}
-
-	/* unnecessary optimization? */
-	lc_unlock(mdev->act_log);
-	wake_up(&mdev->al_wait);
-
-	/* all prepared, submit them */
-	for (i = 0; i < nr_elements; i++) {
-		if (bios[i] == NULL)
-			break;
-		if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) {
-			bios[i]->bi_rw = WRITE;
-			bio_endio(bios[i], -EIO);
-		} else {
-			submit_bio(WRITE, bios[i]);
-		}
-	}
-
-	/* always (try to) flush bitmap to stable storage */
-	drbd_md_flush(mdev);
-
-	/* In case we did not submit a single IO do not wait for
-	 * them to complete. ( Because we would wait forever here. )
-	 *
-	 * In case we had IOs and they are already complete, there
-	 * is not point in waiting anyways.
-	 * Therefore this if () ... */
-	if (atomic_read(&wc.count))
-		wait_for_completion(&wc.io_done);
-
-	put_ldev(mdev);
-
-	kfree(bios);
-	return;
-
- free_bios_submit_one_by_one:
-	/* free everything by calling the endio callback directly. */
-	for (i = 0; i < nr_elements && bios[i]; i++)
-		bio_endio(bios[i], 0);
-
-	kfree(bios);
-
- submit_one_by_one:
-	dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n");
-
-	for (i = 0; i < mdev->act_log->nr_elements; i++) {
-		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
-		if (enr == LC_FREE)
-			continue;
-		/* Really slow: if we have al-extents 16..19 active,
-		 * sector 4 will be written four times! Synchronous! */
-		drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT);
-	}
-
-	lc_unlock(mdev->act_log);
-	wake_up(&mdev->al_wait);
-	put_ldev(mdev);
-}
-
 /**
  * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents
  * @mdev:	DRBD device.
@@ -809,7 +621,7 @@
 		return 1;
 	}
 
-	drbd_bm_write_sect(mdev, udw->enr);
+	drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr));
 	put_ldev(mdev);
 
 	kfree(udw);
@@ -889,7 +701,6 @@
 				dev_warn(DEV, "Kicking resync_lru element enr=%u "
 				     "out with rs_failed=%d\n",
 				     ext->lce.lc_number, ext->rs_failed);
-				set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
 			}
 			ext->rs_left = rs_left;
 			ext->rs_failed = success ? 0 : count;
@@ -908,7 +719,6 @@
 				drbd_queue_work_front(&mdev->data.work, &udw->w);
 			} else {
 				dev_warn(DEV, "Could not kmalloc an udw\n");
-				set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
 			}
 		}
 	} else {
@@ -919,6 +729,22 @@
 	}
 }
 
+void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go)
+{
+	unsigned long now = jiffies;
+	unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
+	int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
+	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
+		if (mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go &&
+		    mdev->state.conn != C_PAUSED_SYNC_T &&
+		    mdev->state.conn != C_PAUSED_SYNC_S) {
+			mdev->rs_mark_time[next] = now;
+			mdev->rs_mark_left[next] = still_to_go;
+			mdev->rs_last_mark = next;
+		}
+	}
+}
+
 /* clear the bit corresponding to the piece of storage in question:
  * size byte of data starting from sector.  Only clear a bits of the affected
  * one ore more _aligned_ BM_BLOCK_SIZE blocks.
@@ -936,7 +762,7 @@
 	int wake_up = 0;
 	unsigned long flags;
 
-	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
 		dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
 				(unsigned long long)sector, size);
 		return;
@@ -969,21 +795,9 @@
 	 */
 	count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
 	if (count && get_ldev(mdev)) {
-		unsigned long now = jiffies;
-		unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
-		int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
-		if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
-			unsigned long tw = drbd_bm_total_weight(mdev);
-			if (mdev->rs_mark_left[mdev->rs_last_mark] != tw &&
-			    mdev->state.conn != C_PAUSED_SYNC_T &&
-			    mdev->state.conn != C_PAUSED_SYNC_S) {
-				mdev->rs_mark_time[next] = now;
-				mdev->rs_mark_left[next] = tw;
-				mdev->rs_last_mark = next;
-			}
-		}
+		drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev));
 		spin_lock_irqsave(&mdev->al_lock, flags);
-		drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
+		drbd_try_clear_on_disk_bm(mdev, sector, count, true);
 		spin_unlock_irqrestore(&mdev->al_lock, flags);
 
 		/* just wake_up unconditional now, various lc_chaged(),
@@ -998,27 +812,27 @@
 /*
  * this is intended to set one request worth of data out of sync.
  * affects at least 1 bit,
- * and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits.
+ * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
  *
  * called by tl_clear and drbd_send_dblock (==drbd_make_request).
  * so this can be _any_ process.
  */
-void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
+int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
 			    const char *file, const unsigned int line)
 {
 	unsigned long sbnr, ebnr, lbnr, flags;
 	sector_t esector, nr_sectors;
-	unsigned int enr, count;
+	unsigned int enr, count = 0;
 	struct lc_element *e;
 
-	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
 		dev_err(DEV, "sector: %llus, size: %d\n",
 			(unsigned long long)sector, size);
-		return;
+		return 0;
 	}
 
 	if (!get_ldev(mdev))
-		return; /* no disk, no metadata, no bitmap to set bits in */
+		return 0; /* no disk, no metadata, no bitmap to set bits in */
 
 	nr_sectors = drbd_get_capacity(mdev->this_bdev);
 	esector = sector + (size >> 9) - 1;
@@ -1048,6 +862,8 @@
 
 out:
 	put_ldev(mdev);
+
+	return count;
 }
 
 static
@@ -1128,7 +944,10 @@
 	unsigned int enr = BM_SECT_TO_EXT(sector);
 	struct bm_extent *bm_ext;
 	int i, sig;
+	int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
+			 200 times -> 20 seconds. */
 
+retry:
 	sig = wait_event_interruptible(mdev->al_wait,
 			(bm_ext = _bme_get(mdev, enr)));
 	if (sig)
@@ -1139,16 +958,25 @@
 
 	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
 		sig = wait_event_interruptible(mdev->al_wait,
-				!_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i));
-		if (sig) {
+					       !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) ||
+					       test_bit(BME_PRIORITY, &bm_ext->flags));
+
+		if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
 			spin_lock_irq(&mdev->al_lock);
 			if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
-				clear_bit(BME_NO_WRITES, &bm_ext->flags);
+				bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
 				mdev->resync_locked--;
 				wake_up(&mdev->al_wait);
 			}
 			spin_unlock_irq(&mdev->al_lock);
-			return -EINTR;
+			if (sig)
+				return -EINTR;
+			if (schedule_timeout_interruptible(HZ/10))
+				return -EINTR;
+			if (sa && --sa == 0)
+				dev_warn(DEV,"drbd_rs_begin_io() stepped aside for 20sec."
+					 "Resync stalled?\n");
+			goto retry;
 		}
 	}
 	set_bit(BME_LOCKED, &bm_ext->flags);
@@ -1291,8 +1119,7 @@
 	}
 
 	if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
-		clear_bit(BME_LOCKED, &bm_ext->flags);
-		clear_bit(BME_NO_WRITES, &bm_ext->flags);
+		bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
 		mdev->resync_locked--;
 		wake_up(&mdev->al_wait);
 	}
@@ -1383,7 +1210,7 @@
 	sector_t esector, nr_sectors;
 	int wake_up = 0;
 
-	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
 		dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
 				(unsigned long long)sector, size);
 		return;
@@ -1420,7 +1247,7 @@
 		mdev->rs_failed += count;
 
 		if (get_ldev(mdev)) {
-			drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE);
+			drbd_try_clear_on_disk_bm(mdev, sector, count, false);
 			put_ldev(mdev);
 		}
 

diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 0645ca8..f0ae63d 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c

@@ -28,18 +28,58 @@
 #include <linux/drbd.h>
 #include <linux/slab.h>
 #include <asm/kmap_types.h>
+
+#include <asm-generic/bitops/le.h>
+
 #include "drbd_int.h"
 
+
 /* OPAQUE outside this file!
  * interface defined in drbd_int.h
 
  * convention:
  * function name drbd_bm_... => used elsewhere, "public".
  * function name      bm_... => internal to implementation, "private".
+ */
 
- * Note that since find_first_bit returns int, at the current granularity of
- * the bitmap (4KB per byte), this implementation "only" supports up to
- * 1<<(32+12) == 16 TB...
+
+/*
+ * LIMITATIONS:
+ * We want to support >= peta byte of backend storage, while for now still using
+ * a granularity of one bit per 4KiB of storage.
+ * 1 << 50		bytes backend storage (1 PiB)
+ * 1 << (50 - 12)	bits needed
+ *	38 --> we need u64 to index and count bits
+ * 1 << (38 - 3)	bitmap bytes needed
+ *	35 --> we still need u64 to index and count bytes
+ *			(that's 32 GiB of bitmap for 1 PiB storage)
+ * 1 << (35 - 2)	32bit longs needed
+ *	33 --> we'd even need u64 to index and count 32bit long words.
+ * 1 << (35 - 3)	64bit longs needed
+ *	32 --> we could get away with a 32bit unsigned int to index and count
+ *	64bit long words, but I rather stay with unsigned long for now.
+ *	We probably should neither count nor point to bytes or long words
+ *	directly, but either by bitnumber, or by page index and offset.
+ * 1 << (35 - 12)
+ *	22 --> we need that much 4KiB pages of bitmap.
+ *	1 << (22 + 3) --> on a 64bit arch,
+ *	we need 32 MiB to store the array of page pointers.
+ *
+ * Because I'm lazy, and because the resulting patch was too large, too ugly
+ * and still incomplete, on 32bit we still "only" support 16 TiB (minus some),
+ * (1 << 32) bits * 4k storage.
+ *
+
+ * bitmap storage and IO:
+ *	Bitmap is stored little endian on disk, and is kept little endian in
+ *	core memory. Currently we still hold the full bitmap in core as long
+ *	as we are "attached" to a local disk, which at 32 GiB for 1PiB storage
+ *	seems excessive.
+ *
+ *	We plan to reduce the amount of in-core bitmap pages by pageing them in
+ *	and out against their on-disk location as necessary, but need to make
+ *	sure we don't cause too much meta data IO, and must not deadlock in
+ *	tight memory situations. This needs some more work.
  */
 
 /*
@@ -55,13 +95,9 @@
 struct drbd_bitmap {
 	struct page **bm_pages;
 	spinlock_t bm_lock;
-	/* WARNING unsigned long bm_*:
-	 * 32bit number of bit offset is just enough for 512 MB bitmap.
-	 * it will blow up if we make the bitmap bigger...
-	 * not that it makes much sense to have a bitmap that large,
-	 * rather change the granularity to 16k or 64k or something.
-	 * (that implies other problems, however...)
-	 */
+
+	/* see LIMITATIONS: above */
+
 	unsigned long bm_set;       /* nr of set bits; THINK maybe atomic_t? */
 	unsigned long bm_bits;
 	size_t   bm_words;
@@ -69,29 +105,18 @@
 	sector_t bm_dev_capacity;
 	struct mutex bm_change; /* serializes resize operations */
 
-	atomic_t bm_async_io;
-	wait_queue_head_t bm_io_wait;
+	wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */
 
-	unsigned long  bm_flags;
+	enum bm_flag bm_flags;
 
 	/* debugging aid, in case we are still racy somewhere */
 	char          *bm_why;
 	struct task_struct *bm_task;
 };
 
-/* definition of bits in bm_flags */
-#define BM_LOCKED       0
-#define BM_MD_IO_ERROR  1
-#define BM_P_VMALLOCED  2
-
 static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
 			       unsigned long e, int val, const enum km_type km);
 
-static int bm_is_locked(struct drbd_bitmap *b)
-{
-	return test_bit(BM_LOCKED, &b->bm_flags);
-}
-
 #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
 static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
 {
@@ -108,7 +133,7 @@
 	    b->bm_task == mdev->worker.task   ? "worker"   : "?");
 }
 
-void drbd_bm_lock(struct drbd_conf *mdev, char *why)
+void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	int trylock_failed;
@@ -131,8 +156,9 @@
 		    b->bm_task == mdev->worker.task   ? "worker"   : "?");
 		mutex_lock(&b->bm_change);
 	}
-	if (__test_and_set_bit(BM_LOCKED, &b->bm_flags))
+	if (BM_LOCKED_MASK & b->bm_flags)
 		dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
+	b->bm_flags |= flags & BM_LOCKED_MASK;
 
 	b->bm_why  = why;
 	b->bm_task = current;
@@ -146,31 +172,137 @@
 		return;
 	}
 
-	if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags))
+	if (!(BM_LOCKED_MASK & mdev->bitmap->bm_flags))
 		dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n");
 
+	b->bm_flags &= ~BM_LOCKED_MASK;
 	b->bm_why  = NULL;
 	b->bm_task = NULL;
 	mutex_unlock(&b->bm_change);
 }
 
-/* word offset to long pointer */
-static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km)
+/* we store some "meta" info about our pages in page->private */
+/* at a granularity of 4k storage per bitmap bit:
+ * one peta byte storage: 1<<50 byte, 1<<38 * 4k storage blocks
+ *  1<<38 bits,
+ *  1<<23 4k bitmap pages.
+ * Use 24 bits as page index, covers 2 peta byte storage
+ * at a granularity of 4k per bit.
+ * Used to report the failed page idx on io error from the endio handlers.
+ */
+#define BM_PAGE_IDX_MASK	((1UL<<24)-1)
+/* this page is currently read in, or written back */
+#define BM_PAGE_IO_LOCK		31
+/* if there has been an IO error for this page */
+#define BM_PAGE_IO_ERROR	30
+/* this is to be able to intelligently skip disk IO,
+ * set if bits have been set since last IO. */
+#define BM_PAGE_NEED_WRITEOUT	29
+/* to mark for lazy writeout once syncer cleared all clearable bits,
+ * we if bits have been cleared since last IO. */
+#define BM_PAGE_LAZY_WRITEOUT	28
+
+/* store_page_idx uses non-atomic assingment. It is only used directly after
+ * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to
+ * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap
+ * changes) may happen from various contexts, and wait_on_bit/wake_up_bit
+ * requires it all to be atomic as well. */
+static void bm_store_page_idx(struct page *page, unsigned long idx)
 {
-	struct page *page;
-	unsigned long page_nr;
+	BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK));
+	page_private(page) |= idx;
+}
 
+static unsigned long bm_page_to_idx(struct page *page)
+{
+	return page_private(page) & BM_PAGE_IDX_MASK;
+}
+
+/* As is very unlikely that the same page is under IO from more than one
+ * context, we can get away with a bit per page and one wait queue per bitmap.
+ */
+static void bm_page_lock_io(struct drbd_conf *mdev, int page_nr)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	void *addr = &page_private(b->bm_pages[page_nr]);
+	wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr));
+}
+
+static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
+{
+	struct drbd_bitmap *b = mdev->bitmap;
+	void *addr = &page_private(b->bm_pages[page_nr]);
+	clear_bit(BM_PAGE_IO_LOCK, addr);
+	smp_mb__after_clear_bit();
+	wake_up(&mdev->bitmap->bm_io_wait);
+}
+
+/* set _before_ submit_io, so it may be reset due to being changed
+ * while this page is in flight... will get submitted later again */
+static void bm_set_page_unchanged(struct page *page)
+{
+	/* use cmpxchg? */
+	clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
+	clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
+}
+
+static void bm_set_page_need_writeout(struct page *page)
+{
+	set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
+}
+
+static int bm_test_page_unchanged(struct page *page)
+{
+	volatile const unsigned long *addr = &page_private(page);
+	return (*addr & ((1UL<<BM_PAGE_NEED_WRITEOUT)|(1UL<<BM_PAGE_LAZY_WRITEOUT))) == 0;
+}
+
+static void bm_set_page_io_err(struct page *page)
+{
+	set_bit(BM_PAGE_IO_ERROR, &page_private(page));
+}
+
+static void bm_clear_page_io_err(struct page *page)
+{
+	clear_bit(BM_PAGE_IO_ERROR, &page_private(page));
+}
+
+static void bm_set_page_lazy_writeout(struct page *page)
+{
+	set_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
+}
+
+static int bm_test_page_lazy_writeout(struct page *page)
+{
+	return test_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
+}
+
+/* on a 32bit box, this would allow for exactly (2<<38) bits. */
+static unsigned int bm_word_to_page_idx(struct drbd_bitmap *b, unsigned long long_nr)
+{
 	/* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */
-	page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
+	unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3);
 	BUG_ON(page_nr >= b->bm_number_of_pages);
-	page = b->bm_pages[page_nr];
+	return page_nr;
+}
 
+static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
+{
+	/* page_nr = (bitnr/8) >> PAGE_SHIFT; */
+	unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3);
+	BUG_ON(page_nr >= b->bm_number_of_pages);
+	return page_nr;
+}
+
+static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km)
+{
+	struct page *page = b->bm_pages[idx];
 	return (unsigned long *) kmap_atomic(page, km);
 }
 
-static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset)
+static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
 {
-	return __bm_map_paddr(b, offset, KM_IRQ1);
+	return __bm_map_pidx(b, idx, KM_IRQ1);
 }
 
 static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
@@ -202,6 +334,7 @@
  * to be able to report device specific.
  */
 
+
 static void bm_free_pages(struct page **pages, unsigned long number)
 {
 	unsigned long i;
@@ -269,6 +402,9 @@
 				bm_vk_free(new_pages, vmalloced);
 				return NULL;
 			}
+			/* we want to know which page it is
+			 * from the endio handlers */
+			bm_store_page_idx(page, i);
 			new_pages[i] = page;
 		}
 	} else {
@@ -280,9 +416,9 @@
 	}
 
 	if (vmalloced)
-		set_bit(BM_P_VMALLOCED, &b->bm_flags);
+		b->bm_flags |= BM_P_VMALLOCED;
 	else
-		clear_bit(BM_P_VMALLOCED, &b->bm_flags);
+		b->bm_flags &= ~BM_P_VMALLOCED;
 
 	return new_pages;
 }
@@ -319,7 +455,7 @@
 {
 	ERR_IF (!mdev->bitmap) return;
 	bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
-	bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags));
+	bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
 	kfree(mdev->bitmap);
 	mdev->bitmap = NULL;
 }
@@ -329,22 +465,39 @@
  * this masks out the remaining bits.
  * Returns the number of bits cleared.
  */
+#define BITS_PER_PAGE		(1UL << (PAGE_SHIFT + 3))
+#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE - 1)
+#define BITS_PER_LONG_MASK	(BITS_PER_LONG - 1)
 static int bm_clear_surplus(struct drbd_bitmap *b)
 {
-	const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
-	size_t w = b->bm_bits >> LN2_BPL;
-	int cleared = 0;
+	unsigned long mask;
 	unsigned long *p_addr, *bm;
+	int tmp;
+	int cleared = 0;
 
-	p_addr = bm_map_paddr(b, w);
-	bm = p_addr + MLPP(w);
-	if (w < b->bm_words) {
+	/* number of bits modulo bits per page */
+	tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
+	/* mask the used bits of the word containing the last bit */
+	mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1;
+	/* bitmap is always stored little endian,
+	 * on disk and in core memory alike */
+	mask = cpu_to_lel(mask);
+
+	p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
+	bm = p_addr + (tmp/BITS_PER_LONG);
+	if (mask) {
+		/* If mask != 0, we are not exactly aligned, so bm now points
+		 * to the long containing the last bit.
+		 * If mask == 0, bm already points to the word immediately
+		 * after the last (long word aligned) bit. */
 		cleared = hweight_long(*bm & ~mask);
 		*bm &= mask;
-		w++; bm++;
+		bm++;
 	}
 
-	if (w < b->bm_words) {
+	if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
+		/* on a 32bit arch, we may need to zero out
+		 * a padding long to align with a 64bit remote */
 		cleared += hweight_long(*bm);
 		*bm = 0;
 	}
@@ -354,66 +507,75 @@
 
 static void bm_set_surplus(struct drbd_bitmap *b)
 {
-	const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1;
-	size_t w = b->bm_bits >> LN2_BPL;
+	unsigned long mask;
 	unsigned long *p_addr, *bm;
+	int tmp;
 
-	p_addr = bm_map_paddr(b, w);
-	bm = p_addr + MLPP(w);
-	if (w < b->bm_words) {
+	/* number of bits modulo bits per page */
+	tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
+	/* mask the used bits of the word containing the last bit */
+	mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1;
+	/* bitmap is always stored little endian,
+	 * on disk and in core memory alike */
+	mask = cpu_to_lel(mask);
+
+	p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
+	bm = p_addr + (tmp/BITS_PER_LONG);
+	if (mask) {
+		/* If mask != 0, we are not exactly aligned, so bm now points
+		 * to the long containing the last bit.
+		 * If mask == 0, bm already points to the word immediately
+		 * after the last (long word aligned) bit. */
 		*bm |= ~mask;
-		bm++; w++;
+		bm++;
 	}
 
-	if (w < b->bm_words) {
-		*bm = ~(0UL);
+	if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
+		/* on a 32bit arch, we may need to zero out
+		 * a padding long to align with a 64bit remote */
+		*bm = ~0UL;
 	}
 	bm_unmap(p_addr);
 }
 
-static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian)
-{
-	unsigned long *p_addr, *bm, offset = 0;
-	unsigned long bits = 0;
-	unsigned long i, do_now;
-
-	while (offset < b->bm_words) {
-		i = do_now = min_t(size_t, b->bm_words-offset, LWPP);
-		p_addr = __bm_map_paddr(b, offset, KM_USER0);
-		bm = p_addr + MLPP(offset);
-		while (i--) {
-#ifndef __LITTLE_ENDIAN
-			if (swap_endian)
-				*bm = lel_to_cpu(*bm);
-#endif
-			bits += hweight_long(*bm++);
-		}
-		__bm_unmap(p_addr, KM_USER0);
-		offset += do_now;
-		cond_resched();
-	}
-
-	return bits;
-}
-
+/* you better not modify the bitmap while this is running,
+ * or its results will be stale */
 static unsigned long bm_count_bits(struct drbd_bitmap *b)
 {
-	return __bm_count_bits(b, 0);
-}
+	unsigned long *p_addr;
+	unsigned long bits = 0;
+	unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1;
+	int idx, i, last_word;
 
-static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b)
-{
-	return __bm_count_bits(b, 1);
+	/* all but last page */
+	for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
+		p_addr = __bm_map_pidx(b, idx, KM_USER0);
+		for (i = 0; i < LWPP; i++)
+			bits += hweight_long(p_addr[i]);
+		__bm_unmap(p_addr, KM_USER0);
+		cond_resched();
+	}
+	/* last (or only) page */
+	last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
+	p_addr = __bm_map_pidx(b, idx, KM_USER0);
+	for (i = 0; i < last_word; i++)
+		bits += hweight_long(p_addr[i]);
+	p_addr[last_word] &= cpu_to_lel(mask);
+	bits += hweight_long(p_addr[last_word]);
+	/* 32bit arch, may have an unused padding long */
+	if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
+		p_addr[last_word+1] = 0;
+	__bm_unmap(p_addr, KM_USER0);
+	return bits;
 }
 
 /* offset and len in long words.*/
 static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
 {
 	unsigned long *p_addr, *bm;
+	unsigned int idx;
 	size_t do_now, end;
 
-#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512)
-
 	end = offset + len;
 
 	if (end > b->bm_words) {
@@ -423,15 +585,16 @@
 
 	while (offset < end) {
 		do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
-		p_addr = bm_map_paddr(b, offset);
+		idx = bm_word_to_page_idx(b, offset);
+		p_addr = bm_map_pidx(b, idx);
 		bm = p_addr + MLPP(offset);
 		if (bm+do_now > p_addr + LWPP) {
 			printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
 			       p_addr, bm, (int)do_now);
-			break; /* breaks to after catch_oob_access_end() only! */
-		}
-		memset(bm, c, do_now * sizeof(long));
+		} else
+			memset(bm, c, do_now * sizeof(long));
 		bm_unmap(p_addr);
+		bm_set_page_need_writeout(b->bm_pages[idx]);
 		offset += do_now;
 	}
 }
@@ -447,7 +610,7 @@
 int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long bits, words, owords, obits, *p_addr, *bm;
+	unsigned long bits, words, owords, obits;
 	unsigned long want, have, onpages; /* number of pages */
 	struct page **npages, **opages = NULL;
 	int err = 0, growing;
@@ -455,7 +618,7 @@
 
 	ERR_IF(!b) return -ENOMEM;
 
-	drbd_bm_lock(mdev, "resize");
+	drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);
 
 	dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n",
 			(unsigned long long)capacity);
@@ -463,7 +626,7 @@
 	if (capacity == b->bm_dev_capacity)
 		goto out;
 
-	opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags);
+	opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);
 
 	if (capacity == 0) {
 		spin_lock_irq(&b->bm_lock);
@@ -491,18 +654,23 @@
 	words = ALIGN(bits, 64) >> LN2_BPL;
 
 	if (get_ldev(mdev)) {
-		D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12));
+		u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12;
 		put_ldev(mdev);
+		if (bits > bits_on_disk) {
+			dev_info(DEV, "bits = %lu\n", bits);
+			dev_info(DEV, "bits_on_disk = %llu\n", bits_on_disk);
+			err = -ENOSPC;
+			goto out;
+		}
 	}
 
-	/* one extra long to catch off by one errors */
-	want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
+	want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
 	have = b->bm_number_of_pages;
 	if (want == have) {
 		D_ASSERT(b->bm_pages != NULL);
 		npages = b->bm_pages;
 	} else {
-		if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC))
+		if (drbd_insert_fault(mdev, DRBD_FAULT_BM_ALLOC))
 			npages = NULL;
 		else
 			npages = bm_realloc_pages(b, want);
@@ -542,11 +710,6 @@
 		bm_free_pages(opages + want, have - want);
 	}
 
-	p_addr = bm_map_paddr(b, words);
-	bm = p_addr + MLPP(words);
-	*bm = DRBD_MAGIC;
-	bm_unmap(p_addr);
-
 	(void)bm_clear_surplus(b);
 
 	spin_unlock_irq(&b->bm_lock);
@@ -554,7 +717,7 @@
 		bm_vk_free(opages, opages_vmalloced);
 	if (!growing)
 		b->bm_set = bm_count_bits(b);
-	dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words);
+	dev_info(DEV, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
 
  out:
 	drbd_bm_unlock(mdev);
@@ -624,6 +787,7 @@
 	struct drbd_bitmap *b = mdev->bitmap;
 	unsigned long *p_addr, *bm;
 	unsigned long word, bits;
+	unsigned int idx;
 	size_t end, do_now;
 
 	end = offset + number;
@@ -638,16 +802,18 @@
 	spin_lock_irq(&b->bm_lock);
 	while (offset < end) {
 		do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
-		p_addr = bm_map_paddr(b, offset);
+		idx = bm_word_to_page_idx(b, offset);
+		p_addr = bm_map_pidx(b, idx);
 		bm = p_addr + MLPP(offset);
 		offset += do_now;
 		while (do_now--) {
 			bits = hweight_long(*bm);
-			word = *bm | lel_to_cpu(*buffer++);
+			word = *bm | *buffer++;
 			*bm++ = word;
 			b->bm_set += hweight_long(word) - bits;
 		}
 		bm_unmap(p_addr);
+		bm_set_page_need_writeout(b->bm_pages[idx]);
 	}
 	/* with 32bit <-> 64bit cross-platform connect
 	 * this is only correct for current usage,
@@ -656,7 +822,6 @@
 	 */
 	if (end == b->bm_words)
 		b->bm_set -= bm_clear_surplus(b);
-
 	spin_unlock_irq(&b->bm_lock);
 }
 
@@ -686,11 +851,11 @@
 	else {
 		while (offset < end) {
 			do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
-			p_addr = bm_map_paddr(b, offset);
+			p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset));
 			bm = p_addr + MLPP(offset);
 			offset += do_now;
 			while (do_now--)
-				*buffer++ = cpu_to_lel(*bm++);
+				*buffer++ = *bm++;
 			bm_unmap(p_addr);
 		}
 	}
@@ -724,9 +889,22 @@
 	spin_unlock_irq(&b->bm_lock);
 }
 
+struct bm_aio_ctx {
+	struct drbd_conf *mdev;
+	atomic_t in_flight;
+	struct completion done;
+	unsigned flags;
+#define BM_AIO_COPY_PAGES	1
+	int error;
+};
+
+/* bv_page may be a copy, or may be the original */
 static void bm_async_io_complete(struct bio *bio, int error)
 {
-	struct drbd_bitmap *b = bio->bi_private;
+	struct bm_aio_ctx *ctx = bio->bi_private;
+	struct drbd_conf *mdev = ctx->mdev;
+	struct drbd_bitmap *b = mdev->bitmap;
+	unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page);
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 
 
@@ -737,38 +915,83 @@
 	if (!error && !uptodate)
 		error = -EIO;
 
+	if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 &&
+	    !bm_test_page_unchanged(b->bm_pages[idx]))
+		dev_warn(DEV, "bitmap page idx %u changed during IO!\n", idx);
+
 	if (error) {
-		/* doh. what now?
-		 * for now, set all bits, and flag MD_IO_ERROR */
-		__set_bit(BM_MD_IO_ERROR, &b->bm_flags);
+		/* ctx error will hold the completed-last non-zero error code,
+		 * in case error codes differ. */
+		ctx->error = error;
+		bm_set_page_io_err(b->bm_pages[idx]);
+		/* Not identical to on disk version of it.
+		 * Is BM_PAGE_IO_ERROR enough? */
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_err(DEV, "IO ERROR %d on bitmap page idx %u\n",
+					error, idx);
+	} else {
+		bm_clear_page_io_err(b->bm_pages[idx]);
+		dynamic_dev_dbg(DEV, "bitmap page idx %u completed\n", idx);
 	}
-	if (atomic_dec_and_test(&b->bm_async_io))
-		wake_up(&b->bm_io_wait);
+
+	bm_page_unlock_io(mdev, idx);
+
+	/* FIXME give back to page pool */
+	if (ctx->flags & BM_AIO_COPY_PAGES)
+		put_page(bio->bi_io_vec[0].bv_page);
 
 	bio_put(bio);
+
+	if (atomic_dec_and_test(&ctx->in_flight))
+		complete(&ctx->done);
 }
 
-static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local)
+static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
 {
 	/* we are process context. we always get a bio */
 	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
+	struct drbd_conf *mdev = ctx->mdev;
+	struct drbd_bitmap *b = mdev->bitmap;
+	struct page *page;
 	unsigned int len;
+
 	sector_t on_disk_sector =
 		mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
 	on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
 
 	/* this might happen with very small
-	 * flexible external meta data device */
+	 * flexible external meta data device,
+	 * or with PAGE_SIZE > 4k */
 	len = min_t(unsigned int, PAGE_SIZE,
 		(drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);
 
+	/* serialize IO on this page */
+	bm_page_lock_io(mdev, page_nr);
+	/* before memcpy and submit,
+	 * so it can be redirtied any time */
+	bm_set_page_unchanged(b->bm_pages[page_nr]);
+
+	if (ctx->flags & BM_AIO_COPY_PAGES) {
+		/* FIXME alloc_page is good enough for now, but actually needs
+		 * to use pre-allocated page pool */
+		void *src, *dest;
+		page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT);
+		dest = kmap_atomic(page, KM_USER0);
+		src = kmap_atomic(b->bm_pages[page_nr], KM_USER1);
+		memcpy(dest, src, PAGE_SIZE);
+		kunmap_atomic(src, KM_USER1);
+		kunmap_atomic(dest, KM_USER0);
+		bm_store_page_idx(page, page_nr);
+	} else
+		page = b->bm_pages[page_nr];
+
 	bio->bi_bdev = mdev->ldev->md_bdev;
 	bio->bi_sector = on_disk_sector;
-	bio_add_page(bio, b->bm_pages[page_nr], len, 0);
-	bio->bi_private = b;
+	bio_add_page(bio, page, len, 0);
+	bio->bi_private = ctx;
 	bio->bi_end_io = bm_async_io_complete;
 
-	if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
+	if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
 		bio->bi_rw |= rw;
 		bio_endio(bio, -EIO);
 	} else {
@@ -776,87 +999,84 @@
 	}
 }
 
-# if defined(__LITTLE_ENDIAN)
-	/* nothing to do, on disk == in memory */
-# define bm_cpu_to_lel(x) ((void)0)
-# else
-static void bm_cpu_to_lel(struct drbd_bitmap *b)
-{
-	/* need to cpu_to_lel all the pages ...
-	 * this may be optimized by using
-	 * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0;
-	 * the following is still not optimal, but better than nothing */
-	unsigned int i;
-	unsigned long *p_addr, *bm;
-	if (b->bm_set == 0) {
-		/* no page at all; avoid swap if all is 0 */
-		i = b->bm_number_of_pages;
-	} else if (b->bm_set == b->bm_bits) {
-		/* only the last page */
-		i = b->bm_number_of_pages - 1;
-	} else {
-		/* all pages */
-		i = 0;
-	}
-	for (; i < b->bm_number_of_pages; i++) {
-		p_addr = kmap_atomic(b->bm_pages[i], KM_USER0);
-		for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++)
-			*bm = cpu_to_lel(*bm);
-		kunmap_atomic(p_addr, KM_USER0);
-	}
-}
-# endif
-/* lel_to_cpu == cpu_to_lel */
-# define bm_lel_to_cpu(x) bm_cpu_to_lel(x)
-
 /*
  * bm_rw: read/write the whole bitmap from/to its on disk location.
  */
-static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local)
+static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local)
 {
+	struct bm_aio_ctx ctx = {
+		.mdev = mdev,
+		.in_flight = ATOMIC_INIT(1),
+		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
+		.flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0,
+	};
 	struct drbd_bitmap *b = mdev->bitmap;
-	/* sector_t sector; */
-	int bm_words, num_pages, i;
+	int num_pages, i, count = 0;
 	unsigned long now;
 	char ppb[10];
 	int err = 0;
 
-	WARN_ON(!bm_is_locked(b));
+	/*
+	 * We are protected against bitmap disappearing/resizing by holding an
+	 * ldev reference (caller must have called get_ldev()).
+	 * For read/write, we are protected against changes to the bitmap by
+	 * the bitmap lock (see drbd_bitmap_io).
+	 * For lazy writeout, we don't care for ongoing changes to the bitmap,
+	 * as we submit copies of pages anyways.
+	 */
+	if (!ctx.flags)
+		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
 
-	/* no spinlock here, the drbd_bm_lock should be enough! */
-
-	bm_words  = drbd_bm_words(mdev);
-	num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT;
-
-	/* on disk bitmap is little endian */
-	if (rw == WRITE)
-		bm_cpu_to_lel(b);
+	num_pages = b->bm_number_of_pages;
 
 	now = jiffies;
-	atomic_set(&b->bm_async_io, num_pages);
-	__clear_bit(BM_MD_IO_ERROR, &b->bm_flags);
 
 	/* let the layers below us try to merge these bios... */
-	for (i = 0; i < num_pages; i++)
-		bm_page_io_async(mdev, b, i, rw);
+	for (i = 0; i < num_pages; i++) {
+		/* ignore completely unchanged pages */
+		if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
+			break;
+		if (rw & WRITE) {
+			if (bm_test_page_unchanged(b->bm_pages[i])) {
+				dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
+				continue;
+			}
+			/* during lazy writeout,
+			 * ignore those pages not marked for lazy writeout. */
+			if (lazy_writeout_upper_idx &&
+			    !bm_test_page_lazy_writeout(b->bm_pages[i])) {
+				dynamic_dev_dbg(DEV, "skipped bm lazy write for idx %u\n", i);
+				continue;
+			}
+		}
+		atomic_inc(&ctx.in_flight);
+		bm_page_io_async(&ctx, i, rw);
+		++count;
+		cond_resched();
+	}
 
-	wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0);
+	/*
+	 * We initialize ctx.in_flight to one to make sure bm_async_io_complete
+	 * will not complete() early, and decrement / test it here.  If there
+	 * are still some bios in flight, we need to wait for them here.
+	 */
+	if (!atomic_dec_and_test(&ctx.in_flight))
+		wait_for_completion(&ctx.done);
+	dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
+			rw == WRITE ? "WRITE" : "READ",
+			count, jiffies - now);
 
-	if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) {
+	if (ctx.error) {
 		dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
-		drbd_chk_io_error(mdev, 1, TRUE);
-		err = -EIO;
+		drbd_chk_io_error(mdev, 1, true);
+		err = -EIO; /* ctx.error ? */
 	}
 
 	now = jiffies;
 	if (rw == WRITE) {
-		/* swap back endianness */
-		bm_lel_to_cpu(b);
-		/* flush bitmap to stable storage */
 		drbd_md_flush(mdev);
 	} else /* rw == READ */ {
-		/* just read, if necessary adjust endianness */
-		b->bm_set = bm_count_bits_swap_endian(b);
+		b->bm_set = bm_count_bits(b);
 		dev_info(DEV, "recounting of set bits took additional %lu jiffies\n",
 		     jiffies - now);
 	}
@@ -874,112 +1094,128 @@
  */
 int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
 {
-	return bm_rw(mdev, READ);
+	return bm_rw(mdev, READ, 0);
 }
 
 /**
  * drbd_bm_write() - Write the whole bitmap to its on disk location.
  * @mdev:	DRBD device.
+ *
+ * Will only write pages that have changed since last IO.
  */
 int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
 {
-	return bm_rw(mdev, WRITE);
+	return bm_rw(mdev, WRITE, 0);
 }
 
 /**
- * drbd_bm_write_sect: Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap
+ * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
  * @mdev:	DRBD device.
- * @enr:	Extent number in the resync lru (happens to be sector offset)
- *
- * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered
- * by a single sector write. Therefore enr == sector offset from the
- * start of the bitmap.
+ * @upper_idx:	0: write all changed pages; +ve: page index to stop scanning for changed pages
  */
-int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local)
+int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local)
 {
-	sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
-				      + mdev->ldev->md.bm_offset;
-	int bm_words, num_words, offset;
-	int err = 0;
+	return bm_rw(mdev, WRITE, upper_idx);
+}
 
-	mutex_lock(&mdev->md_io_mutex);
-	bm_words  = drbd_bm_words(mdev);
-	offset    = S2W(enr);	/* word offset into bitmap */
-	num_words = min(S2W(1), bm_words - offset);
-	if (num_words < S2W(1))
-		memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE);
-	drbd_bm_get_lel(mdev, offset, num_words,
-			page_address(mdev->md_io_page));
-	if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) {
-		int i;
-		err = -EIO;
-		dev_err(DEV, "IO ERROR writing bitmap sector %lu "
-		    "(meta-disk sector %llus)\n",
-		    enr, (unsigned long long)on_disk_sector);
-		drbd_chk_io_error(mdev, 1, TRUE);
-		for (i = 0; i < AL_EXT_PER_BM_SECT; i++)
-			drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i);
+
+/**
+ * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap
+ * @mdev:	DRBD device.
+ * @idx:	bitmap page index
+ *
+ * We don't want to special case on logical_block_size of the backend device,
+ * so we submit PAGE_SIZE aligned pieces.
+ * Note that on "most" systems, PAGE_SIZE is 4k.
+ *
+ * In case this becomes an issue on systems with larger PAGE_SIZE,
+ * we may want to change this again to write 4k aligned 4k pieces.
+ */
+int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
+{
+	struct bm_aio_ctx ctx = {
+		.mdev = mdev,
+		.in_flight = ATOMIC_INIT(1),
+		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
+		.flags = BM_AIO_COPY_PAGES,
+	};
+
+	if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
+		dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
+		return 0;
 	}
+
+	bm_page_io_async(&ctx, idx, WRITE_SYNC);
+	wait_for_completion(&ctx.done);
+
+	if (ctx.error)
+		drbd_chk_io_error(mdev, 1, true);
+		/* that should force detach, so the in memory bitmap will be
+		 * gone in a moment as well. */
+
 	mdev->bm_writ_cnt++;
-	mutex_unlock(&mdev->md_io_mutex);
-	return err;
+	return ctx.error;
 }
 
 /* NOTE
  * find_first_bit returns int, we return unsigned long.
- * should not make much difference anyways, but ...
+ * For this to work on 32bit arch with bitnumbers > (1<<32),
+ * we'd need to return u64, and get a whole lot of other places
+ * fixed where we still use unsigned long.
  *
  * this returns a bit number, NOT a sector!
  */
-#define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1)
 static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
 	const int find_zero_bit, const enum km_type km)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long i = -1UL;
 	unsigned long *p_addr;
-	unsigned long bit_offset; /* bit offset of the mapped page. */
+	unsigned long bit_offset;
+	unsigned i;
+
 
 	if (bm_fo > b->bm_bits) {
 		dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
+		bm_fo = DRBD_END_OF_BITMAP;
 	} else {
 		while (bm_fo < b->bm_bits) {
-			unsigned long offset;
-			bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */
-			offset = bit_offset >> LN2_BPL;    /* word offset of the page */
-			p_addr = __bm_map_paddr(b, offset, km);
+			/* bit offset of the first bit in the page */
+			bit_offset = bm_fo & ~BITS_PER_PAGE_MASK;
+			p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km);
 
 			if (find_zero_bit)
-				i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK);
+				i = generic_find_next_zero_le_bit(p_addr,
+						PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
 			else
-				i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK);
+				i = generic_find_next_le_bit(p_addr,
+						PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
 
 			__bm_unmap(p_addr, km);
 			if (i < PAGE_SIZE*8) {
-				i = bit_offset + i;
-				if (i >= b->bm_bits)
+				bm_fo = bit_offset + i;
+				if (bm_fo >= b->bm_bits)
 					break;
 				goto found;
 			}
 			bm_fo = bit_offset + PAGE_SIZE*8;
 		}
-		i = -1UL;
+		bm_fo = DRBD_END_OF_BITMAP;
 	}
  found:
-	return i;
+	return bm_fo;
 }
 
 static unsigned long bm_find_next(struct drbd_conf *mdev,
 	unsigned long bm_fo, const int find_zero_bit)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long i = -1UL;
+	unsigned long i = DRBD_END_OF_BITMAP;
 
 	ERR_IF(!b) return i;
 	ERR_IF(!b->bm_pages) return i;
 
 	spin_lock_irq(&b->bm_lock);
-	if (bm_is_locked(b))
+	if (BM_DONT_TEST & b->bm_flags)
 		bm_print_lock_info(mdev);
 
 	i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);
@@ -1005,13 +1241,13 @@
  * you must take drbd_bm_lock() first */
 unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
 {
-	/* WARN_ON(!bm_is_locked(mdev)); */
+	/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
 	return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
 }
 
 unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
 {
-	/* WARN_ON(!bm_is_locked(mdev)); */
+	/* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
 	return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
 }
 
@@ -1027,8 +1263,9 @@
 	struct drbd_bitmap *b = mdev->bitmap;
 	unsigned long *p_addr = NULL;
 	unsigned long bitnr;
-	unsigned long last_page_nr = -1UL;
+	unsigned int last_page_nr = -1U;
 	int c = 0;
+	int changed_total = 0;
 
 	if (e >= b->bm_bits) {
 		dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
@@ -1036,23 +1273,33 @@
 		e = b->bm_bits ? b->bm_bits -1 : 0;
 	}
 	for (bitnr = s; bitnr <= e; bitnr++) {
-		unsigned long offset = bitnr>>LN2_BPL;
-		unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3);
+		unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
 		if (page_nr != last_page_nr) {
 			if (p_addr)
 				__bm_unmap(p_addr, km);
-			p_addr = __bm_map_paddr(b, offset, km);
+			if (c < 0)
+				bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
+			else if (c > 0)
+				bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
+			changed_total += c;
+			c = 0;
+			p_addr = __bm_map_pidx(b, page_nr, km);
 			last_page_nr = page_nr;
 		}
 		if (val)
-			c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr));
+			c += (0 == generic___test_and_set_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr));
 		else
-			c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr));
+			c -= (0 != generic___test_and_clear_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr));
 	}
 	if (p_addr)
 		__bm_unmap(p_addr, km);
-	b->bm_set += c;
-	return c;
+	if (c < 0)
+		bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
+	else if (c > 0)
+		bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
+	changed_total += c;
+	b->bm_set += changed_total;
+	return changed_total;
 }
 
 /* returns number of bits actually changed.
@@ -1070,7 +1317,7 @@
 	ERR_IF(!b->bm_pages) return 0;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
-	if (bm_is_locked(b))
+	if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
 		bm_print_lock_info(mdev);
 
 	c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1);
@@ -1187,12 +1434,11 @@
 	ERR_IF(!b->bm_pages) return 0;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
-	if (bm_is_locked(b))
+	if (BM_DONT_TEST & b->bm_flags)
 		bm_print_lock_info(mdev);
 	if (bitnr < b->bm_bits) {
-		unsigned long offset = bitnr>>LN2_BPL;
-		p_addr = bm_map_paddr(b, offset);
-		i = test_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0;
+		p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr));
+		i = generic_test_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0;
 		bm_unmap(p_addr);
 	} else if (bitnr == b->bm_bits) {
 		i = -1;
@@ -1210,10 +1456,10 @@
 {
 	unsigned long flags;
 	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long *p_addr = NULL, page_nr = -1;
+	unsigned long *p_addr = NULL;
 	unsigned long bitnr;
+	unsigned int page_nr = -1U;
 	int c = 0;
-	size_t w;
 
 	/* If this is called without a bitmap, that is a bug.  But just to be
 	 * robust in case we screwed up elsewhere, in that case pretend there
@@ -1223,20 +1469,20 @@
 	ERR_IF(!b->bm_pages) return 1;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
-	if (bm_is_locked(b))
+	if (BM_DONT_TEST & b->bm_flags)
 		bm_print_lock_info(mdev);
 	for (bitnr = s; bitnr <= e; bitnr++) {
-		w = bitnr >> LN2_BPL;
-		if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) {
-			page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3);
+		unsigned int idx = bm_bit_to_page_idx(b, bitnr);
+		if (page_nr != idx) {
+			page_nr = idx;
 			if (p_addr)
 				bm_unmap(p_addr);
-			p_addr = bm_map_paddr(b, w);
+			p_addr = bm_map_pidx(b, idx);
 		}
 		ERR_IF (bitnr >= b->bm_bits) {
 			dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
 		} else {
-			c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
+			c += (0 != generic_test_le_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
 		}
 	}
 	if (p_addr)
@@ -1271,7 +1517,7 @@
 	ERR_IF(!b->bm_pages) return 0;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
-	if (bm_is_locked(b))
+	if (BM_DONT_TEST & b->bm_flags)
 		bm_print_lock_info(mdev);
 
 	s = S2W(enr);
@@ -1279,7 +1525,7 @@
 	count = 0;
 	if (s < b->bm_words) {
 		int n = e-s;
-		p_addr = bm_map_paddr(b, s);
+		p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
 		bm = p_addr + MLPP(s);
 		while (n--)
 			count += hweight_long(*bm++);
@@ -1291,18 +1537,20 @@
 	return count;
 }
 
-/* set all bits covered by the AL-extent al_enr */
+/* Set all bits covered by the AL-extent al_enr.
+ * Returns number of bits changed. */
 unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	unsigned long *p_addr, *bm;
 	unsigned long weight;
-	int count, s, e, i, do_now;
+	unsigned long s, e;
+	int count, i, do_now;
 	ERR_IF(!b) return 0;
 	ERR_IF(!b->bm_pages) return 0;
 
 	spin_lock_irq(&b->bm_lock);
-	if (bm_is_locked(b))
+	if (BM_DONT_SET & b->bm_flags)
 		bm_print_lock_info(mdev);
 	weight = b->bm_set;
 
@@ -1314,7 +1562,7 @@
 	count = 0;
 	if (s < b->bm_words) {
 		i = do_now = e-s;
-		p_addr = bm_map_paddr(b, s);
+		p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
 		bm = p_addr + MLPP(s);
 		while (i--) {
 			count += hweight_long(*bm);
@@ -1326,7 +1574,7 @@
 		if (e == b->bm_words)
 			b->bm_set -= bm_clear_surplus(b);
 	} else {
-		dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s);
+		dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
 	}
 	weight = b->bm_set - weight;
 	spin_unlock_irq(&b->bm_lock);

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index b0bd27d..81030d8 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h

@@ -72,13 +72,6 @@
 extern char usermode_helper[];
 
 
-#ifndef TRUE
-#define TRUE 1
-#endif
-#ifndef FALSE
-#define FALSE 0
-#endif
-
 /* I don't remember why XCPU ...
  * This is used to wake the asender,
  * and to interrupt sending the sending task
@@ -104,6 +97,7 @@
 #define ID_SYNCER (-1ULL)
 #define ID_VACANT 0
 #define is_syncer_block_id(id) ((id) == ID_SYNCER)
+#define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL)
 
 struct drbd_conf;
 
@@ -137,20 +131,19 @@
 	DRBD_FAULT_MAX,
 };
 
-#ifdef CONFIG_DRBD_FAULT_INJECTION
 extern unsigned int
 _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type);
+
 static inline int
 drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) {
+#ifdef CONFIG_DRBD_FAULT_INJECTION
 	return fault_rate &&
 		(enable_faults & (1<<type)) &&
 		_drbd_insert_fault(mdev, type);
-}
-#define FAULT_ACTIVE(_m, _t) (drbd_insert_fault((_m), (_t)))
-
 #else
-#define FAULT_ACTIVE(_m, _t) (0)
+	return 0;
 #endif
+}
 
 /* integer division, round _UP_ to the next integer */
 #define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0))
@@ -212,8 +205,10 @@
 	/* P_CKPT_FENCE_REQ      = 0x25, * currently reserved for protocol D */
 	/* P_CKPT_DISABLE_REQ    = 0x26, * currently reserved for protocol D */
 	P_DELAY_PROBE         = 0x27, /* is used on BOTH sockets */
+	P_OUT_OF_SYNC         = 0x28, /* Mark as out of sync (Outrunning), data socket */
+	P_RS_CANCEL           = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
 
-	P_MAX_CMD	      = 0x28,
+	P_MAX_CMD	      = 0x2A,
 	P_MAY_IGNORE	      = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
 	P_MAX_OPT_CMD	      = 0x101,
 
@@ -269,6 +264,7 @@
 		[P_RS_IS_IN_SYNC]	= "CsumRSIsInSync",
 		[P_COMPRESSED_BITMAP]   = "CBitmap",
 		[P_DELAY_PROBE]         = "DelayProbe",
+		[P_OUT_OF_SYNC]		= "OutOfSync",
 		[P_MAX_CMD]	        = NULL,
 	};
 
@@ -512,7 +508,7 @@
 	u64	    d_size;  /* size of disk */
 	u64	    u_size;  /* user requested size */
 	u64	    c_size;  /* current exported size */
-	u32	    max_segment_size;  /* Maximal size of a BIO */
+	u32	    max_bio_size;  /* Maximal size of a BIO */
 	u16	    queue_order_type;  /* not yet implemented in DRBD*/
 	u16	    dds_flags; /* use enum dds_flags here. */
 } __packed;
@@ -550,6 +546,13 @@
 	u32	    pad;
 } __packed;
 
+struct p_block_desc {
+	struct p_header80 head;
+	u64 sector;
+	u32 blksize;
+	u32 pad;	/* to multiple of 8 Byte */
+} __packed;
+
 /* Valid values for the encoding field.
  * Bump proto version when changing this. */
 enum drbd_bitmap_code {
@@ -647,6 +650,7 @@
         struct p_block_req       block_req;
 	struct p_delay_probe93   delay_probe93;
 	struct p_rs_uuid         rs_uuid;
+	struct p_block_desc      block_desc;
 } __packed;
 
 /**********************************************************************/
@@ -677,13 +681,6 @@
 	return thi->t_state;
 }
 
-
-/*
- * Having this as the first member of a struct provides sort of "inheritance".
- * "derived" structs can be "drbd_queue_work()"ed.
- * The callback should know and cast back to the descendant struct.
- * drbd_request and drbd_epoch_entry are descendants of drbd_work.
- */
 struct drbd_work;
 typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel);
 struct drbd_work {
@@ -712,9 +709,6 @@
 	 * starting a new epoch...
 	 */
 
-	/* up to here, the struct layout is identical to drbd_epoch_entry;
-	 * we might be able to use that to our advantage...  */
-
 	struct list_head tl_requests; /* ring list in the transfer log */
 	struct bio *master_bio;       /* master bio pointer */
 	unsigned long rq_state; /* see comments above _req_mod() */
@@ -831,7 +825,7 @@
 	CRASHED_PRIMARY,	/* This node was a crashed primary.
 				 * Gets cleared when the state.conn
 				 * goes into C_CONNECTED state. */
-	WRITE_BM_AFTER_RESYNC,	/* A kmalloc() during resync failed */
+	NO_BARRIER_SUPP,	/* underlying block device doesn't implement barriers */
 	CONSIDER_RESYNC,
 
 	MD_NO_FUA,		/* Users wants us to not use FUA/FLUSH on meta data dev */
@@ -856,10 +850,37 @@
 	GOT_PING_ACK,		/* set when we receive a ping_ack packet, misc wait gets woken */
 	NEW_CUR_UUID,		/* Create new current UUID when thawing IO */
 	AL_SUSPENDED,		/* Activity logging is currently suspended. */
+	AHEAD_TO_SYNC_SOURCE,   /* Ahead -> SyncSource queued */
 };
 
 struct drbd_bitmap; /* opaque for drbd_conf */
 
+/* definition of bits in bm_flags to be used in drbd_bm_lock
+ * and drbd_bitmap_io and friends. */
+enum bm_flag {
+	/* do we need to kfree, or vfree bm_pages? */
+	BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
+
+	/* currently locked for bulk operation */
+	BM_LOCKED_MASK = 0x7,
+
+	/* in detail, that is: */
+	BM_DONT_CLEAR = 0x1,
+	BM_DONT_SET   = 0x2,
+	BM_DONT_TEST  = 0x4,
+
+	/* (test bit, count bit) allowed (common case) */
+	BM_LOCKED_TEST_ALLOWED = 0x3,
+
+	/* testing bits, as well as setting new bits allowed, but clearing bits
+	 * would be unexpected.  Used during bitmap receive.  Setting new bits
+	 * requires sending of "out-of-sync" information, though. */
+	BM_LOCKED_SET_ALLOWED = 0x1,
+
+	/* clear is not expected while bitmap is locked for bulk operation */
+};
+
+
 /* TODO sort members for performance
  * MAYBE group them further */
 
@@ -925,6 +946,7 @@
 struct bm_io_work {
 	struct drbd_work w;
 	char *why;
+	enum bm_flag flags;
 	int (*io_fn)(struct drbd_conf *mdev);
 	void (*done)(struct drbd_conf *mdev, int rv);
 };
@@ -963,9 +985,12 @@
 	struct drbd_work  resync_work,
 			  unplug_work,
 			  go_diskless,
-			  md_sync_work;
+			  md_sync_work,
+			  start_resync_work;
 	struct timer_list resync_timer;
 	struct timer_list md_sync_timer;
+	struct timer_list start_resync_timer;
+	struct timer_list request_timer;
 #ifdef DRBD_DEBUG_MD_SYNC
 	struct {
 		unsigned int line;
@@ -1000,9 +1025,9 @@
 	struct hlist_head *tl_hash;
 	unsigned int tl_hash_s;
 
-	/* blocks to sync in this run [unit BM_BLOCK_SIZE] */
+	/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
 	unsigned long rs_total;
-	/* number of sync IOs that failed in this run */
+	/* number of resync blocks that failed in this run */
 	unsigned long rs_failed;
 	/* Syncer's start time [unit jiffies] */
 	unsigned long rs_start;
@@ -1102,6 +1127,7 @@
 	struct fifo_buffer rs_plan_s; /* correction values of resync planer */
 	int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
 	int rs_planed;    /* resync sectors already planed */
+	atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1163,14 +1189,19 @@
 };
 
 extern void drbd_init_set_defaults(struct drbd_conf *mdev);
-extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
-			union drbd_state mask, union drbd_state val);
+extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
+					    enum chg_state_flags f,
+					    union drbd_state mask,
+					    union drbd_state val);
 extern void drbd_force_state(struct drbd_conf *, union drbd_state,
 			union drbd_state);
-extern int _drbd_request_state(struct drbd_conf *, union drbd_state,
-			union drbd_state, enum chg_state_flags);
-extern int __drbd_set_state(struct drbd_conf *, union drbd_state,
-			    enum chg_state_flags, struct completion *done);
+extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
+					      union drbd_state,
+					      union drbd_state,
+					      enum chg_state_flags);
+extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
+					   enum chg_state_flags,
+					   struct completion *done);
 extern void print_st_err(struct drbd_conf *, union drbd_state,
 			union drbd_state, int);
 extern int  drbd_thread_start(struct drbd_thread *thi);
@@ -1195,7 +1226,7 @@
 extern int drbd_send_protocol(struct drbd_conf *mdev);
 extern int drbd_send_uuids(struct drbd_conf *mdev);
 extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
-extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val);
+extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev);
 extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags);
 extern int _drbd_send_state(struct drbd_conf *mdev);
 extern int drbd_send_state(struct drbd_conf *mdev);
@@ -1220,11 +1251,10 @@
 			struct p_data *dp, int data_size);
 extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
 			    sector_t sector, int blksize, u64 block_id);
+extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req);
 extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 			   struct drbd_epoch_entry *e);
 extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
-extern int _drbd_send_barrier(struct drbd_conf *mdev,
-			struct drbd_tl_epoch *barrier);
 extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
 			      sector_t sector, int size, u64 block_id);
 extern int drbd_send_drequest_csum(struct drbd_conf *mdev,
@@ -1235,14 +1265,13 @@
 
 extern int drbd_send_bitmap(struct drbd_conf *mdev);
 extern int _drbd_send_bitmap(struct drbd_conf *mdev);
-extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode);
+extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode);
 extern void drbd_free_bc(struct drbd_backing_dev *ldev);
 extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
+void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
 
-/* drbd_meta-data.c (still in drbd_main.c) */
 extern void drbd_md_sync(struct drbd_conf *mdev);
 extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
-/* maybe define them below as inline? */
 extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
 extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
 extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
@@ -1261,10 +1290,12 @@
 extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
 				 int (*io_fn)(struct drbd_conf *),
 				 void (*done)(struct drbd_conf *, int),
-				 char *why);
+				 char *why, enum bm_flag flags);
+extern int drbd_bitmap_io(struct drbd_conf *mdev,
+		int (*io_fn)(struct drbd_conf *),
+		char *why, enum bm_flag flags);
 extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
 extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
-extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
 extern void drbd_go_diskless(struct drbd_conf *mdev);
 extern void drbd_ldev_destroy(struct drbd_conf *mdev);
 
@@ -1313,6 +1344,7 @@
 
 #define BME_NO_WRITES  0  /* bm_extent.flags: no more requests on this one! */
 #define BME_LOCKED     1  /* bm_extent.flags: syncer active on this one. */
+#define BME_PRIORITY   2  /* finish resync IO on this extent ASAP! App IO waiting! */
 
 /* drbd_bitmap.c */
 /*
@@ -1390,7 +1422,9 @@
  * you should use 64bit OS for that much storage, anyways. */
 #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff)
 #else
-#define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0x1LU << 32)
+/* we allow up to 1 PiB now on 64bit architecture with "flexible" meta data */
+#define DRBD_MAX_SECTORS_FLEX (1UL << 51)
+/* corresponds to (1UL << 38) bits right now. */
 #endif
 #endif
 
@@ -1398,7 +1432,7 @@
  * With a value of 8 all IO in one 128K block make it to the same slot of the
  * hash table. */
 #define HT_SHIFT 8
-#define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT))
+#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT))
 
 #define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
 
@@ -1410,16 +1444,20 @@
 extern void drbd_bm_cleanup(struct drbd_conf *mdev);
 extern void drbd_bm_set_all(struct drbd_conf *mdev);
 extern void drbd_bm_clear_all(struct drbd_conf *mdev);
+/* set/clear/test only a few bits at a time */
 extern int  drbd_bm_set_bits(
 		struct drbd_conf *mdev, unsigned long s, unsigned long e);
 extern int  drbd_bm_clear_bits(
 		struct drbd_conf *mdev, unsigned long s, unsigned long e);
-/* bm_set_bits variant for use while holding drbd_bm_lock */
+extern int drbd_bm_count_bits(
+	struct drbd_conf *mdev, const unsigned long s, const unsigned long e);
+/* bm_set_bits variant for use while holding drbd_bm_lock,
+ * may process the whole bitmap in one go */
 extern void _drbd_bm_set_bits(struct drbd_conf *mdev,
 		const unsigned long s, const unsigned long e);
 extern int  drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr);
 extern int  drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
-extern int  drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local);
+extern int  drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
 extern int  drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
 extern int  drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
 extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
@@ -1427,6 +1465,8 @@
 extern size_t	     drbd_bm_words(struct drbd_conf *mdev);
 extern unsigned long drbd_bm_bits(struct drbd_conf *mdev);
 extern sector_t      drbd_bm_capacity(struct drbd_conf *mdev);
+
+#define DRBD_END_OF_BITMAP	(~(unsigned long)0)
 extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
 /* bm_find_next variants for use while you hold drbd_bm_lock() */
 extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
@@ -1437,14 +1477,12 @@
 /* for receive_bitmap */
 extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset,
 		size_t number, unsigned long *buffer);
-/* for _drbd_send_bitmap and drbd_bm_write_sect */
+/* for _drbd_send_bitmap */
 extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset,
 		size_t number, unsigned long *buffer);
 
-extern void drbd_bm_lock(struct drbd_conf *mdev, char *why);
+extern void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags);
 extern void drbd_bm_unlock(struct drbd_conf *mdev);
-
-extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e);
 /* drbd_main.c */
 
 extern struct kmem_cache *drbd_request_cache;
@@ -1467,7 +1505,7 @@
 extern int proc_details;
 
 /* drbd_req */
-extern int drbd_make_request_26(struct request_queue *q, struct bio *bio);
+extern int drbd_make_request(struct request_queue *q, struct bio *bio);
 extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req);
 extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec);
 extern int is_valid_ar_handle(struct drbd_request *, sector_t);
@@ -1482,8 +1520,9 @@
 extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
-extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
-		int force);
+extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
+					enum drbd_role new_role,
+					int force);
 extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
 extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev);
 extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
@@ -1499,6 +1538,7 @@
 extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
 		struct drbd_backing_dev *bdev, sector_t sector, int rw);
 extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);
+extern void drbd_rs_controller_reset(struct drbd_conf *mdev);
 
 static inline void ov_oos_print(struct drbd_conf *mdev)
 {
@@ -1522,21 +1562,23 @@
 extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int);
 extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
 extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
-extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int);
+extern int w_resync_timer(struct drbd_conf *, struct drbd_work *, int);
 extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
-extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
 extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
 extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
 extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int);
+extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int);
 
 extern void resync_timer_fn(unsigned long data);
+extern void start_resync_timer_fn(unsigned long data);
 
 /* drbd_receiver.c */
-extern int drbd_rs_should_slow_down(struct drbd_conf *mdev);
+extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector);
 extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
 		const unsigned rw, const int fault_type);
 extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
@@ -1619,16 +1661,16 @@
 extern void drbd_rs_failed_io(struct drbd_conf *mdev,
 		sector_t sector, int size);
 extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *);
+extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go);
 extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector,
 		int size, const char *file, const unsigned int line);
 #define drbd_set_in_sync(mdev, sector, size) \
 	__drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__)
-extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
+extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
 		int size, const char *file, const unsigned int line);
 #define drbd_set_out_of_sync(mdev, sector, size) \
 	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
 extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
-extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev);
 extern void drbd_al_shrink(struct drbd_conf *mdev);
 
 
@@ -1747,11 +1789,11 @@
 	wake_up(&mdev->misc_wait);
 }
 
-static inline int _drbd_set_state(struct drbd_conf *mdev,
-				   union drbd_state ns, enum chg_state_flags flags,
-				   struct completion *done)
+static inline enum drbd_state_rv
+_drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
+		enum chg_state_flags flags, struct completion *done)
 {
-	int rv;
+	enum drbd_state_rv rv;
 
 	read_lock(&global_state_lock);
 	rv = __drbd_set_state(mdev, ns, flags, done);
@@ -1982,17 +2024,17 @@
 
 static inline void drbd_thread_stop(struct drbd_thread *thi)
 {
-	_drbd_thread_stop(thi, FALSE, TRUE);
+	_drbd_thread_stop(thi, false, true);
 }
 
 static inline void drbd_thread_stop_nowait(struct drbd_thread *thi)
 {
-	_drbd_thread_stop(thi, FALSE, FALSE);
+	_drbd_thread_stop(thi, false, false);
 }
 
 static inline void drbd_thread_restart_nowait(struct drbd_thread *thi)
 {
-	_drbd_thread_stop(thi, TRUE, FALSE);
+	_drbd_thread_stop(thi, true, false);
 }
 
 /* counts how many answer packets packets we expect from our peer,
@@ -2146,17 +2188,18 @@
 static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,
 		unsigned long *bits_left, unsigned int *per_mil_done)
 {
-	/*
-	 * this is to break it at compile time when we change that
-	 * (we may feel 4TB maximum storage per drbd is not enough)
-	 */
+	/* this is to break it at compile time when we change that, in case we
+	 * want to support more than (1<<32) bits on a 32bit arch. */
 	typecheck(unsigned long, mdev->rs_total);
 
 	/* note: both rs_total and rs_left are in bits, i.e. in
 	 * units of BM_BLOCK_SIZE.
 	 * for the percentage, we don't care. */
 
-	*bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
+	if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+		*bits_left = mdev->ov_left;
+	else
+		*bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
 	/* >> 10 to prevent overflow,
 	 * +1 to prevent division by zero */
 	if (*bits_left > mdev->rs_total) {
@@ -2171,10 +2214,19 @@
 				*bits_left, mdev->rs_total, mdev->rs_failed);
 		*per_mil_done = 0;
 	} else {
-		/* make sure the calculation happens in long context */
-		unsigned long tmp = 1000UL -
-				(*bits_left >> 10)*1000UL
-				/ ((mdev->rs_total >> 10) + 1UL);
+		/* Make sure the division happens in long context.
+		 * We allow up to one petabyte storage right now,
+		 * at a granularity of 4k per bit that is 2**38 bits.
+		 * After shift right and multiplication by 1000,
+		 * this should still fit easily into a 32bit long,
+		 * so we don't need a 64bit division on 32bit arch.
+		 * Note: currently we don't support such large bitmaps on 32bit
+		 * arch anyways, but no harm done to be prepared for it here.
+		 */
+		unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10;
+		unsigned long left = *bits_left >> shift;
+		unsigned long total = 1UL + (mdev->rs_total >> shift);
+		unsigned long tmp = 1000UL - left * 1000UL/total;
 		*per_mil_done = tmp;
 	}
 }
@@ -2193,8 +2245,9 @@
 	return mxb;
 }
 
-static inline int drbd_state_is_stable(union drbd_state s)
+static inline int drbd_state_is_stable(struct drbd_conf *mdev)
 {
+	union drbd_state s = mdev->state;
 
 	/* DO NOT add a default clause, we want the compiler to warn us
 	 * for any newly introduced state we may have forgotten to add here */
@@ -2211,11 +2264,9 @@
 	case C_VERIFY_T:
 	case C_PAUSED_SYNC_S:
 	case C_PAUSED_SYNC_T:
-		/* maybe stable, look at the disk state */
-		break;
-
-	/* no new io accepted during tansitional states
-	 * like handshake or teardown */
+	case C_AHEAD:
+	case C_BEHIND:
+		/* transitional states, IO allowed */
 	case C_DISCONNECTING:
 	case C_UNCONNECTED:
 	case C_TIMEOUT:
@@ -2226,7 +2277,15 @@
 	case C_WF_REPORT_PARAMS:
 	case C_STARTING_SYNC_S:
 	case C_STARTING_SYNC_T:
+		break;
+
+		/* Allow IO in BM exchange states with new protocols */
 	case C_WF_BITMAP_S:
+		if (mdev->agreed_pro_version < 96)
+			return 0;
+		break;
+
+		/* no new io accepted in these states */
 	case C_WF_BITMAP_T:
 	case C_WF_SYNC_UUID:
 	case C_MASK:
@@ -2261,41 +2320,47 @@
 	return s.susp || s.susp_nod || s.susp_fen;
 }
 
-static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
+static inline bool may_inc_ap_bio(struct drbd_conf *mdev)
 {
 	int mxb = drbd_get_max_buffers(mdev);
 
 	if (is_susp(mdev->state))
-		return 0;
+		return false;
 	if (test_bit(SUSPEND_IO, &mdev->flags))
-		return 0;
+		return false;
 
 	/* to avoid potential deadlock or bitmap corruption,
 	 * in various places, we only allow new application io
 	 * to start during "stable" states. */
 
 	/* no new io accepted when attaching or detaching the disk */
-	if (!drbd_state_is_stable(mdev->state))
-		return 0;
+	if (!drbd_state_is_stable(mdev))
+		return false;
 
 	/* since some older kernels don't have atomic_add_unless,
 	 * and we are within the spinlock anyways, we have this workaround.  */
 	if (atomic_read(&mdev->ap_bio_cnt) > mxb)
-		return 0;
+		return false;
 	if (test_bit(BITMAP_IO, &mdev->flags))
-		return 0;
-	return 1;
+		return false;
+	return true;
 }
 
-/* I'd like to use wait_event_lock_irq,
- * but I'm not sure when it got introduced,
- * and not sure when it has 3 or 4 arguments */
+static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count)
+{
+	bool rv = false;
+
+	spin_lock_irq(&mdev->req_lock);
+	rv = may_inc_ap_bio(mdev);
+	if (rv)
+		atomic_add(count, &mdev->ap_bio_cnt);
+	spin_unlock_irq(&mdev->req_lock);
+
+	return rv;
+}
+
 static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
 {
-	/* compare with after_state_ch,
-	 * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */
-	DEFINE_WAIT(wait);
-
 	/* we wait here
 	 *    as long as the device is suspended
 	 *    until the bitmap is no longer on the fly during connection
@@ -2304,16 +2369,7 @@
 	 * to avoid races with the reconnect code,
 	 * we need to atomic_inc within the spinlock. */
 
-	spin_lock_irq(&mdev->req_lock);
-	while (!__inc_ap_bio_cond(mdev)) {
-		prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
-		spin_unlock_irq(&mdev->req_lock);
-		schedule();
-		finish_wait(&mdev->misc_wait, &wait);
-		spin_lock_irq(&mdev->req_lock);
-	}
-	atomic_add(count, &mdev->ap_bio_cnt);
-	spin_unlock_irq(&mdev->req_lock);
+	wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev, count));
 }
 
 static inline void dec_ap_bio(struct drbd_conf *mdev)
@@ -2333,9 +2389,11 @@
 	}
 }
 
-static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
+static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
 {
+	int changed = mdev->ed_uuid != val;
 	mdev->ed_uuid = val;
+	return changed;
 }
 
 static inline int seq_cmp(u32 a, u32 b)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 8a43ce0..dfc85f3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c

@@ -85,7 +85,8 @@
 MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
 MODULE_VERSION(REL_VERSION);
 MODULE_LICENSE("GPL");
-MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)");
+MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
+		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
 MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
 
 #include <linux/moduleparam.h>
@@ -115,7 +116,7 @@
 #endif
 
 /* module parameter, defined */
-unsigned int minor_count = 32;
+unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
 int disable_sendpage;
 int allow_oos;
 unsigned int cn_idx = CN_IDX_DRBD;
@@ -335,6 +336,7 @@
 	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
 }
 
+
 /**
  * _tl_restart() - Walks the transfer log, and applies an action to all requests
  * @mdev:	DRBD device.
@@ -456,7 +458,7 @@
 }
 
 /**
- * cl_wide_st_chg() - TRUE if the state change is a cluster wide one
+ * cl_wide_st_chg() - true if the state change is a cluster wide one
  * @mdev:	DRBD device.
  * @os:		old (current) state.
  * @ns:		new (wanted) state.
@@ -473,12 +475,13 @@
 		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
 }
 
-int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
-		      union drbd_state mask, union drbd_state val)
+enum drbd_state_rv
+drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
+		  union drbd_state mask, union drbd_state val)
 {
 	unsigned long flags;
 	union drbd_state os, ns;
-	int rv;
+	enum drbd_state_rv rv;
 
 	spin_lock_irqsave(&mdev->req_lock, flags);
 	os = mdev->state;
@@ -502,20 +505,22 @@
 	drbd_change_state(mdev, CS_HARD, mask, val);
 }
 
-static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns);
-static int is_valid_state_transition(struct drbd_conf *,
-				     union drbd_state, union drbd_state);
+static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
+static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
+						    union drbd_state,
+						    union drbd_state);
 static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
 				       union drbd_state ns, const char **warn_sync_abort);
 int drbd_send_state_req(struct drbd_conf *,
 			union drbd_state, union drbd_state);
 
-static enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev,
-				    union drbd_state mask, union drbd_state val)
+static enum drbd_state_rv
+_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
+	     union drbd_state val)
 {
 	union drbd_state os, ns;
 	unsigned long flags;
-	int rv;
+	enum drbd_state_rv rv;
 
 	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
 		return SS_CW_SUCCESS;
@@ -536,7 +541,7 @@
 		if (rv == SS_SUCCESS) {
 			rv = is_valid_state_transition(mdev, ns, os);
 			if (rv == SS_SUCCESS)
-				rv = 0; /* cont waiting, otherwise fail. */
+				rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
 		}
 	}
 	spin_unlock_irqrestore(&mdev->req_lock, flags);
@@ -554,14 +559,14 @@
  * Should not be called directly, use drbd_request_state() or
  * _drbd_request_state().
  */
-static int drbd_req_state(struct drbd_conf *mdev,
-			  union drbd_state mask, union drbd_state val,
-			  enum chg_state_flags f)
+static enum drbd_state_rv
+drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
+	       union drbd_state val, enum chg_state_flags f)
 {
 	struct completion done;
 	unsigned long flags;
 	union drbd_state os, ns;
-	int rv;
+	enum drbd_state_rv rv;
 
 	init_completion(&done);
 
@@ -636,10 +641,11 @@
  * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
  * flag, or when logging of failed state change requests is not desired.
  */
-int _drbd_request_state(struct drbd_conf *mdev,	union drbd_state mask,
-			union drbd_state val,	enum chg_state_flags f)
+enum drbd_state_rv
+_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
+		    union drbd_state val, enum chg_state_flags f)
 {
-	int rv;
+	enum drbd_state_rv rv;
 
 	wait_event(mdev->state_wait,
 		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
@@ -663,8 +669,8 @@
 	    );
 }
 
-void print_st_err(struct drbd_conf *mdev,
-	union drbd_state os, union drbd_state ns, int err)
+void print_st_err(struct drbd_conf *mdev, union drbd_state os,
+	          union drbd_state ns, enum drbd_state_rv err)
 {
 	if (err == SS_IN_TRANSIENT_STATE)
 		return;
@@ -674,32 +680,18 @@
 }
 
 
-#define drbd_peer_str drbd_role_str
-#define drbd_pdsk_str drbd_disk_str
-
-#define drbd_susp_str(A)     ((A) ? "1" : "0")
-#define drbd_aftr_isp_str(A) ((A) ? "1" : "0")
-#define drbd_peer_isp_str(A) ((A) ? "1" : "0")
-#define drbd_user_isp_str(A) ((A) ? "1" : "0")
-
-#define PSC(A) \
-	({ if (ns.A != os.A) { \
-		pbp += sprintf(pbp, #A "( %s -> %s ) ", \
-			      drbd_##A##_str(os.A), \
-			      drbd_##A##_str(ns.A)); \
-	} })
-
 /**
  * is_valid_state() - Returns an SS_ error code if ns is not valid
  * @mdev:	DRBD device.
  * @ns:		State to consider.
  */
-static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
+static enum drbd_state_rv
+is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 {
 	/* See drbd_state_sw_errors in drbd_strings.c */
 
 	enum drbd_fencing_p fp;
-	int rv = SS_SUCCESS;
+	enum drbd_state_rv rv = SS_SUCCESS;
 
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
@@ -762,10 +754,11 @@
  * @ns:		new state.
  * @os:		old state.
  */
-static int is_valid_state_transition(struct drbd_conf *mdev,
-				     union drbd_state ns, union drbd_state os)
+static enum drbd_state_rv
+is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
+			  union drbd_state os)
 {
-	int rv = SS_SUCCESS;
+	enum drbd_state_rv rv = SS_SUCCESS;
 
 	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
 	    os.conn > C_CONNECTED)
@@ -800,6 +793,10 @@
 	    os.conn < C_CONNECTED)
 		rv = SS_NEED_CONNECTION;
 
+	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
+	    && os.conn < C_WF_REPORT_PARAMS)
+		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
+
 	return rv;
 }
 
@@ -817,6 +814,7 @@
 				       union drbd_state ns, const char **warn_sync_abort)
 {
 	enum drbd_fencing_p fp;
+	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
 
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
@@ -869,56 +867,6 @@
 		ns.conn = C_CONNECTED;
 	}
 
-	if (ns.conn >= C_CONNECTED &&
-	    ((ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) ||
-	     (ns.disk == D_NEGOTIATING && ns.conn == C_WF_BITMAP_T))) {
-		switch (ns.conn) {
-		case C_WF_BITMAP_T:
-		case C_PAUSED_SYNC_T:
-			ns.disk = D_OUTDATED;
-			break;
-		case C_CONNECTED:
-		case C_WF_BITMAP_S:
-		case C_SYNC_SOURCE:
-		case C_PAUSED_SYNC_S:
-			ns.disk = D_UP_TO_DATE;
-			break;
-		case C_SYNC_TARGET:
-			ns.disk = D_INCONSISTENT;
-			dev_warn(DEV, "Implicitly set disk state Inconsistent!\n");
-			break;
-		}
-		if (os.disk == D_OUTDATED && ns.disk == D_UP_TO_DATE)
-			dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n");
-	}
-
-	if (ns.conn >= C_CONNECTED &&
-	    (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)) {
-		switch (ns.conn) {
-		case C_CONNECTED:
-		case C_WF_BITMAP_T:
-		case C_PAUSED_SYNC_T:
-		case C_SYNC_TARGET:
-			ns.pdsk = D_UP_TO_DATE;
-			break;
-		case C_WF_BITMAP_S:
-		case C_PAUSED_SYNC_S:
-			/* remap any consistent state to D_OUTDATED,
-			 * but disallow "upgrade" of not even consistent states.
-			 */
-			ns.pdsk =
-				(D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED)
-				? os.pdsk : D_OUTDATED;
-			break;
-		case C_SYNC_SOURCE:
-			ns.pdsk = D_INCONSISTENT;
-			dev_warn(DEV, "Implicitly set pdsk Inconsistent!\n");
-			break;
-		}
-		if (os.pdsk == D_OUTDATED && ns.pdsk == D_UP_TO_DATE)
-			dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n");
-	}
-
 	/* Connection breaks down before we finished "Negotiating" */
 	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
 	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
@@ -933,6 +881,94 @@
 		put_ldev(mdev);
 	}
 
+	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
+	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
+		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
+			ns.disk = D_UP_TO_DATE;
+		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
+			ns.pdsk = D_UP_TO_DATE;
+	}
+
+	/* Implications of the connection stat on the disk states */
+	disk_min = D_DISKLESS;
+	disk_max = D_UP_TO_DATE;
+	pdsk_min = D_INCONSISTENT;
+	pdsk_max = D_UNKNOWN;
+	switch ((enum drbd_conns)ns.conn) {
+	case C_WF_BITMAP_T:
+	case C_PAUSED_SYNC_T:
+	case C_STARTING_SYNC_T:
+	case C_WF_SYNC_UUID:
+	case C_BEHIND:
+		disk_min = D_INCONSISTENT;
+		disk_max = D_OUTDATED;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_VERIFY_S:
+	case C_VERIFY_T:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_CONNECTED:
+		disk_min = D_DISKLESS;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_DISKLESS;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_WF_BITMAP_S:
+	case C_PAUSED_SYNC_S:
+	case C_STARTING_SYNC_S:
+	case C_AHEAD:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_INCONSISTENT;
+		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
+		break;
+	case C_SYNC_TARGET:
+		disk_min = D_INCONSISTENT;
+		disk_max = D_INCONSISTENT;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_SYNC_SOURCE:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_INCONSISTENT;
+		pdsk_max = D_INCONSISTENT;
+		break;
+	case C_STANDALONE:
+	case C_DISCONNECTING:
+	case C_UNCONNECTED:
+	case C_TIMEOUT:
+	case C_BROKEN_PIPE:
+	case C_NETWORK_FAILURE:
+	case C_PROTOCOL_ERROR:
+	case C_TEAR_DOWN:
+	case C_WF_CONNECTION:
+	case C_WF_REPORT_PARAMS:
+	case C_MASK:
+		break;
+	}
+	if (ns.disk > disk_max)
+		ns.disk = disk_max;
+
+	if (ns.disk < disk_min) {
+		dev_warn(DEV, "Implicitly set disk from %s to %s\n",
+			 drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
+		ns.disk = disk_min;
+	}
+	if (ns.pdsk > pdsk_max)
+		ns.pdsk = pdsk_max;
+
+	if (ns.pdsk < pdsk_min) {
+		dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
+			 drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
+		ns.pdsk = pdsk_min;
+	}
+
 	if (fp == FP_STONITH &&
 	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
 	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
@@ -961,6 +997,10 @@
 /* helper for __drbd_set_state */
 static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
 {
+	if (mdev->agreed_pro_version < 90)
+		mdev->ov_start_sector = 0;
+	mdev->rs_total = drbd_bm_bits(mdev);
+	mdev->ov_position = 0;
 	if (cs == C_VERIFY_T) {
 		/* starting online verify from an arbitrary position
 		 * does not fit well into the existing protocol.
@@ -970,11 +1010,15 @@
 		mdev->ov_start_sector = ~(sector_t)0;
 	} else {
 		unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
-		if (bit >= mdev->rs_total)
+		if (bit >= mdev->rs_total) {
 			mdev->ov_start_sector =
 				BM_BIT_TO_SECT(mdev->rs_total - 1);
+			mdev->rs_total = 1;
+		} else
+			mdev->rs_total -= bit;
 		mdev->ov_position = mdev->ov_start_sector;
 	}
+	mdev->ov_left = mdev->rs_total;
 }
 
 static void drbd_resume_al(struct drbd_conf *mdev)
@@ -992,12 +1036,12 @@
  *
  * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
  */
-int __drbd_set_state(struct drbd_conf *mdev,
-		    union drbd_state ns, enum chg_state_flags flags,
-		    struct completion *done)
+enum drbd_state_rv
+__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
+	         enum chg_state_flags flags, struct completion *done)
 {
 	union drbd_state os;
-	int rv = SS_SUCCESS;
+	enum drbd_state_rv rv = SS_SUCCESS;
 	const char *warn_sync_abort = NULL;
 	struct after_state_chg_work *ascw;
 
@@ -1033,22 +1077,46 @@
 		dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
 
 	{
-		char *pbp, pb[300];
-		pbp = pb;
-		*pbp = 0;
-		PSC(role);
-		PSC(peer);
-		PSC(conn);
-		PSC(disk);
-		PSC(pdsk);
-		if (is_susp(ns) != is_susp(os))
-			pbp += sprintf(pbp, "susp( %s -> %s ) ",
-				       drbd_susp_str(is_susp(os)),
-				       drbd_susp_str(is_susp(ns)));
-		PSC(aftr_isp);
-		PSC(peer_isp);
-		PSC(user_isp);
-		dev_info(DEV, "%s\n", pb);
+	char *pbp, pb[300];
+	pbp = pb;
+	*pbp = 0;
+	if (ns.role != os.role)
+		pbp += sprintf(pbp, "role( %s -> %s ) ",
+			       drbd_role_str(os.role),
+			       drbd_role_str(ns.role));
+	if (ns.peer != os.peer)
+		pbp += sprintf(pbp, "peer( %s -> %s ) ",
+			       drbd_role_str(os.peer),
+			       drbd_role_str(ns.peer));
+	if (ns.conn != os.conn)
+		pbp += sprintf(pbp, "conn( %s -> %s ) ",
+			       drbd_conn_str(os.conn),
+			       drbd_conn_str(ns.conn));
+	if (ns.disk != os.disk)
+		pbp += sprintf(pbp, "disk( %s -> %s ) ",
+			       drbd_disk_str(os.disk),
+			       drbd_disk_str(ns.disk));
+	if (ns.pdsk != os.pdsk)
+		pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
+			       drbd_disk_str(os.pdsk),
+			       drbd_disk_str(ns.pdsk));
+	if (is_susp(ns) != is_susp(os))
+		pbp += sprintf(pbp, "susp( %d -> %d ) ",
+			       is_susp(os),
+			       is_susp(ns));
+	if (ns.aftr_isp != os.aftr_isp)
+		pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
+			       os.aftr_isp,
+			       ns.aftr_isp);
+	if (ns.peer_isp != os.peer_isp)
+		pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
+			       os.peer_isp,
+			       ns.peer_isp);
+	if (ns.user_isp != os.user_isp)
+		pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
+			       os.user_isp,
+			       ns.user_isp);
+	dev_info(DEV, "%s\n", pb);
 	}
 
 	/* solve the race between becoming unconfigured,
@@ -1074,6 +1142,10 @@
 		atomic_inc(&mdev->local_cnt);
 
 	mdev->state = ns;
+
+	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
+		drbd_print_uuids(mdev, "attached to UUIDs");
+
 	wake_up(&mdev->misc_wait);
 	wake_up(&mdev->state_wait);
 
@@ -1081,7 +1153,7 @@
 	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
 	    ns.conn < C_CONNECTED) {
 		mdev->ov_start_sector =
-			BM_BIT_TO_SECT(mdev->rs_total - mdev->ov_left);
+			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
 		dev_info(DEV, "Online Verify reached sector %llu\n",
 			(unsigned long long)mdev->ov_start_sector);
 	}
@@ -1106,14 +1178,7 @@
 		unsigned long now = jiffies;
 		int i;
 
-		mdev->ov_position = 0;
-		mdev->rs_total = drbd_bm_bits(mdev);
-		if (mdev->agreed_pro_version >= 90)
-			set_ov_position(mdev, ns.conn);
-		else
-			mdev->ov_start_sector = 0;
-		mdev->ov_left = mdev->rs_total
-			      - BM_SECT_TO_BIT(mdev->ov_position);
+		set_ov_position(mdev, ns.conn);
 		mdev->rs_start = now;
 		mdev->rs_last_events = 0;
 		mdev->rs_last_sect_ev = 0;
@@ -1121,10 +1186,12 @@
 		mdev->ov_last_oos_start = 0;
 
 		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
-			mdev->rs_mark_left[i] = mdev->rs_total;
+			mdev->rs_mark_left[i] = mdev->ov_left;
 			mdev->rs_mark_time[i] = now;
 		}
 
+		drbd_rs_controller_reset(mdev);
+
 		if (ns.conn == C_VERIFY_S) {
 			dev_info(DEV, "Starting Online Verify from sector %llu\n",
 					(unsigned long long)mdev->ov_position);
@@ -1228,6 +1295,26 @@
 	}
 }
 
+int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
+		int (*io_fn)(struct drbd_conf *),
+		char *why, enum bm_flag flags)
+{
+	int rv;
+
+	D_ASSERT(current == mdev->worker.task);
+
+	/* open coded non-blocking drbd_suspend_io(mdev); */
+	set_bit(SUSPEND_IO, &mdev->flags);
+
+	drbd_bm_lock(mdev, why, flags);
+	rv = io_fn(mdev);
+	drbd_bm_unlock(mdev);
+
+	drbd_resume_io(mdev);
+
+	return rv;
+}
+
 /**
  * after_state_ch() - Perform after state change actions that may sleep
  * @mdev:	DRBD device.
@@ -1266,16 +1353,14 @@
 
 	nsm.i = -1;
 	if (ns.susp_nod) {
-		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
-			if (ns.conn == C_CONNECTED)
-				what = resend, nsm.susp_nod = 0;
-			else /* ns.conn > C_CONNECTED */
-				dev_err(DEV, "Unexpected Resynd going on!\n");
-		}
+		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+			what = resend;
 
 		if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
-			what = restart_frozen_disk_io, nsm.susp_nod = 0;
+			what = restart_frozen_disk_io;
 
+		if (what != nothing)
+			nsm.susp_nod = 0;
 	}
 
 	if (ns.susp_fen) {
@@ -1306,13 +1391,30 @@
 		spin_unlock_irq(&mdev->req_lock);
 	}
 
+	/* Became sync source.  With protocol >= 96, we still need to send out
+	 * the sync uuid now. Need to do that before any drbd_send_state, or
+	 * the other side may go "paused sync" before receiving the sync uuids,
+	 * which is unexpected. */
+	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
+	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
+	    mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
+		drbd_gen_and_send_sync_uuid(mdev);
+		put_ldev(mdev);
+	}
+
 	/* Do not change the order of the if above and the two below... */
 	if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
 		drbd_send_uuids(mdev);
 		drbd_send_state(mdev);
 	}
-	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S)
-		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)");
+	/* No point in queuing send_bitmap if we don't have a connection
+	 * anymore, so check also the _current_ state, not only the new state
+	 * at the time this work was queued. */
+	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
+	    mdev->state.conn == C_WF_BITMAP_S)
+		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
+				"send_bitmap (WFBitMapS)",
+				BM_LOCKED_TEST_ALLOWED);
 
 	/* Lost contact to peer's copy of the data */
 	if ((os.pdsk >= D_INCONSISTENT &&
@@ -1343,7 +1445,23 @@
 
 		/* D_DISKLESS Peer becomes secondary */
 		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
-			drbd_al_to_on_disk_bm(mdev);
+			/* We may still be Primary ourselves.
+			 * No harm done if the bitmap still changes,
+			 * redirtied pages will follow later. */
+			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
+		put_ldev(mdev);
+	}
+
+	/* Write out all changed bits on demote.
+	 * Though, no need to da that just yet
+	 * if there is a resync going on still */
+	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
+		mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
+		/* No changes to the bitmap expected this time, so assert that,
+		 * even though no harm was done if it did change. */
+		drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+				"demote", BM_LOCKED_TEST_ALLOWED);
 		put_ldev(mdev);
 	}
 
@@ -1371,15 +1489,23 @@
 	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
 		drbd_send_state(mdev);
 
+	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
+		drbd_send_state(mdev);
+
 	/* We are in the progress to start a full sync... */
 	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
 	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
-		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync");
+		/* no other bitmap changes expected during this phase */
+		drbd_queue_bitmap_io(mdev,
+			&drbd_bmio_set_n_write, &abw_start_sync,
+			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
 
 	/* We are invalidating our self... */
 	if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
 	    os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
-		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
+		/* other bitmap operation expected during this phase */
+		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
+			"set_n_write from invalidate", BM_LOCKED_MASK);
 
 	/* first half of local IO error, failure to attach,
 	 * or administrative detach */
@@ -1434,8 +1560,6 @@
 
 		if (drbd_send_state(mdev))
 			dev_warn(DEV, "Notified peer that I'm now diskless.\n");
-		else
-			dev_err(DEV, "Sending state for being diskless failed\n");
 		/* corresponding get_ldev in __drbd_set_state
 		 * this may finaly trigger drbd_ldev_destroy. */
 		put_ldev(mdev);
@@ -1459,6 +1583,19 @@
 	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
 		drbd_send_state(mdev);
 
+	/* This triggers bitmap writeout of potentially still unwritten pages
+	 * if the resync finished cleanly, or aborted because of peer disk
+	 * failure, or because of connection loss.
+	 * For resync aborted because of local disk failure, we cannot do
+	 * any bitmap writeout anymore.
+	 * No harm done if some bits change during this phase.
+	 */
+	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
+		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
+			"write from resync_finished", BM_LOCKED_SET_ALLOWED);
+		put_ldev(mdev);
+	}
+
 	/* free tl_hash if we Got thawed and are C_STANDALONE */
 	if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
 		drbd_free_tl_hash(mdev);
@@ -1559,7 +1696,7 @@
 		if (!try_module_get(THIS_MODULE)) {
 			dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
 			spin_unlock_irqrestore(&thi->t_lock, flags);
-			return FALSE;
+			return false;
 		}
 
 		init_completion(&thi->stop);
@@ -1576,7 +1713,7 @@
 			dev_err(DEV, "Couldn't start thread\n");
 
 			module_put(THIS_MODULE);
-			return FALSE;
+			return false;
 		}
 		spin_lock_irqsave(&thi->t_lock, flags);
 		thi->task = nt;
@@ -1596,7 +1733,7 @@
 		break;
 	}
 
-	return TRUE;
+	return true;
 }
 
 
@@ -1694,8 +1831,8 @@
 {
 	int sent, ok;
 
-	ERR_IF(!h) return FALSE;
-	ERR_IF(!size) return FALSE;
+	ERR_IF(!h) return false;
+	ERR_IF(!size) return false;
 
 	h->magic   = BE_DRBD_MAGIC;
 	h->command = cpu_to_be16(cmd);
@@ -1704,8 +1841,8 @@
 	sent = drbd_send(mdev, sock, h, size, msg_flags);
 
 	ok = (sent == size);
-	if (!ok)
-		dev_err(DEV, "short sent %s size=%d sent=%d\n",
+	if (!ok && !signal_pending(current))
+		dev_warn(DEV, "short sent %s size=%d sent=%d\n",
 		    cmdname(cmd), (int)size, sent);
 	return ok;
 }
@@ -1840,7 +1977,7 @@
 		else {
 			dev_err(DEV, "--dry-run is not supported by peer");
 			kfree(p);
-			return 0;
+			return -1;
 		}
 	}
 	p->conn_flags    = cpu_to_be32(cf);
@@ -1888,12 +2025,36 @@
 	return _drbd_send_uuids(mdev, 8);
 }
 
+void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
+{
+	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+		u64 *uuid = mdev->ldev->md.uuid;
+		dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
+		     text,
+		     (unsigned long long)uuid[UI_CURRENT],
+		     (unsigned long long)uuid[UI_BITMAP],
+		     (unsigned long long)uuid[UI_HISTORY_START],
+		     (unsigned long long)uuid[UI_HISTORY_END]);
+		put_ldev(mdev);
+	} else {
+		dev_info(DEV, "%s effective data uuid: %016llX\n",
+				text,
+				(unsigned long long)mdev->ed_uuid);
+	}
+}
 
-int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
+int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
 {
 	struct p_rs_uuid p;
+	u64 uuid;
 
-	p.uuid = cpu_to_be64(val);
+	D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
+
+	uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
+	drbd_uuid_set(mdev, UI_BITMAP, uuid);
+	drbd_print_uuids(mdev, "updated sync UUID");
+	drbd_md_sync(mdev);
+	p.uuid = cpu_to_be64(uuid);
 
 	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
 			     (struct p_header80 *)&p, sizeof(p));
@@ -1921,7 +2082,7 @@
 	p.d_size = cpu_to_be64(d_size);
 	p.u_size = cpu_to_be64(u_size);
 	p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
-	p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue));
+	p.max_bio_size = cpu_to_be32(queue_max_hw_sectors(mdev->rq_queue) << 9);
 	p.queue_order_type = cpu_to_be16(q_order_type);
 	p.dds_flags = cpu_to_be16(flags);
 
@@ -1972,7 +2133,7 @@
 			     (struct p_header80 *)&p, sizeof(p));
 }
 
-int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode)
+int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
 {
 	struct p_req_state_reply p;
 
@@ -2076,9 +2237,15 @@
 	return len;
 }
 
-enum { OK, FAILED, DONE }
+/**
+ * send_bitmap_rle_or_plain
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
 send_bitmap_rle_or_plain(struct drbd_conf *mdev,
-	struct p_header80 *h, struct bm_xfer_ctx *c)
+			 struct p_header80 *h, struct bm_xfer_ctx *c)
 {
 	struct p_compressed_bm *p = (void*)h;
 	unsigned long num_words;
@@ -2088,7 +2255,7 @@
 	len = fill_bitmap_rle_bits(mdev, p, c);
 
 	if (len < 0)
-		return FAILED;
+		return -EIO;
 
 	if (len) {
 		DCBP_set_code(p, RLE_VLI_Bits);
@@ -2118,11 +2285,14 @@
 		if (c->bit_offset > c->bm_bits)
 			c->bit_offset = c->bm_bits;
 	}
-	ok = ok ? ((len == 0) ? DONE : OK) : FAILED;
-
-	if (ok == DONE)
-		INFO_bm_xfer_stats(mdev, "send", c);
-	return ok;
+	if (ok) {
+		if (len == 0) {
+			INFO_bm_xfer_stats(mdev, "send", c);
+			return 0;
+		} else
+			return 1;
+	}
+	return -EIO;
 }
 
 /* See the comment at receive_bitmap() */
@@ -2130,16 +2300,16 @@
 {
 	struct bm_xfer_ctx c;
 	struct p_header80 *p;
-	int ret;
+	int err;
 
-	ERR_IF(!mdev->bitmap) return FALSE;
+	ERR_IF(!mdev->bitmap) return false;
 
 	/* maybe we should use some per thread scratch page,
 	 * and allocate that during initial device creation? */
 	p = (struct p_header80 *) __get_free_page(GFP_NOIO);
 	if (!p) {
 		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
-		return FALSE;
+		return false;
 	}
 
 	if (get_ldev(mdev)) {
@@ -2165,11 +2335,11 @@
 	};
 
 	do {
-		ret = send_bitmap_rle_or_plain(mdev, p, &c);
-	} while (ret == OK);
+		err = send_bitmap_rle_or_plain(mdev, p, &c);
+	} while (err > 0);
 
 	free_page((unsigned long) p);
-	return (ret == DONE);
+	return err == 0;
 }
 
 int drbd_send_bitmap(struct drbd_conf *mdev)
@@ -2192,7 +2362,7 @@
 	p.set_size = cpu_to_be32(set_size);
 
 	if (mdev->state.conn < C_CONNECTED)
-		return FALSE;
+		return false;
 	ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
 			(struct p_header80 *)&p, sizeof(p));
 	return ok;
@@ -2220,7 +2390,7 @@
 	p.seq_num  = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
 
 	if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
-		return FALSE;
+		return false;
 	ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
 				(struct p_header80 *)&p, sizeof(p));
 	return ok;
@@ -2326,8 +2496,8 @@
 }
 
 /* called on sndtimeo
- * returns FALSE if we should retry,
- * TRUE if we think connection is dead
+ * returns false if we should retry,
+ * true if we think connection is dead
  */
 static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
 {
@@ -2340,7 +2510,7 @@
 		|| mdev->state.conn < C_CONNECTED;
 
 	if (drop_it)
-		return TRUE;
+		return true;
 
 	drop_it = !--mdev->ko_count;
 	if (!drop_it) {
@@ -2531,13 +2701,39 @@
 	if (ok && dgs) {
 		dgb = mdev->int_dig_out;
 		drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
-		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
+		ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
 	}
 	if (ok) {
-		if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
+		/* For protocol A, we have to memcpy the payload into
+		 * socket buffers, as we may complete right away
+		 * as soon as we handed it over to tcp, at which point the data
+		 * pages may become invalid.
+		 *
+		 * For data-integrity enabled, we copy it as well, so we can be
+		 * sure that even if the bio pages may still be modified, it
+		 * won't change the data on the wire, thus if the digest checks
+		 * out ok after sending on this side, but does not fit on the
+		 * receiving side, we sure have detected corruption elsewhere.
+		 */
+		if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs)
 			ok = _drbd_send_bio(mdev, req->master_bio);
 		else
 			ok = _drbd_send_zc_bio(mdev, req->master_bio);
+
+		/* double check digest, sometimes buffers have been modified in flight. */
+		if (dgs > 0 && dgs <= 64) {
+			/* 64 byte, 512 bit, is the larges digest size
+			 * currently supported in kernel crypto. */
+			unsigned char digest[64];
+			drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
+			if (memcmp(mdev->int_dig_out, digest, dgs)) {
+				dev_warn(DEV,
+					"Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
+					(unsigned long long)req->sector, req->size);
+			}
+		} /* else if (dgs > 64) {
+		     ... Be noisy about digest too large ...
+		} */
 	}
 
 	drbd_put_data_sock(mdev);
@@ -2587,7 +2783,7 @@
 	if (ok && dgs) {
 		dgb = mdev->int_dig_out;
 		drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
-		ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
+		ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
 	}
 	if (ok)
 		ok = _drbd_send_zc_ee(mdev, e);
@@ -2597,6 +2793,16 @@
 	return ok;
 }
 
+int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
+{
+	struct p_block_desc p;
+
+	p.sector  = cpu_to_be64(req->sector);
+	p.blksize = cpu_to_be32(req->size);
+
+	return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
+}
+
 /*
   drbd_send distinguishes two cases:
 
@@ -2770,6 +2976,7 @@
 	atomic_set(&mdev->pp_in_use_by_net, 0);
 	atomic_set(&mdev->rs_sect_in, 0);
 	atomic_set(&mdev->rs_sect_ev, 0);
+	atomic_set(&mdev->ap_in_flight, 0);
 
 	mutex_init(&mdev->md_io_mutex);
 	mutex_init(&mdev->data.mutex);
@@ -2798,19 +3005,27 @@
 	INIT_LIST_HEAD(&mdev->unplug_work.list);
 	INIT_LIST_HEAD(&mdev->go_diskless.list);
 	INIT_LIST_HEAD(&mdev->md_sync_work.list);
+	INIT_LIST_HEAD(&mdev->start_resync_work.list);
 	INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
 
-	mdev->resync_work.cb  = w_resync_inactive;
+	mdev->resync_work.cb  = w_resync_timer;
 	mdev->unplug_work.cb  = w_send_write_hint;
 	mdev->go_diskless.cb  = w_go_diskless;
 	mdev->md_sync_work.cb = w_md_sync;
 	mdev->bm_io_work.w.cb = w_bitmap_io;
+	mdev->start_resync_work.cb = w_start_resync;
 	init_timer(&mdev->resync_timer);
 	init_timer(&mdev->md_sync_timer);
+	init_timer(&mdev->start_resync_timer);
+	init_timer(&mdev->request_timer);
 	mdev->resync_timer.function = resync_timer_fn;
 	mdev->resync_timer.data = (unsigned long) mdev;
 	mdev->md_sync_timer.function = md_sync_timer_fn;
 	mdev->md_sync_timer.data = (unsigned long) mdev;
+	mdev->start_resync_timer.function = start_resync_timer_fn;
+	mdev->start_resync_timer.data = (unsigned long) mdev;
+	mdev->request_timer.function = request_timer_fn;
+	mdev->request_timer.data = (unsigned long) mdev;
 
 	init_waitqueue_head(&mdev->misc_wait);
 	init_waitqueue_head(&mdev->state_wait);
@@ -2881,6 +3096,8 @@
 	D_ASSERT(list_empty(&mdev->resync_work.list));
 	D_ASSERT(list_empty(&mdev->unplug_work.list));
 	D_ASSERT(list_empty(&mdev->go_diskless.list));
+
+	drbd_set_defaults(mdev);
 }
 
 
@@ -2923,7 +3140,7 @@
 static int drbd_create_mempools(void)
 {
 	struct page *page;
-	const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count;
+	const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
 	int i;
 
 	/* prepare our caches and mempools */
@@ -3087,11 +3304,20 @@
 
 	unregister_reboot_notifier(&drbd_notifier);
 
+	/* first remove proc,
+	 * drbdsetup uses it's presence to detect
+	 * whether DRBD is loaded.
+	 * If we would get stuck in proc removal,
+	 * but have netlink already deregistered,
+	 * some drbdsetup commands may wait forever
+	 * for an answer.
+	 */
+	if (drbd_proc)
+		remove_proc_entry("drbd", NULL);
+
 	drbd_nl_cleanup();
 
 	if (minor_table) {
-		if (drbd_proc)
-			remove_proc_entry("drbd", NULL);
 		i = minor_count;
 		while (i--)
 			drbd_delete_device(i);
@@ -3119,7 +3345,7 @@
 	char reason = '-';
 	int r = 0;
 
-	if (!__inc_ap_bio_cond(mdev)) {
+	if (!may_inc_ap_bio(mdev)) {
 		/* DRBD has frozen IO */
 		r = bdi_bits;
 		reason = 'd';
@@ -3172,7 +3398,7 @@
 		goto out_no_disk;
 	mdev->vdisk = disk;
 
-	set_disk_ro(disk, TRUE);
+	set_disk_ro(disk, true);
 
 	disk->queue = q;
 	disk->major = DRBD_MAJOR;
@@ -3188,8 +3414,8 @@
 	q->backing_dev_info.congested_fn = drbd_congested;
 	q->backing_dev_info.congested_data = mdev;
 
-	blk_queue_make_request(q, drbd_make_request_26);
-	blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE);
+	blk_queue_make_request(q, drbd_make_request);
+	blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE >> 9);
 	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 	blk_queue_merge_bvec(q, drbd_merge_bvec);
 	q->queue_lock = &mdev->req_lock;
@@ -3251,6 +3477,7 @@
 	put_disk(mdev->vdisk);
 	blk_cleanup_queue(mdev->rq_queue);
 	free_cpumask_var(mdev->cpu_mask);
+	drbd_free_tl_hash(mdev);
 	kfree(mdev);
 }
 
@@ -3266,7 +3493,7 @@
 		return -EINVAL;
 	}
 
-	if (1 > minor_count || minor_count > 255) {
+	if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
 		printk(KERN_ERR
 			"drbd: invalid minor_count (%d)\n", minor_count);
 #ifdef MODULE
@@ -3448,7 +3675,7 @@
 	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
 		/* this was a try anyways ... */
 		dev_err(DEV, "meta data update failed!\n");
-		drbd_chk_io_error(mdev, 1, TRUE);
+		drbd_chk_io_error(mdev, 1, true);
 	}
 
 	/* Update mdev->ldev->md.la_size_sect,
@@ -3464,7 +3691,7 @@
  * @mdev:	DRBD device.
  * @bdev:	Device from which the meta data should be read in.
  *
- * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case
+ * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
  * something goes wrong.  Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
  */
 int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
@@ -3534,28 +3761,6 @@
 	return rv;
 }
 
-static void debug_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index)
-{
-	static char *uuid_str[UI_EXTENDED_SIZE] = {
-		[UI_CURRENT] = "CURRENT",
-		[UI_BITMAP] = "BITMAP",
-		[UI_HISTORY_START] = "HISTORY_START",
-		[UI_HISTORY_END] = "HISTORY_END",
-		[UI_SIZE] = "SIZE",
-		[UI_FLAGS] = "FLAGS",
-	};
-
-	if (index >= UI_EXTENDED_SIZE) {
-		dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n");
-		return;
-	}
-
-	dynamic_dev_dbg(DEV, " uuid[%s] now %016llX\n",
-		 uuid_str[index],
-		 (unsigned long long)mdev->ldev->md.uuid[index]);
-}
-
-
 /**
  * drbd_md_mark_dirty() - Mark meta data super block as dirty
  * @mdev:	DRBD device.
@@ -3585,10 +3790,8 @@
 {
 	int i;
 
-	for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) {
+	for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
 		mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
-		debug_drbd_uuid(mdev, i+1);
-	}
 }
 
 void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
@@ -3603,7 +3806,6 @@
 	}
 
 	mdev->ldev->md.uuid[idx] = val;
-	debug_drbd_uuid(mdev, idx);
 	drbd_md_mark_dirty(mdev);
 }
 
@@ -3613,7 +3815,6 @@
 	if (mdev->ldev->md.uuid[idx]) {
 		drbd_uuid_move_history(mdev);
 		mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
-		debug_drbd_uuid(mdev, UI_HISTORY_START);
 	}
 	_drbd_uuid_set(mdev, idx, val);
 }
@@ -3628,14 +3829,16 @@
 void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
 {
 	u64 val;
+	unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
 
-	dev_info(DEV, "Creating new current UUID\n");
-	D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0);
+	if (bm_uuid)
+		dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
+
 	mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
-	debug_drbd_uuid(mdev, UI_BITMAP);
 
 	get_random_bytes(&val, sizeof(u64));
 	_drbd_uuid_set(mdev, UI_CURRENT, val);
+	drbd_print_uuids(mdev, "new current UUID");
 	/* get it to stable storage _now_ */
 	drbd_md_sync(mdev);
 }
@@ -3649,16 +3852,12 @@
 		drbd_uuid_move_history(mdev);
 		mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
 		mdev->ldev->md.uuid[UI_BITMAP] = 0;
-		debug_drbd_uuid(mdev, UI_HISTORY_START);
-		debug_drbd_uuid(mdev, UI_BITMAP);
 	} else {
-		if (mdev->ldev->md.uuid[UI_BITMAP])
-			dev_warn(DEV, "bm UUID already set");
+		unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+		if (bm_uuid)
+			dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
 
-		mdev->ldev->md.uuid[UI_BITMAP] = val;
-		mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1);
-
-		debug_drbd_uuid(mdev, UI_BITMAP);
+		mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
 	}
 	drbd_md_mark_dirty(mdev);
 }
@@ -3714,15 +3913,19 @@
 static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 {
 	struct bm_io_work *work = container_of(w, struct bm_io_work, w);
-	int rv;
+	int rv = -EIO;
 
 	D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
 
-	drbd_bm_lock(mdev, work->why);
-	rv = work->io_fn(mdev);
-	drbd_bm_unlock(mdev);
+	if (get_ldev(mdev)) {
+		drbd_bm_lock(mdev, work->why, work->flags);
+		rv = work->io_fn(mdev);
+		drbd_bm_unlock(mdev);
+		put_ldev(mdev);
+	}
 
 	clear_bit(BITMAP_IO, &mdev->flags);
+	smp_mb__after_clear_bit();
 	wake_up(&mdev->misc_wait);
 
 	if (work->done)
@@ -3730,6 +3933,7 @@
 
 	clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
 	work->why = NULL;
+	work->flags = 0;
 
 	return 1;
 }
@@ -3784,7 +3988,7 @@
 void drbd_queue_bitmap_io(struct drbd_conf *mdev,
 			  int (*io_fn)(struct drbd_conf *),
 			  void (*done)(struct drbd_conf *, int),
-			  char *why)
+			  char *why, enum bm_flag flags)
 {
 	D_ASSERT(current == mdev->worker.task);
 
@@ -3798,15 +4002,15 @@
 	mdev->bm_io_work.io_fn = io_fn;
 	mdev->bm_io_work.done = done;
 	mdev->bm_io_work.why = why;
+	mdev->bm_io_work.flags = flags;
 
+	spin_lock_irq(&mdev->req_lock);
 	set_bit(BITMAP_IO, &mdev->flags);
 	if (atomic_read(&mdev->ap_bio_cnt) == 0) {
-		if (list_empty(&mdev->bm_io_work.w.list)) {
-			set_bit(BITMAP_IO_QUEUED, &mdev->flags);
+		if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
 			drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
-		} else
-			dev_err(DEV, "FIXME avoided double queuing bm_io_work\n");
 	}
+	spin_unlock_irq(&mdev->req_lock);
 }
 
 /**
@@ -3818,19 +4022,22 @@
  * freezes application IO while that the actual IO operations runs. This
  * functions MAY NOT be called from worker context.
  */
-int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why)
+int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
+		char *why, enum bm_flag flags)
 {
 	int rv;
 
 	D_ASSERT(current != mdev->worker.task);
 
-	drbd_suspend_io(mdev);
+	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+		drbd_suspend_io(mdev);
 
-	drbd_bm_lock(mdev, why);
+	drbd_bm_lock(mdev, why, flags);
 	rv = io_fn(mdev);
 	drbd_bm_unlock(mdev);
 
-	drbd_resume_io(mdev);
+	if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+		drbd_resume_io(mdev);
 
 	return rv;
 }

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index fe81c85..03b29f7 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c

@@ -288,10 +288,11 @@
 		dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
 }
 
-int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
+enum drbd_state_rv
+drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 {
 	const int max_tries = 4;
-	int r = 0;
+	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
 	int try = 0;
 	int forced = 0;
 	union drbd_state mask, val;
@@ -306,17 +307,17 @@
 	val.i  = 0; val.role  = new_role;
 
 	while (try++ < max_tries) {
-		r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
+		rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
 
 		/* in case we first succeeded to outdate,
 		 * but now suddenly could establish a connection */
-		if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
+		if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
 			val.pdsk = 0;
 			mask.pdsk = 0;
 			continue;
 		}
 
-		if (r == SS_NO_UP_TO_DATE_DISK && force &&
+		if (rv == SS_NO_UP_TO_DATE_DISK && force &&
 		    (mdev->state.disk < D_UP_TO_DATE &&
 		     mdev->state.disk >= D_INCONSISTENT)) {
 			mask.disk = D_MASK;
@@ -325,7 +326,7 @@
 			continue;
 		}
 
-		if (r == SS_NO_UP_TO_DATE_DISK &&
+		if (rv == SS_NO_UP_TO_DATE_DISK &&
 		    mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
 			D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
 			nps = drbd_try_outdate_peer(mdev);
@@ -341,9 +342,9 @@
 			continue;
 		}
 
-		if (r == SS_NOTHING_TO_DO)
+		if (rv == SS_NOTHING_TO_DO)
 			goto fail;
-		if (r == SS_PRIMARY_NOP && mask.pdsk == 0) {
+		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
 			nps = drbd_try_outdate_peer(mdev);
 
 			if (force && nps > D_OUTDATED) {
@@ -356,25 +357,24 @@
 
 			continue;
 		}
-		if (r == SS_TWO_PRIMARIES) {
+		if (rv == SS_TWO_PRIMARIES) {
 			/* Maybe the peer is detected as dead very soon...
 			   retry at most once more in this case. */
-			__set_current_state(TASK_INTERRUPTIBLE);
-			schedule_timeout((mdev->net_conf->ping_timeo+1)*HZ/10);
+			schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10);
 			if (try < max_tries)
 				try = max_tries - 1;
 			continue;
 		}
-		if (r < SS_SUCCESS) {
-			r = _drbd_request_state(mdev, mask, val,
+		if (rv < SS_SUCCESS) {
+			rv = _drbd_request_state(mdev, mask, val,
 						CS_VERBOSE + CS_WAIT_COMPLETE);
-			if (r < SS_SUCCESS)
+			if (rv < SS_SUCCESS)
 				goto fail;
 		}
 		break;
 	}
 
-	if (r < SS_SUCCESS)
+	if (rv < SS_SUCCESS)
 		goto fail;
 
 	if (forced)
@@ -384,7 +384,7 @@
 	wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
 
 	if (new_role == R_SECONDARY) {
-		set_disk_ro(mdev->vdisk, TRUE);
+		set_disk_ro(mdev->vdisk, true);
 		if (get_ldev(mdev)) {
 			mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
 			put_ldev(mdev);
@@ -394,7 +394,7 @@
 			mdev->net_conf->want_lose = 0;
 			put_net_conf(mdev);
 		}
-		set_disk_ro(mdev->vdisk, FALSE);
+		set_disk_ro(mdev->vdisk, false);
 		if (get_ldev(mdev)) {
 			if (((mdev->state.conn < C_CONNECTED ||
 			       mdev->state.pdsk <= D_FAILED)
@@ -406,10 +406,8 @@
 		}
 	}
 
-	if ((new_role == R_SECONDARY) && get_ldev(mdev)) {
-		drbd_al_to_on_disk_bm(mdev);
-		put_ldev(mdev);
-	}
+	/* writeout of activity log covered areas of the bitmap
+	 * to stable storage done in after state change already */
 
 	if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
 		/* if this was forced, we should consider sync */
@@ -423,7 +421,7 @@
 	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
  fail:
 	mutex_unlock(&mdev->state_mutex);
-	return r;
+	return rv;
 }
 
 static struct drbd_conf *ensure_mdev(int minor, int create)
@@ -528,17 +526,19 @@
 	}
 }
 
+/* input size is expected to be in KB */
 char *ppsize(char *buf, unsigned long long size)
 {
-	/* Needs 9 bytes at max. */
+	/* Needs 9 bytes at max including trailing NUL:
+	 * -1ULL ==> "16384 EB" */
 	static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
 	int base = 0;
-	while (size >= 10000) {
+	while (size >= 10000 && base < sizeof(units)-1) {
 		/* shift + round */
 		size = (size >> 10) + !!(size & (1<<9));
 		base++;
 	}
-	sprintf(buf, "%lu %cB", (long)size, units[base]);
+	sprintf(buf, "%u %cB", (unsigned)size, units[base]);
 
 	return buf;
 }
@@ -642,11 +642,19 @@
 		|| prev_size	   != mdev->ldev->md.md_size_sect;
 
 	if (la_size_changed || md_moved) {
+		int err;
+
 		drbd_al_shrink(mdev); /* All extents inactive. */
 		dev_info(DEV, "Writing the whole bitmap, %s\n",
 			 la_size_changed && md_moved ? "size changed and md moved" :
 			 la_size_changed ? "size changed" : "md moved");
-		rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */
+		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
+		err = drbd_bitmap_io(mdev, &drbd_bm_write,
+				"size changed", BM_LOCKED_MASK);
+		if (err) {
+			rv = dev_size_error;
+			goto out;
+		}
 		drbd_md_mark_dirty(mdev);
 	}
 
@@ -765,22 +773,21 @@
 	return 0;
 }
 
-void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local)
+void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) __must_hold(local)
 {
 	struct request_queue * const q = mdev->rq_queue;
 	struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
 	int max_segments = mdev->ldev->dc.max_bio_bvecs;
+	int max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
 
-	max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s);
-
-	blk_queue_max_hw_sectors(q, max_seg_s >> 9);
-	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
-	blk_queue_max_segment_size(q, max_seg_s);
 	blk_queue_logical_block_size(q, 512);
-	blk_queue_segment_boundary(q, PAGE_SIZE-1);
-	blk_stack_limits(&q->limits, &b->limits, 0);
+	blk_queue_max_hw_sectors(q, max_hw_sectors);
+	/* This is the workaround for "bio would need to, but cannot, be split" */
+	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
+	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
+	blk_queue_stack_limits(q, b);
 
-	dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q));
+	dev_info(DEV, "max BIO size = %u\n", queue_max_hw_sectors(q) << 9);
 
 	if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
 		dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
@@ -850,7 +857,7 @@
 static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 			     struct drbd_nl_cfg_reply *reply)
 {
-	enum drbd_ret_codes retcode;
+	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
 	sector_t max_possible_sectors;
 	sector_t min_md_device_sectors;
@@ -858,8 +865,8 @@
 	struct block_device *bdev;
 	struct lru_cache *resync_lru = NULL;
 	union drbd_state ns, os;
-	unsigned int max_seg_s;
-	int rv;
+	unsigned int max_bio_size;
+	enum drbd_state_rv rv;
 	int cp_discovered = 0;
 	int logical_block_size;
 
@@ -1005,9 +1012,10 @@
 	/* and for any other previously queued work */
 	drbd_flush_workqueue(mdev);
 
-	retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
+	rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
+	retcode = rv;  /* FIXME: Type mismatch. */
 	drbd_resume_io(mdev);
-	if (retcode < SS_SUCCESS)
+	if (rv < SS_SUCCESS)
 		goto fail;
 
 	if (!get_ldev_if_state(mdev, D_ATTACHING))
@@ -1109,20 +1117,20 @@
 	mdev->read_cnt = 0;
 	mdev->writ_cnt = 0;
 
-	max_seg_s = DRBD_MAX_SEGMENT_SIZE;
+	max_bio_size = DRBD_MAX_BIO_SIZE;
 	if (mdev->state.conn == C_CONNECTED) {
 		/* We are Primary, Connected, and now attach a new local
 		 * backing store. We must not increase the user visible maximum
 		 * bio size on this device to something the peer may not be
 		 * able to handle. */
 		if (mdev->agreed_pro_version < 94)
-			max_seg_s = queue_max_segment_size(mdev->rq_queue);
+			max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
 		else if (mdev->agreed_pro_version == 94)
-			max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
+			max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
 		/* else: drbd 8.3.9 and later, stay with default */
 	}
 
-	drbd_setup_queue_param(mdev, max_seg_s);
+	drbd_setup_queue_param(mdev, max_bio_size);
 
 	/* If I am currently not R_PRIMARY,
 	 * but meta data primary indicator is set,
@@ -1154,12 +1162,14 @@
 	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
 		dev_info(DEV, "Assuming that all blocks are out of sync "
 		     "(aka FullSync)\n");
-		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) {
+		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
+			"set_n_write from attaching", BM_LOCKED_MASK)) {
 			retcode = ERR_IO_MD_DISK;
 			goto force_diskless_dec;
 		}
 	} else {
-		if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) {
+		if (drbd_bitmap_io(mdev, &drbd_bm_read,
+			"read from attaching", BM_LOCKED_MASK) < 0) {
 			retcode = ERR_IO_MD_DISK;
 			goto force_diskless_dec;
 		}
@@ -1167,7 +1177,11 @@
 
 	if (cp_discovered) {
 		drbd_al_apply_to_bm(mdev);
-		drbd_al_to_on_disk_bm(mdev);
+		if (drbd_bitmap_io(mdev, &drbd_bm_write,
+			"crashed primary apply AL", BM_LOCKED_MASK)) {
+			retcode = ERR_IO_MD_DISK;
+			goto force_diskless_dec;
+		}
 	}
 
 	if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
@@ -1279,7 +1293,7 @@
 			    struct drbd_nl_cfg_reply *reply)
 {
 	int i, ns;
-	enum drbd_ret_codes retcode;
+	enum drbd_ret_code retcode;
 	struct net_conf *new_conf = NULL;
 	struct crypto_hash *tfm = NULL;
 	struct crypto_hash *integrity_w_tfm = NULL;
@@ -1324,6 +1338,8 @@
 	new_conf->wire_protocol    = DRBD_PROT_C;
 	new_conf->ping_timeo	   = DRBD_PING_TIMEO_DEF;
 	new_conf->rr_conflict	   = DRBD_RR_CONFLICT_DEF;
+	new_conf->on_congestion    = DRBD_ON_CONGESTION_DEF;
+	new_conf->cong_extents     = DRBD_CONG_EXTENTS_DEF;
 
 	if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
 		retcode = ERR_MANDATORY_TAG;
@@ -1345,6 +1361,11 @@
 		}
 	}
 
+	if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) {
+		retcode = ERR_CONG_NOT_PROTO_A;
+		goto fail;
+	}
+
 	if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
 		retcode = ERR_DISCARD;
 		goto fail;
@@ -1525,6 +1546,21 @@
 			      struct drbd_nl_cfg_reply *reply)
 {
 	int retcode;
+	struct disconnect dc;
+
+	memset(&dc, 0, sizeof(struct disconnect));
+	if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) {
+		retcode = ERR_MANDATORY_TAG;
+		goto fail;
+	}
+
+	if (dc.force) {
+		spin_lock_irq(&mdev->req_lock);
+		if (mdev->state.conn >= C_WF_CONNECTION)
+			_drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL);
+		spin_unlock_irq(&mdev->req_lock);
+		goto done;
+	}
 
 	retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED);
 
@@ -1842,6 +1878,10 @@
 {
 	int retcode;
 
+	/* If there is still bitmap IO pending, probably because of a previous
+	 * resync just being finished, wait for it before requesting a new resync. */
+	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+
 	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
 
 	if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
@@ -1877,6 +1917,10 @@
 {
 	int retcode;
 
+	/* If there is still bitmap IO pending, probably because of a previous
+	 * resync just being finished, wait for it before requesting a new resync. */
+	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+
 	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
 
 	if (retcode < SS_SUCCESS) {
@@ -1885,9 +1929,9 @@
 			   into a full resync. */
 			retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
 			if (retcode >= SS_SUCCESS) {
-				/* open coded drbd_bitmap_io() */
 				if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
-						   "set_n_write from invalidate_peer"))
+					"set_n_write from invalidate_peer",
+					BM_LOCKED_SET_ALLOWED))
 					retcode = ERR_IO_MD_DISK;
 			}
 		} else
@@ -1914,9 +1958,17 @@
 			       struct drbd_nl_cfg_reply *reply)
 {
 	int retcode = NO_ERROR;
+	union drbd_state s;
 
-	if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO)
-		retcode = ERR_PAUSE_IS_CLEAR;
+	if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
+		s = mdev->state;
+		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
+			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
+				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
+		} else {
+			retcode = ERR_PAUSE_IS_CLEAR;
+		}
+	}
 
 	reply->ret_code = retcode;
 	return 0;
@@ -2054,6 +2106,11 @@
 		reply->ret_code = ERR_MANDATORY_TAG;
 		return 0;
 	}
+
+	/* If there is still bitmap IO pending, e.g. previous resync or verify
+	 * just being finished, wait for it before requesting a new resync. */
+	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+
 	/* w_make_ov_request expects position to be aligned */
 	mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
 	reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
@@ -2097,7 +2154,8 @@
 	drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */
 
 	if (args.clear_bm) {
-		err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid");
+		err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
+			"clear_n_write from new_c_uuid", BM_LOCKED_MASK);
 		if (err) {
 			dev_err(DEV, "Writing bitmap failed with %d\n",err);
 			retcode = ERR_IO_MD_DISK;
@@ -2105,6 +2163,7 @@
 		if (skip_initial_sync) {
 			drbd_send_uuids_skip_initial_sync(mdev);
 			_drbd_uuid_set(mdev, UI_BITMAP, 0);
+			drbd_print_uuids(mdev, "cleared bitmap UUID");
 			spin_lock_irq(&mdev->req_lock);
 			_drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
 					CS_VERBOSE, NULL);
@@ -2189,7 +2248,8 @@
 		goto fail;
 	}
 
-	if (nlp->packet_type >= P_nl_after_last_packet) {
+	if (nlp->packet_type >= P_nl_after_last_packet ||
+	    nlp->packet_type == P_return_code_only) {
 		retcode = ERR_PACKET_NR;
 		goto fail;
 	}
@@ -2205,7 +2265,7 @@
 	reply_size += cm->reply_body_size;
 
 	/* allocation not in the IO path, cqueue thread context */
-	cn_reply = kmalloc(reply_size, GFP_KERNEL);
+	cn_reply = kzalloc(reply_size, GFP_KERNEL);
 	if (!cn_reply) {
 		retcode = ERR_NOMEM;
 		goto fail;
@@ -2213,7 +2273,7 @@
 	reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
 
 	reply->packet_type =
-		cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet;
+		cm->reply_body_size ? nlp->packet_type : P_return_code_only;
 	reply->minor = nlp->drbd_minor;
 	reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */
 	/* reply->tag_list; might be modified by cm->function. */
@@ -2376,7 +2436,7 @@
 	/* receiver thread context, which is not in the writeout path (of this node),
 	 * but may be in the writeout path of the _other_ node.
 	 * GFP_NOIO to avoid potential "distributed deadlock". */
-	cn_reply = kmalloc(
+	cn_reply = kzalloc(
 		sizeof(struct cn_msg)+
 		sizeof(struct drbd_nl_cfg_reply)+
 		sizeof(struct dump_ee_tag_len_struct)+
@@ -2398,10 +2458,11 @@
 	tl = tl_add_int(tl, T_ee_sector, &e->sector);
 	tl = tl_add_int(tl, T_ee_block_id, &e->block_id);
 
+	/* dump the first 32k */
+	len = min_t(unsigned, e->size, 32 << 10);
 	put_unaligned(T_ee_data, tl++);
-	put_unaligned(e->size, tl++);
+	put_unaligned(len, tl++);
 
-	len = e->size;
 	page = e->pages;
 	page_chain_for_each(page) {
 		void *d = kmap_atomic(page, KM_USER0);
@@ -2410,6 +2471,8 @@
 		kunmap_atomic(d, KM_USER0);
 		tl = (unsigned short*)((char*)tl + l);
 		len -= l;
+		if (len == 0)
+			break;
 	}
 	put_unaligned(TT_END, tl++); /* Close the tag list */
 
@@ -2508,6 +2571,7 @@
 		(struct drbd_nl_cfg_reply *)cn_reply->data;
 	int rr;
 
+	memset(buffer, 0, sizeof(buffer));
 	cn_reply->id = req->id;
 
 	cn_reply->seq = req->seq;
@@ -2515,6 +2579,7 @@
 	cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
 	cn_reply->flags = 0;
 
+	reply->packet_type = P_return_code_only;
 	reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
 	reply->ret_code = ret_code;
 

diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 7e6ac30..2959cdf 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c

@@ -34,6 +34,7 @@
 #include "drbd_int.h"
 
 static int drbd_proc_open(struct inode *inode, struct file *file);
+static int drbd_proc_release(struct inode *inode, struct file *file);
 
 
 struct proc_dir_entry *drbd_proc;
@@ -42,9 +43,22 @@
 	.open		= drbd_proc_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= single_release,
+	.release	= drbd_proc_release,
 };
 
+void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
+{
+	/* v is in kB/sec. We don't expect TiByte/sec yet. */
+	if (unlikely(v >= 1000000)) {
+		/* cool: > GiByte/s */
+		seq_printf(seq, "%ld,", v / 1000000);
+		v /= 1000000;
+		seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000);
+	} else if (likely(v >= 1000))
+		seq_printf(seq, "%ld,%03ld", v/1000, v % 1000);
+	else
+		seq_printf(seq, "%ld", v);
+}
 
 /*lge
  * progress bars shamelessly adapted from driver/md/md.c
@@ -71,10 +85,15 @@
 		seq_printf(seq, ".");
 	seq_printf(seq, "] ");
 
-	seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10);
-	/* if more than 1 GB display in MB */
-	if (mdev->rs_total > 0x100000L)
-		seq_printf(seq, "(%lu/%lu)M\n\t",
+	if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+		seq_printf(seq, "verified:");
+	else
+		seq_printf(seq, "sync'ed:");
+	seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);
+
+	/* if more than a few GB, display in MB */
+	if (mdev->rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
+		seq_printf(seq, "(%lu/%lu)M",
 			    (unsigned long) Bit2KB(rs_left >> 10),
 			    (unsigned long) Bit2KB(mdev->rs_total >> 10));
 	else
@@ -94,6 +113,7 @@
 	/* Rolling marks. last_mark+1 may just now be modified.  last_mark+2 is
 	 * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
 	 * least DRBD_SYNC_MARK_STEP time before it will be modified. */
+	/* ------------------------ ~18s average ------------------------ */
 	i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS;
 	dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
 	if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
@@ -107,14 +127,24 @@
 	seq_printf(seq, "finish: %lu:%02lu:%02lu",
 		rt / 3600, (rt % 3600) / 60, rt % 60);
 
-	/* current speed average over (SYNC_MARKS * SYNC_MARK_STEP) jiffies */
 	dbdt = Bit2KB(db/dt);
-	if (dbdt > 1000)
-		seq_printf(seq, " speed: %ld,%03ld",
-			dbdt/1000, dbdt % 1000);
-	else
-		seq_printf(seq, " speed: %ld", dbdt);
+	seq_printf(seq, " speed: ");
+	seq_printf_with_thousands_grouping(seq, dbdt);
+	seq_printf(seq, " (");
+	/* ------------------------- ~3s average ------------------------ */
+	if (proc_details >= 1) {
+		/* this is what drbd_rs_should_slow_down() uses */
+		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
+		dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
+		if (!dt)
+			dt++;
+		db = mdev->rs_mark_left[i] - rs_left;
+		dbdt = Bit2KB(db/dt);
+		seq_printf_with_thousands_grouping(seq, dbdt);
+		seq_printf(seq, " -- ");
+	}
 
+	/* --------------------- long term average ---------------------- */
 	/* mean speed since syncer started
 	 * we do account for PausedSync periods */
 	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
@@ -122,20 +152,34 @@
 		dt = 1;
 	db = mdev->rs_total - rs_left;
 	dbdt = Bit2KB(db/dt);
-	if (dbdt > 1000)
-		seq_printf(seq, " (%ld,%03ld)",
-			dbdt/1000, dbdt % 1000);
-	else
-		seq_printf(seq, " (%ld)", dbdt);
+	seq_printf_with_thousands_grouping(seq, dbdt);
+	seq_printf(seq, ")");
 
-	if (mdev->state.conn == C_SYNC_TARGET) {
-		if (mdev->c_sync_rate > 1000)
-			seq_printf(seq, " want: %d,%03d",
-				   mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000);
-		else
-			seq_printf(seq, " want: %d", mdev->c_sync_rate);
+	if (mdev->state.conn == C_SYNC_TARGET ||
+	    mdev->state.conn == C_VERIFY_S) {
+		seq_printf(seq, " want: ");
+		seq_printf_with_thousands_grouping(seq, mdev->c_sync_rate);
 	}
 	seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
+
+	if (proc_details >= 1) {
+		/* 64 bit:
+		 * we convert to sectors in the display below. */
+		unsigned long bm_bits = drbd_bm_bits(mdev);
+		unsigned long bit_pos;
+		if (mdev->state.conn == C_VERIFY_S ||
+		    mdev->state.conn == C_VERIFY_T)
+			bit_pos = bm_bits - mdev->ov_left;
+		else
+			bit_pos = mdev->bm_resync_fo;
+		/* Total sectors may be slightly off for oddly
+		 * sized devices. So what. */
+		seq_printf(seq,
+			"\t%3d%% sector pos: %llu/%llu\n",
+			(int)(bit_pos / (bm_bits/100+1)),
+			(unsigned long long)bit_pos * BM_SECT_PER_BIT,
+			(unsigned long long)bm_bits * BM_SECT_PER_BIT);
+	}
 }
 
 static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
@@ -232,20 +276,16 @@
 			   mdev->epochs,
 			   write_ordering_chars[mdev->write_ordering]
 			);
-			seq_printf(seq, " oos:%lu\n",
-				   Bit2KB(drbd_bm_total_weight(mdev)));
+			seq_printf(seq, " oos:%llu\n",
+				   Bit2KB((unsigned long long)
+					   drbd_bm_total_weight(mdev)));
 		}
 		if (mdev->state.conn == C_SYNC_SOURCE ||
-		    mdev->state.conn == C_SYNC_TARGET)
+		    mdev->state.conn == C_SYNC_TARGET ||
+		    mdev->state.conn == C_VERIFY_S ||
+		    mdev->state.conn == C_VERIFY_T)
 			drbd_syncer_progress(mdev, seq);
 
-		if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
-			seq_printf(seq, "\t%3d%%      %lu/%lu\n",
-				   (int)((mdev->rs_total-mdev->ov_left) /
-					 (mdev->rs_total/100+1)),
-				   mdev->rs_total - mdev->ov_left,
-				   mdev->rs_total);
-
 		if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) {
 			lc_seq_printf_stats(seq, mdev->resync);
 			lc_seq_printf_stats(seq, mdev->act_log);
@@ -265,7 +305,15 @@
 
 static int drbd_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, drbd_seq_show, PDE(inode)->data);
+	if (try_module_get(THIS_MODULE))
+		return single_open(file, drbd_seq_show, PDE(inode)->data);
+	return -ENODEV;
+}
+
+static int drbd_proc_release(struct inode *inode, struct file *file)
+{
+	module_put(THIS_MODULE);
+	return single_release(inode, file);
 }
 
 /* PROC FS stuff end */

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 8e68be9..fe1564c 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c

@@ -277,7 +277,7 @@
 	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
 	int i;
 
-	if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count)
+	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
 		i = page_chain_free(page);
 	else {
 		struct page *tmp;
@@ -319,7 +319,7 @@
 	struct page *page;
 	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
 
-	if (FAULT_ACTIVE(mdev, DRBD_FAULT_AL_EE))
+	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
 		return NULL;
 
 	e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
@@ -725,16 +725,16 @@
 	char tb[4];
 
 	if (!*sock)
-		return FALSE;
+		return false;
 
 	rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
 
 	if (rr > 0 || rr == -EAGAIN) {
-		return TRUE;
+		return true;
 	} else {
 		sock_release(*sock);
 		*sock = NULL;
-		return FALSE;
+		return false;
 	}
 }
 
@@ -768,8 +768,7 @@
 			if (s || ++try >= 3)
 				break;
 			/* give the other side time to call bind() & listen() */
-			__set_current_state(TASK_INTERRUPTIBLE);
-			schedule_timeout(HZ / 10);
+			schedule_timeout_interruptible(HZ / 10);
 		}
 
 		if (s) {
@@ -788,8 +787,7 @@
 		}
 
 		if (sock && msock) {
-			__set_current_state(TASK_INTERRUPTIBLE);
-			schedule_timeout(HZ / 10);
+			schedule_timeout_interruptible(HZ / 10);
 			ok = drbd_socket_okay(mdev, &sock);
 			ok = drbd_socket_okay(mdev, &msock) && ok;
 			if (ok)
@@ -906,7 +904,7 @@
 		put_ldev(mdev);
 	}
 
-	if (!drbd_send_protocol(mdev))
+	if (drbd_send_protocol(mdev) == -1)
 		return -1;
 	drbd_send_sync_param(mdev, &mdev->sync_conf);
 	drbd_send_sizes(mdev, 0, 0);
@@ -914,6 +912,7 @@
 	drbd_send_state(mdev);
 	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
 	clear_bit(RESIZE_PENDING, &mdev->flags);
+	mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
 
 	return 1;
 
@@ -932,8 +931,9 @@
 
 	r = drbd_recv(mdev, h, sizeof(*h));
 	if (unlikely(r != sizeof(*h))) {
-		dev_err(DEV, "short read expecting header on sock: r=%d\n", r);
-		return FALSE;
+		if (!signal_pending(current))
+			dev_warn(DEV, "short read expecting header on sock: r=%d\n", r);
+		return false;
 	}
 
 	if (likely(h->h80.magic == BE_DRBD_MAGIC)) {
@@ -947,11 +947,11 @@
 		    be32_to_cpu(h->h80.magic),
 		    be16_to_cpu(h->h80.command),
 		    be16_to_cpu(h->h80.length));
-		return FALSE;
+		return false;
 	}
 	mdev->last_received = jiffies;
 
-	return TRUE;
+	return true;
 }
 
 static void drbd_flush(struct drbd_conf *mdev)
@@ -1074,6 +1074,16 @@
  * @mdev:	DRBD device.
  * @e:		epoch entry
  * @rw:		flag field, see bio->bi_rw
+ *
+ * May spread the pages to multiple bios,
+ * depending on bio_add_page restrictions.
+ *
+ * Returns 0 if all bios have been submitted,
+ * -ENOMEM if we could not allocate enough bios,
+ * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
+ *  single page to an empty bio (which should never happen and likely indicates
+ *  that the lower level IO stack is in some way broken). This has been observed
+ *  on certain Xen deployments.
  */
 /* TODO allocate from our own bio_set. */
 int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
@@ -1086,6 +1096,7 @@
 	unsigned ds = e->size;
 	unsigned n_bios = 0;
 	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
+	int err = -ENOMEM;
 
 	/* In most cases, we will only need one bio.  But in case the lower
 	 * level restrictions happen to be different at this offset on this
@@ -1111,8 +1122,17 @@
 	page_chain_for_each(page) {
 		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
 		if (!bio_add_page(bio, page, len, 0)) {
-			/* a single page must always be possible! */
-			BUG_ON(bio->bi_vcnt == 0);
+			/* A single page must always be possible!
+			 * But in case it fails anyways,
+			 * we deal with it, and complain (below). */
+			if (bio->bi_vcnt == 0) {
+				dev_err(DEV,
+					"bio_add_page failed for len=%u, "
+					"bi_vcnt=0 (bi_sector=%llu)\n",
+					len, (unsigned long long)bio->bi_sector);
+				err = -ENOSPC;
+				goto fail;
+			}
 			goto next_bio;
 		}
 		ds -= len;
@@ -1138,7 +1158,7 @@
 		bios = bios->bi_next;
 		bio_put(bio);
 	}
-	return -ENOMEM;
+	return err;
 }
 
 static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
@@ -1160,7 +1180,7 @@
 	switch (mdev->write_ordering) {
 	case WO_none:
 		if (rv == FE_RECYCLED)
-			return TRUE;
+			return true;
 
 		/* receiver context, in the writeout path of the other node.
 		 * avoid potential distributed deadlock */
@@ -1188,10 +1208,10 @@
 		D_ASSERT(atomic_read(&epoch->active) == 0);
 		D_ASSERT(epoch->flags == 0);
 
-		return TRUE;
+		return true;
 	default:
 		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
-		return FALSE;
+		return false;
 	}
 
 	epoch->flags = 0;
@@ -1209,7 +1229,7 @@
 	}
 	spin_unlock(&mdev->epoch_lock);
 
-	return TRUE;
+	return true;
 }
 
 /* used from receive_RSDataReply (recv_resync_read)
@@ -1231,21 +1251,25 @@
 	if (dgs) {
 		rr = drbd_recv(mdev, dig_in, dgs);
 		if (rr != dgs) {
-			dev_warn(DEV, "short read receiving data digest: read %d expected %d\n",
-			     rr, dgs);
+			if (!signal_pending(current))
+				dev_warn(DEV,
+					"short read receiving data digest: read %d expected %d\n",
+					rr, dgs);
 			return NULL;
 		}
 	}
 
 	data_size -= dgs;
 
+	ERR_IF(data_size == 0) return NULL;
 	ERR_IF(data_size &  0x1ff) return NULL;
-	ERR_IF(data_size >  DRBD_MAX_SEGMENT_SIZE) return NULL;
+	ERR_IF(data_size >  DRBD_MAX_BIO_SIZE) return NULL;
 
 	/* even though we trust out peer,
 	 * we sometimes have to double check. */
 	if (sector + (data_size>>9) > capacity) {
-		dev_err(DEV, "capacity: %llus < sector: %llus + size: %u\n",
+		dev_err(DEV, "request from peer beyond end of local disk: "
+			"capacity: %llus < sector: %llus + size: %u\n",
 			(unsigned long long)capacity,
 			(unsigned long long)sector, data_size);
 		return NULL;
@@ -1264,15 +1288,16 @@
 		unsigned len = min_t(int, ds, PAGE_SIZE);
 		data = kmap(page);
 		rr = drbd_recv(mdev, data, len);
-		if (FAULT_ACTIVE(mdev, DRBD_FAULT_RECEIVE)) {
+		if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
 			dev_err(DEV, "Fault injection: Corrupting data on receive\n");
 			data[0] = data[0] ^ (unsigned long)-1;
 		}
 		kunmap(page);
 		if (rr != len) {
 			drbd_free_ee(mdev, e);
-			dev_warn(DEV, "short read receiving data: read %d expected %d\n",
-			     rr, len);
+			if (!signal_pending(current))
+				dev_warn(DEV, "short read receiving data: read %d expected %d\n",
+				rr, len);
 			return NULL;
 		}
 		ds -= rr;
@@ -1281,7 +1306,8 @@
 	if (dgs) {
 		drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv);
 		if (memcmp(dig_in, dig_vv, dgs)) {
-			dev_err(DEV, "Digest integrity check FAILED.\n");
+			dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
+				(unsigned long long)sector, data_size);
 			drbd_bcast_ee(mdev, "digest failed",
 					dgs, dig_in, dig_vv, e);
 			drbd_free_ee(mdev, e);
@@ -1302,7 +1328,7 @@
 	void *data;
 
 	if (!data_size)
-		return TRUE;
+		return true;
 
 	page = drbd_pp_alloc(mdev, 1, 1);
 
@@ -1311,8 +1337,10 @@
 		rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE));
 		if (rr != min_t(int, data_size, PAGE_SIZE)) {
 			rv = 0;
-			dev_warn(DEV, "short read receiving data: read %d expected %d\n",
-			     rr, min_t(int, data_size, PAGE_SIZE));
+			if (!signal_pending(current))
+				dev_warn(DEV,
+					"short read receiving data: read %d expected %d\n",
+					rr, min_t(int, data_size, PAGE_SIZE));
 			break;
 		}
 		data_size -= rr;
@@ -1337,8 +1365,10 @@
 	if (dgs) {
 		rr = drbd_recv(mdev, dig_in, dgs);
 		if (rr != dgs) {
-			dev_warn(DEV, "short read receiving data reply digest: read %d expected %d\n",
-			     rr, dgs);
+			if (!signal_pending(current))
+				dev_warn(DEV,
+					"short read receiving data reply digest: read %d expected %d\n",
+					rr, dgs);
 			return 0;
 		}
 	}
@@ -1359,9 +1389,10 @@
 			     expect);
 		kunmap(bvec->bv_page);
 		if (rr != expect) {
-			dev_warn(DEV, "short read receiving data reply: "
-			     "read %d expected %d\n",
-			     rr, expect);
+			if (!signal_pending(current))
+				dev_warn(DEV, "short read receiving data reply: "
+					"read %d expected %d\n",
+					rr, expect);
 			return 0;
 		}
 		data_size -= rr;
@@ -1425,11 +1456,10 @@
 
 	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
 	if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
-		return TRUE;
+		return true;
 
-	/* drbd_submit_ee currently fails for one reason only:
-	 * not being able to allocate enough bios.
-	 * Is dropping the connection going to help? */
+	/* don't care for the reason here */
+	dev_err(DEV, "submit failed, triggering re-connect\n");
 	spin_lock_irq(&mdev->req_lock);
 	list_del(&e->w.list);
 	spin_unlock_irq(&mdev->req_lock);
@@ -1437,7 +1467,7 @@
 	drbd_free_ee(mdev, e);
 fail:
 	put_ldev(mdev);
-	return FALSE;
+	return false;
 }
 
 static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
@@ -1454,7 +1484,7 @@
 	spin_unlock_irq(&mdev->req_lock);
 	if (unlikely(!req)) {
 		dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n");
-		return FALSE;
+		return false;
 	}
 
 	/* hlist_del(&req->colision) is done in _req_may_be_done, to avoid
@@ -1611,15 +1641,15 @@
 	return ret;
 }
 
-static unsigned long write_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
+/* see also bio_flags_to_wire()
+ * DRBD_REQ_*, because we need to semantically map the flags to data packet
+ * flags and back. We may replicate to other kernel versions. */
+static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
 {
-	if (mdev->agreed_pro_version >= 95)
-		return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
-			(dpf & DP_FUA ? REQ_FUA : 0) |
-			(dpf & DP_FLUSH ? REQ_FUA : 0) |
-			(dpf & DP_DISCARD ? REQ_DISCARD : 0);
-	else
-		return dpf & DP_RW_SYNC ? REQ_SYNC : 0;
+	return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
+		(dpf & DP_FUA ? REQ_FUA : 0) |
+		(dpf & DP_FLUSH ? REQ_FLUSH : 0) |
+		(dpf & DP_DISCARD ? REQ_DISCARD : 0);
 }
 
 /* mirrored write */
@@ -1632,9 +1662,6 @@
 	u32 dp_flags;
 
 	if (!get_ldev(mdev)) {
-		if (__ratelimit(&drbd_ratelimit_state))
-			dev_err(DEV, "Can not write mirrored data block "
-			    "to local disk.\n");
 		spin_lock(&mdev->peer_seq_lock);
 		if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
 			mdev->peer_seq++;
@@ -1654,23 +1681,23 @@
 	e = read_in_block(mdev, p->block_id, sector, data_size);
 	if (!e) {
 		put_ldev(mdev);
-		return FALSE;
+		return false;
 	}
 
 	e->w.cb = e_end_block;
 
+	dp_flags = be32_to_cpu(p->dp_flags);
+	rw |= wire_flags_to_bio(mdev, dp_flags);
+
+	if (dp_flags & DP_MAY_SET_IN_SYNC)
+		e->flags |= EE_MAY_SET_IN_SYNC;
+
 	spin_lock(&mdev->epoch_lock);
 	e->epoch = mdev->current_epoch;
 	atomic_inc(&e->epoch->epoch_size);
 	atomic_inc(&e->epoch->active);
 	spin_unlock(&mdev->epoch_lock);
 
-	dp_flags = be32_to_cpu(p->dp_flags);
-	rw |= write_flags_to_bio(mdev, dp_flags);
-
-	if (dp_flags & DP_MAY_SET_IN_SYNC)
-		e->flags |= EE_MAY_SET_IN_SYNC;
-
 	/* I'm the receiver, I do hold a net_cnt reference. */
 	if (!mdev->net_conf->two_primaries) {
 		spin_lock_irq(&mdev->req_lock);
@@ -1773,7 +1800,7 @@
 				put_ldev(mdev);
 				wake_asender(mdev);
 				finish_wait(&mdev->misc_wait, &wait);
-				return TRUE;
+				return true;
 			}
 
 			if (signal_pending(current)) {
@@ -1829,11 +1856,10 @@
 	}
 
 	if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0)
-		return TRUE;
+		return true;
 
-	/* drbd_submit_ee currently fails for one reason only:
-	 * not being able to allocate enough bios.
-	 * Is dropping the connection going to help? */
+	/* don't care for the reason here */
+	dev_err(DEV, "submit failed, triggering re-connect\n");
 	spin_lock_irq(&mdev->req_lock);
 	list_del(&e->w.list);
 	hlist_del_init(&e->colision);
@@ -1842,12 +1868,10 @@
 		drbd_al_complete_io(mdev, e->sector);
 
 out_interrupted:
-	/* yes, the epoch_size now is imbalanced.
-	 * but we drop the connection anyways, so we don't have a chance to
-	 * receive a barrier... atomic_inc(&mdev->epoch_size); */
+	drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP);
 	put_ldev(mdev);
 	drbd_free_ee(mdev, e);
-	return FALSE;
+	return false;
 }
 
 /* We may throttle resync, if the lower device seems to be busy,
@@ -1861,10 +1885,11 @@
  * The current sync rate used here uses only the most recent two step marks,
  * to have a short time average so we can react faster.
  */
-int drbd_rs_should_slow_down(struct drbd_conf *mdev)
+int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
 {
 	struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
 	unsigned long db, dt, dbdt;
+	struct lc_element *tmp;
 	int curr_events;
 	int throttle = 0;
 
@@ -1872,9 +1897,22 @@
 	if (mdev->sync_conf.c_min_rate == 0)
 		return 0;
 
+	spin_lock_irq(&mdev->al_lock);
+	tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
+	if (tmp) {
+		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
+		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
+			spin_unlock_irq(&mdev->al_lock);
+			return 0;
+		}
+		/* Do not slow down if app IO is already waiting for this extent */
+	}
+	spin_unlock_irq(&mdev->al_lock);
+
 	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
 		      (int)part_stat_read(&disk->part0, sectors[1]) -
 			atomic_read(&mdev->rs_sect_ev);
+
 	if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
 		unsigned long rs_left;
 		int i;
@@ -1883,8 +1921,12 @@
 
 		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
 		 * approx. */
-		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-2) % DRBD_SYNC_MARKS;
-		rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
+		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
+
+		if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+			rs_left = mdev->ov_left;
+		else
+			rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
 
 		dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
 		if (!dt)
@@ -1912,15 +1954,15 @@
 	sector = be64_to_cpu(p->sector);
 	size   = be32_to_cpu(p->blksize);
 
-	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) {
+	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
 		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
 				(unsigned long long)sector, size);
-		return FALSE;
+		return false;
 	}
 	if (sector + (size>>9) > capacity) {
 		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
 				(unsigned long long)sector, size);
-		return FALSE;
+		return false;
 	}
 
 	if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
@@ -1957,7 +1999,7 @@
 	e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
 	if (!e) {
 		put_ldev(mdev);
-		return FALSE;
+		return false;
 	}
 
 	switch (cmd) {
@@ -1970,6 +2012,8 @@
 	case P_RS_DATA_REQUEST:
 		e->w.cb = w_e_end_rsdata_req;
 		fault_type = DRBD_FAULT_RS_RD;
+		/* used in the sector offset progress display */
+		mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
 		break;
 
 	case P_OV_REPLY:
@@ -1991,7 +2035,11 @@
 		if (cmd == P_CSUM_RS_REQUEST) {
 			D_ASSERT(mdev->agreed_pro_version >= 89);
 			e->w.cb = w_e_end_csum_rs_req;
+			/* used in the sector offset progress display */
+			mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
 		} else if (cmd == P_OV_REPLY) {
+			/* track progress, we may need to throttle */
+			atomic_add(size >> 9, &mdev->rs_sect_in);
 			e->w.cb = w_e_end_ov_reply;
 			dec_rs_pending(mdev);
 			/* drbd_rs_begin_io done when we sent this request,
@@ -2003,9 +2051,16 @@
 	case P_OV_REQUEST:
 		if (mdev->ov_start_sector == ~(sector_t)0 &&
 		    mdev->agreed_pro_version >= 90) {
+			unsigned long now = jiffies;
+			int i;
 			mdev->ov_start_sector = sector;
 			mdev->ov_position = sector;
-			mdev->ov_left = mdev->rs_total - BM_SECT_TO_BIT(sector);
+			mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
+			mdev->rs_total = mdev->ov_left;
+			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
+				mdev->rs_mark_left[i] = mdev->ov_left;
+				mdev->rs_mark_time[i] = now;
+			}
 			dev_info(DEV, "Online Verify start sector: %llu\n",
 					(unsigned long long)sector);
 		}
@@ -2042,9 +2097,9 @@
 	 * we would also throttle its application reads.
 	 * In that case, throttling is done on the SyncTarget only.
 	 */
-	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev))
-		msleep(100);
-	if (drbd_rs_begin_io(mdev, e->sector))
+	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
+		schedule_timeout_uninterruptible(HZ/10);
+	if (drbd_rs_begin_io(mdev, sector))
 		goto out_free_e;
 
 submit_for_resync:
@@ -2057,11 +2112,10 @@
 	spin_unlock_irq(&mdev->req_lock);
 
 	if (drbd_submit_ee(mdev, e, READ, fault_type) == 0)
-		return TRUE;
+		return true;
 
-	/* drbd_submit_ee currently fails for one reason only:
-	 * not being able to allocate enough bios.
-	 * Is dropping the connection going to help? */
+	/* don't care for the reason here */
+	dev_err(DEV, "submit failed, triggering re-connect\n");
 	spin_lock_irq(&mdev->req_lock);
 	list_del(&e->w.list);
 	spin_unlock_irq(&mdev->req_lock);
@@ -2070,7 +2124,7 @@
 out_free_e:
 	put_ldev(mdev);
 	drbd_free_ee(mdev, e);
-	return FALSE;
+	return false;
 }
 
 static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
@@ -2147,10 +2201,7 @@
 
 static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
 {
-	int self, peer, hg, rv = -100;
-
-	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
-	peer = mdev->p_uuid[UI_BITMAP] & 1;
+	int hg, rv = -100;
 
 	switch (mdev->net_conf->after_sb_1p) {
 	case ASB_DISCARD_YOUNGER_PRI:
@@ -2177,12 +2228,14 @@
 	case ASB_CALL_HELPER:
 		hg = drbd_asb_recover_0p(mdev);
 		if (hg == -1 && mdev->state.role == R_PRIMARY) {
-			self = drbd_set_role(mdev, R_SECONDARY, 0);
+			enum drbd_state_rv rv2;
+
+			drbd_set_role(mdev, R_SECONDARY, 0);
 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
 			  * we do not need to wait for the after state change work either. */
-			self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
-			if (self != SS_SUCCESS) {
+			rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
+			if (rv2 != SS_SUCCESS) {
 				drbd_khelper(mdev, "pri-lost-after-sb");
 			} else {
 				dev_warn(DEV, "Successfully gave up primary role.\n");
@@ -2197,10 +2250,7 @@
 
 static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
 {
-	int self, peer, hg, rv = -100;
-
-	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
-	peer = mdev->p_uuid[UI_BITMAP] & 1;
+	int hg, rv = -100;
 
 	switch (mdev->net_conf->after_sb_2p) {
 	case ASB_DISCARD_YOUNGER_PRI:
@@ -2220,11 +2270,13 @@
 	case ASB_CALL_HELPER:
 		hg = drbd_asb_recover_0p(mdev);
 		if (hg == -1) {
+			enum drbd_state_rv rv2;
+
 			 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
 			  * we might be here in C_WF_REPORT_PARAMS which is transient.
 			  * we do not need to wait for the after state change work either. */
-			self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
-			if (self != SS_SUCCESS) {
+			rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
+			if (rv2 != SS_SUCCESS) {
 				drbd_khelper(mdev, "pri-lost-after-sb");
 			} else {
 				dev_warn(DEV, "Successfully gave up primary role.\n");
@@ -2263,6 +2315,8 @@
    -2	C_SYNC_TARGET set BitMap
  -100	after split brain, disconnect
 -1000	unrelated data
+-1091   requires proto 91
+-1096   requires proto 96
  */
 static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
 {
@@ -2292,7 +2346,7 @@
 		if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
 
 			if (mdev->agreed_pro_version < 91)
-				return -1001;
+				return -1091;
 
 			if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
 			    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
@@ -2313,7 +2367,7 @@
 		if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
 
 			if (mdev->agreed_pro_version < 91)
-				return -1001;
+				return -1091;
 
 			if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
 			    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
@@ -2358,17 +2412,22 @@
 	*rule_nr = 51;
 	peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
 	if (self == peer) {
-		self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
-		peer = mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1);
-		if (self == peer) {
+		if (mdev->agreed_pro_version < 96 ?
+		    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
+		    (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
+		    peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
 			/* The last P_SYNC_UUID did not get though. Undo the last start of
 			   resync as sync source modifications of the peer's UUIDs. */
 
 			if (mdev->agreed_pro_version < 91)
-				return -1001;
+				return -1091;
 
 			mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
 			mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
+
+			dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
+			drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
+
 			return -1;
 		}
 	}
@@ -2390,20 +2449,20 @@
 	*rule_nr = 71;
 	self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
 	if (self == peer) {
-		self = mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1);
-		peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
-		if (self == peer) {
+		if (mdev->agreed_pro_version < 96 ?
+		    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
+		    (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
+		    self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
 			/* The last P_SYNC_UUID did not get though. Undo the last start of
 			   resync as sync source modifications of our UUIDs. */
 
 			if (mdev->agreed_pro_version < 91)
-				return -1001;
+				return -1091;
 
 			_drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
 			_drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
 
-			dev_info(DEV, "Undid last start of resync:\n");
-
+			dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
 			drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
 				       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
 
@@ -2466,8 +2525,8 @@
 		dev_alert(DEV, "Unrelated data, aborting!\n");
 		return C_MASK;
 	}
-	if (hg == -1001) {
-		dev_alert(DEV, "To resolve this both sides have to support at least protocol\n");
+	if (hg < -1000) {
+		dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
 		return C_MASK;
 	}
 
@@ -2566,7 +2625,8 @@
 
 	if (abs(hg) >= 2) {
 		dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
-		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake"))
+		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
+					BM_LOCKED_SET_ALLOWED))
 			return C_MASK;
 	}
 
@@ -2660,7 +2720,7 @@
 		unsigned char *my_alg = mdev->net_conf->integrity_alg;
 
 		if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size)
-			return FALSE;
+			return false;
 
 		p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
 		if (strcmp(p_integrity_alg, my_alg)) {
@@ -2671,11 +2731,11 @@
 		     my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
 	}
 
-	return TRUE;
+	return true;
 
 disconnect:
 	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-	return FALSE;
+	return false;
 }
 
 /* helper function
@@ -2707,7 +2767,7 @@
 
 static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size)
 {
-	int ok = TRUE;
+	int ok = true;
 	struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95;
 	unsigned int header_size, data_size, exp_max_sz;
 	struct crypto_hash *verify_tfm = NULL;
@@ -2725,7 +2785,7 @@
 	if (packet_size > exp_max_sz) {
 		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
 		    packet_size, exp_max_sz);
-		return FALSE;
+		return false;
 	}
 
 	if (apv <= 88) {
@@ -2745,7 +2805,7 @@
 	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
 
 	if (drbd_recv(mdev, &p->head.payload, header_size) != header_size)
-		return FALSE;
+		return false;
 
 	mdev->sync_conf.rate	  = be32_to_cpu(p->rate);
 
@@ -2755,11 +2815,11 @@
 				dev_err(DEV, "verify-alg too long, "
 				    "peer wants %u, accepting only %u byte\n",
 						data_size, SHARED_SECRET_MAX);
-				return FALSE;
+				return false;
 			}
 
 			if (drbd_recv(mdev, p->verify_alg, data_size) != data_size)
-				return FALSE;
+				return false;
 
 			/* we expect NUL terminated string */
 			/* but just in case someone tries to be evil */
@@ -2853,7 +2913,7 @@
 	/* but free the verify_tfm again, if csums_tfm did not work out */
 	crypto_free_hash(verify_tfm);
 	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-	return FALSE;
+	return false;
 }
 
 static void drbd_setup_order_type(struct drbd_conf *mdev, int peer)
@@ -2879,7 +2939,7 @@
 {
 	struct p_sizes *p = &mdev->data.rbuf.sizes;
 	enum determine_dev_size dd = unchanged;
-	unsigned int max_seg_s;
+	unsigned int max_bio_size;
 	sector_t p_size, p_usize, my_usize;
 	int ldsc = 0; /* local disk size changed */
 	enum dds_flags ddsf;
@@ -2890,7 +2950,7 @@
 	if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
 		dev_err(DEV, "some backing storage is needed\n");
 		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-		return FALSE;
+		return false;
 	}
 
 	/* just store the peer's disk size for now.
@@ -2927,18 +2987,17 @@
 			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
 			mdev->ldev->dc.disk_size = my_usize;
 			put_ldev(mdev);
-			return FALSE;
+			return false;
 		}
 		put_ldev(mdev);
 	}
-#undef min_not_zero
 
 	ddsf = be16_to_cpu(p->dds_flags);
 	if (get_ldev(mdev)) {
 		dd = drbd_determin_dev_size(mdev, ddsf);
 		put_ldev(mdev);
 		if (dd == dev_size_error)
-			return FALSE;
+			return false;
 		drbd_md_sync(mdev);
 	} else {
 		/* I am diskless, need to accept the peer's size. */
@@ -2952,14 +3011,14 @@
 		}
 
 		if (mdev->agreed_pro_version < 94)
-			max_seg_s = be32_to_cpu(p->max_segment_size);
+			max_bio_size = be32_to_cpu(p->max_bio_size);
 		else if (mdev->agreed_pro_version == 94)
-			max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
+			max_bio_size = DRBD_MAX_SIZE_H80_PACKET;
 		else /* drbd 8.3.8 onwards */
-			max_seg_s = DRBD_MAX_SEGMENT_SIZE;
+			max_bio_size = DRBD_MAX_BIO_SIZE;
 
-		if (max_seg_s != queue_max_segment_size(mdev->rq_queue))
-			drbd_setup_queue_param(mdev, max_seg_s);
+		if (max_bio_size != queue_max_hw_sectors(mdev->rq_queue) << 9)
+			drbd_setup_queue_param(mdev, max_bio_size);
 
 		drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type));
 		put_ldev(mdev);
@@ -2985,14 +3044,14 @@
 		}
 	}
 
-	return TRUE;
+	return true;
 }
 
 static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 {
 	struct p_uuids *p = &mdev->data.rbuf.uuids;
 	u64 *p_uuid;
-	int i;
+	int i, updated_uuids = 0;
 
 	p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
 
@@ -3009,7 +3068,7 @@
 		dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
 		    (unsigned long long)mdev->ed_uuid);
 		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-		return FALSE;
+		return false;
 	}
 
 	if (get_ldev(mdev)) {
@@ -3021,19 +3080,21 @@
 		if (skip_initial_sync) {
 			dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
 			drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
-					"clear_n_write from receive_uuids");
+					"clear_n_write from receive_uuids",
+					BM_LOCKED_TEST_ALLOWED);
 			_drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
 			_drbd_uuid_set(mdev, UI_BITMAP, 0);
 			_drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
 					CS_VERBOSE, NULL);
 			drbd_md_sync(mdev);
+			updated_uuids = 1;
 		}
 		put_ldev(mdev);
 	} else if (mdev->state.disk < D_INCONSISTENT &&
 		   mdev->state.role == R_PRIMARY) {
 		/* I am a diskless primary, the peer just created a new current UUID
 		   for me. */
-		drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
+		updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
 	}
 
 	/* Before we test for the disk state, we should wait until an eventually
@@ -3042,9 +3103,12 @@
 	   new disk state... */
 	wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
 	if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
-		drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
+		updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
 
-	return TRUE;
+	if (updated_uuids)
+		drbd_print_uuids(mdev, "receiver updated UUIDs to");
+
+	return true;
 }
 
 /**
@@ -3081,7 +3145,7 @@
 {
 	struct p_req_state *p = &mdev->data.rbuf.req_state;
 	union drbd_state mask, val;
-	int rv;
+	enum drbd_state_rv rv;
 
 	mask.i = be32_to_cpu(p->mask);
 	val.i = be32_to_cpu(p->val);
@@ -3089,7 +3153,7 @@
 	if (test_bit(DISCARD_CONCURRENT, &mdev->flags) &&
 	    test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
 		drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
-		return TRUE;
+		return true;
 	}
 
 	mask = convert_state(mask);
@@ -3100,7 +3164,7 @@
 	drbd_send_sr_reply(mdev, rv);
 	drbd_md_sync(mdev);
 
-	return TRUE;
+	return true;
 }
 
 static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
@@ -3145,7 +3209,7 @@
 			 peer_state.conn == C_CONNECTED) {
 			if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
 				drbd_resync_finished(mdev);
-			return TRUE;
+			return true;
 		}
 	}
 
@@ -3161,6 +3225,9 @@
 	if (ns.conn == C_WF_REPORT_PARAMS)
 		ns.conn = C_CONNECTED;
 
+	if (peer_state.conn == C_AHEAD)
+		ns.conn = C_BEHIND;
+
 	if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
 	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
 		int cr; /* consider resync */
@@ -3195,10 +3262,10 @@
 				real_peer_disk = D_DISKLESS;
 			} else {
 				if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
-					return FALSE;
+					return false;
 				D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
 				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-				return FALSE;
+				return false;
 			}
 		}
 	}
@@ -3223,7 +3290,7 @@
 		drbd_uuid_new_current(mdev);
 		clear_bit(NEW_CUR_UUID, &mdev->flags);
 		drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
-		return FALSE;
+		return false;
 	}
 	rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
 	ns = mdev->state;
@@ -3231,7 +3298,7 @@
 
 	if (rv < SS_SUCCESS) {
 		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-		return FALSE;
+		return false;
 	}
 
 	if (os.conn > C_WF_REPORT_PARAMS) {
@@ -3249,7 +3316,7 @@
 
 	drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
 
-	return TRUE;
+	return true;
 }
 
 static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
@@ -3258,6 +3325,7 @@
 
 	wait_event(mdev->misc_wait,
 		   mdev->state.conn == C_WF_SYNC_UUID ||
+		   mdev->state.conn == C_BEHIND ||
 		   mdev->state.conn < C_CONNECTED ||
 		   mdev->state.disk < D_NEGOTIATING);
 
@@ -3269,32 +3337,42 @@
 		_drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
 		_drbd_uuid_set(mdev, UI_BITMAP, 0UL);
 
+		drbd_print_uuids(mdev, "updated sync uuid");
 		drbd_start_resync(mdev, C_SYNC_TARGET);
 
 		put_ldev(mdev);
 	} else
 		dev_err(DEV, "Ignoring SyncUUID packet!\n");
 
-	return TRUE;
+	return true;
 }
 
-enum receive_bitmap_ret { OK, DONE, FAILED };
-
-static enum receive_bitmap_ret
+/**
+ * receive_bitmap_plain
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
 receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
 		     unsigned long *buffer, struct bm_xfer_ctx *c)
 {
 	unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
 	unsigned want = num_words * sizeof(long);
+	int err;
 
 	if (want != data_size) {
 		dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
-		return FAILED;
+		return -EIO;
 	}
 	if (want == 0)
-		return DONE;
-	if (drbd_recv(mdev, buffer, want) != want)
-		return FAILED;
+		return 0;
+	err = drbd_recv(mdev, buffer, want);
+	if (err != want) {
+		if (err >= 0)
+			err = -EIO;
+		return err;
+	}
 
 	drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
 
@@ -3303,10 +3381,16 @@
 	if (c->bit_offset > c->bm_bits)
 		c->bit_offset = c->bm_bits;
 
-	return OK;
+	return 1;
 }
 
-static enum receive_bitmap_ret
+/**
+ * recv_bm_rle_bits
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
 recv_bm_rle_bits(struct drbd_conf *mdev,
 		struct p_compressed_bm *p,
 		struct bm_xfer_ctx *c)
@@ -3326,18 +3410,18 @@
 
 	bits = bitstream_get_bits(&bs, &look_ahead, 64);
 	if (bits < 0)
-		return FAILED;
+		return -EIO;
 
 	for (have = bits; have > 0; s += rl, toggle = !toggle) {
 		bits = vli_decode_bits(&rl, look_ahead);
 		if (bits <= 0)
-			return FAILED;
+			return -EIO;
 
 		if (toggle) {
 			e = s + rl -1;
 			if (e >= c->bm_bits) {
 				dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
-				return FAILED;
+				return -EIO;
 			}
 			_drbd_bm_set_bits(mdev, s, e);
 		}
@@ -3347,14 +3431,14 @@
 				have, bits, look_ahead,
 				(unsigned int)(bs.cur.b - p->code),
 				(unsigned int)bs.buf_len);
-			return FAILED;
+			return -EIO;
 		}
 		look_ahead >>= bits;
 		have -= bits;
 
 		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
 		if (bits < 0)
-			return FAILED;
+			return -EIO;
 		look_ahead |= tmp << have;
 		have += bits;
 	}
@@ -3362,10 +3446,16 @@
 	c->bit_offset = s;
 	bm_xfer_ctx_bit_to_word_offset(c);
 
-	return (s == c->bm_bits) ? DONE : OK;
+	return (s != c->bm_bits);
 }
 
-static enum receive_bitmap_ret
+/**
+ * decode_bitmap_c
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
 decode_bitmap_c(struct drbd_conf *mdev,
 		struct p_compressed_bm *p,
 		struct bm_xfer_ctx *c)
@@ -3379,7 +3469,7 @@
 
 	dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
 	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
-	return FAILED;
+	return -EIO;
 }
 
 void INFO_bm_xfer_stats(struct drbd_conf *mdev,
@@ -3428,13 +3518,13 @@
 {
 	struct bm_xfer_ctx c;
 	void *buffer;
-	enum receive_bitmap_ret ret;
-	int ok = FALSE;
+	int err;
+	int ok = false;
 	struct p_header80 *h = &mdev->data.rbuf.header.h80;
 
-	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
-
-	drbd_bm_lock(mdev, "receive bitmap");
+	drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
+	/* you are supposed to send additional out-of-sync information
+	 * if you actually set bits during this phase */
 
 	/* maybe we should use some per thread scratch page,
 	 * and allocate that during initial device creation? */
@@ -3449,9 +3539,9 @@
 		.bm_words = drbd_bm_words(mdev),
 	};
 
-	do {
+	for(;;) {
 		if (cmd == P_BITMAP) {
-			ret = receive_bitmap_plain(mdev, data_size, buffer, &c);
+			err = receive_bitmap_plain(mdev, data_size, buffer, &c);
 		} else if (cmd == P_COMPRESSED_BITMAP) {
 			/* MAYBE: sanity check that we speak proto >= 90,
 			 * and the feature is enabled! */
@@ -3468,9 +3558,9 @@
 				goto out;
 			if (data_size <= (sizeof(*p) - sizeof(p->head))) {
 				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
-				return FAILED;
+				goto out;
 			}
-			ret = decode_bitmap_c(mdev, p, &c);
+			err = decode_bitmap_c(mdev, p, &c);
 		} else {
 			dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
 			goto out;
@@ -3479,24 +3569,26 @@
 		c.packets[cmd == P_BITMAP]++;
 		c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size;
 
-		if (ret != OK)
+		if (err <= 0) {
+			if (err < 0)
+				goto out;
 			break;
-
+		}
 		if (!drbd_recv_header(mdev, &cmd, &data_size))
 			goto out;
-	} while (ret == OK);
-	if (ret == FAILED)
-		goto out;
+	}
 
 	INFO_bm_xfer_stats(mdev, "receive", &c);
 
 	if (mdev->state.conn == C_WF_BITMAP_T) {
+		enum drbd_state_rv rv;
+
 		ok = !drbd_send_bitmap(mdev);
 		if (!ok)
 			goto out;
 		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
-		ok = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
-		D_ASSERT(ok == SS_SUCCESS);
+		rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
+		D_ASSERT(rv == SS_SUCCESS);
 	} else if (mdev->state.conn != C_WF_BITMAP_S) {
 		/* admin may have requested C_DISCONNECTING,
 		 * other threads may have noticed network errors */
@@ -3504,7 +3596,7 @@
 		    drbd_conn_str(mdev->state.conn));
 	}
 
-	ok = TRUE;
+	ok = true;
  out:
 	drbd_bm_unlock(mdev);
 	if (ok && mdev->state.conn == C_WF_BITMAP_S)
@@ -3538,7 +3630,26 @@
 	 * with the data requests being unplugged */
 	drbd_tcp_quickack(mdev->data.socket);
 
-	return TRUE;
+	return true;
+}
+
+static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+	struct p_block_desc *p = &mdev->data.rbuf.block_desc;
+
+	switch (mdev->state.conn) {
+	case C_WF_SYNC_UUID:
+	case C_WF_BITMAP_T:
+	case C_BEHIND:
+			break;
+	default:
+		dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
+				drbd_conn_str(mdev->state.conn));
+	}
+
+	drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
+
+	return true;
 }
 
 typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive);
@@ -3571,6 +3682,7 @@
 	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
 	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
 	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
+	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
 	/* anything missing from this table is in
 	 * the asender_tbl, see get_asender_cmd */
 	[P_MAX_CMD]	    = { 0, 0, NULL },
@@ -3610,7 +3722,8 @@
 		if (shs) {
 			rv = drbd_recv(mdev, &header->h80.payload, shs);
 			if (unlikely(rv != shs)) {
-				dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
+				if (!signal_pending(current))
+					dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv);
 				goto err_out;
 			}
 		}
@@ -3682,9 +3795,6 @@
 
 	if (mdev->state.conn == C_STANDALONE)
 		return;
-	if (mdev->state.conn >= C_WF_CONNECTION)
-		dev_err(DEV, "ASSERT FAILED cstate = %s, expected < WFConnection\n",
-				drbd_conn_str(mdev->state.conn));
 
 	/* asender does not clean up anything. it must not interfere, either */
 	drbd_thread_stop(&mdev->asender);
@@ -3713,6 +3823,8 @@
 	atomic_set(&mdev->rs_pending_cnt, 0);
 	wake_up(&mdev->misc_wait);
 
+	del_timer(&mdev->request_timer);
+
 	/* make sure syncer is stopped and w_resume_next_sg queued */
 	del_timer_sync(&mdev->resync_timer);
 	resync_timer_fn((unsigned long)mdev);
@@ -3758,13 +3870,6 @@
 	if (os.conn == C_DISCONNECTING) {
 		wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0);
 
-		if (!is_susp(mdev->state)) {
-			/* we must not free the tl_hash
-			 * while application io is still on the fly */
-			wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
-			drbd_free_tl_hash(mdev);
-		}
-
 		crypto_free_hash(mdev->cram_hmac_tfm);
 		mdev->cram_hmac_tfm = NULL;
 
@@ -3773,6 +3878,10 @@
 		drbd_request_state(mdev, NS(conn, C_STANDALONE));
 	}
 
+	/* serialize with bitmap writeout triggered by the state change,
+	 * if any. */
+	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+
 	/* tcp_close and release of sendpage pages can be deferred.  I don't
 	 * want to use SO_LINGER, because apparently it can be deferred for
 	 * more than 20 seconds (longest time I checked).
@@ -3873,7 +3982,8 @@
 	rv = drbd_recv(mdev, &p->head.payload, expect);
 
 	if (rv != expect) {
-		dev_err(DEV, "short read receiving handshake packet: l=%u\n", rv);
+		if (!signal_pending(current))
+			dev_warn(DEV, "short read receiving handshake packet: l=%u\n", rv);
 		return 0;
 	}
 
@@ -3975,7 +4085,8 @@
 	rv = drbd_recv(mdev, peers_ch, length);
 
 	if (rv != length) {
-		dev_err(DEV, "short read AuthChallenge: l=%u\n", rv);
+		if (!signal_pending(current))
+			dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv);
 		rv = 0;
 		goto fail;
 	}
@@ -4022,7 +4133,8 @@
 	rv = drbd_recv(mdev, response , resp_size);
 
 	if (rv != resp_size) {
-		dev_err(DEV, "short read receiving AuthResponse: l=%u\n", rv);
+		if (!signal_pending(current))
+			dev_warn(DEV, "short read receiving AuthResponse: l=%u\n", rv);
 		rv = 0;
 		goto fail;
 	}
@@ -4074,8 +4186,7 @@
 		h = drbd_connect(mdev);
 		if (h == 0) {
 			drbd_disconnect(mdev);
-			__set_current_state(TASK_INTERRUPTIBLE);
-			schedule_timeout(HZ);
+			schedule_timeout_interruptible(HZ);
 		}
 		if (h == -1) {
 			dev_warn(DEV, "Discarding network configuration.\n");
@@ -4113,7 +4224,7 @@
 	}
 	wake_up(&mdev->state_wait);
 
-	return TRUE;
+	return true;
 }
 
 static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h)
@@ -4129,7 +4240,7 @@
 	if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
 		wake_up(&mdev->misc_wait);
 
-	return TRUE;
+	return true;
 }
 
 static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h)
@@ -4152,7 +4263,7 @@
 	dec_rs_pending(mdev);
 	atomic_add(blksize >> 9, &mdev->rs_sect_in);
 
-	return TRUE;
+	return true;
 }
 
 /* when we receive the ACK for a write request,
@@ -4176,8 +4287,6 @@
 			return req;
 		}
 	}
-	dev_err(DEV, "_ack_id_to_req: failed to find req %p, sector %llus in list\n",
-		(void *)(unsigned long)id, (unsigned long long)sector);
 	return NULL;
 }
 
@@ -4195,15 +4304,17 @@
 	req = validator(mdev, id, sector);
 	if (unlikely(!req)) {
 		spin_unlock_irq(&mdev->req_lock);
-		dev_err(DEV, "%s: got a corrupt block_id/sector pair\n", func);
-		return FALSE;
+
+		dev_err(DEV, "%s: failed to find req %p, sector %llus\n", func,
+			(void *)(unsigned long)id, (unsigned long long)sector);
+		return false;
 	}
 	__req_mod(req, what, &m);
 	spin_unlock_irq(&mdev->req_lock);
 
 	if (m.bio)
 		complete_master_bio(mdev, &m);
-	return TRUE;
+	return true;
 }
 
 static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h)
@@ -4218,7 +4329,7 @@
 	if (is_syncer_block_id(p->block_id)) {
 		drbd_set_in_sync(mdev, sector, blksize);
 		dec_rs_pending(mdev);
-		return TRUE;
+		return true;
 	}
 	switch (be16_to_cpu(h->command)) {
 	case P_RS_WRITE_ACK:
@@ -4239,7 +4350,7 @@
 		break;
 	default:
 		D_ASSERT(0);
-		return FALSE;
+		return false;
 	}
 
 	return validate_req_change_req_state(mdev, p->block_id, sector,
@@ -4250,20 +4361,44 @@
 {
 	struct p_block_ack *p = (struct p_block_ack *)h;
 	sector_t sector = be64_to_cpu(p->sector);
-
-	if (__ratelimit(&drbd_ratelimit_state))
-		dev_warn(DEV, "Got NegAck packet. Peer is in troubles?\n");
+	int size = be32_to_cpu(p->blksize);
+	struct drbd_request *req;
+	struct bio_and_error m;
 
 	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 
 	if (is_syncer_block_id(p->block_id)) {
-		int size = be32_to_cpu(p->blksize);
 		dec_rs_pending(mdev);
 		drbd_rs_failed_io(mdev, sector, size);
-		return TRUE;
+		return true;
 	}
-	return validate_req_change_req_state(mdev, p->block_id, sector,
-		_ack_id_to_req, __func__ , neg_acked);
+
+	spin_lock_irq(&mdev->req_lock);
+	req = _ack_id_to_req(mdev, p->block_id, sector);
+	if (!req) {
+		spin_unlock_irq(&mdev->req_lock);
+		if (mdev->net_conf->wire_protocol == DRBD_PROT_A ||
+		    mdev->net_conf->wire_protocol == DRBD_PROT_B) {
+			/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
+			   The master bio might already be completed, therefore the
+			   request is no longer in the collision hash.
+			   => Do not try to validate block_id as request. */
+			/* In Protocol B we might already have got a P_RECV_ACK
+			   but then get a P_NEG_ACK after wards. */
+			drbd_set_out_of_sync(mdev, sector, size);
+			return true;
+		} else {
+			dev_err(DEV, "%s: failed to find req %p, sector %llus\n", __func__,
+				(void *)(unsigned long)p->block_id, (unsigned long long)sector);
+			return false;
+		}
+	}
+	__req_mod(req, neg_acked, &m);
+	spin_unlock_irq(&mdev->req_lock);
+
+	if (m.bio)
+		complete_master_bio(mdev, &m);
+	return true;
 }
 
 static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h)
@@ -4294,11 +4429,20 @@
 
 	if (get_ldev_if_state(mdev, D_FAILED)) {
 		drbd_rs_complete_io(mdev, sector);
-		drbd_rs_failed_io(mdev, sector, size);
+		switch (be16_to_cpu(h->command)) {
+		case P_NEG_RS_DREPLY:
+			drbd_rs_failed_io(mdev, sector, size);
+		case P_RS_CANCEL:
+			break;
+		default:
+			D_ASSERT(0);
+			put_ldev(mdev);
+			return false;
+		}
 		put_ldev(mdev);
 	}
 
-	return TRUE;
+	return true;
 }
 
 static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h)
@@ -4307,7 +4451,14 @@
 
 	tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
 
-	return TRUE;
+	if (mdev->state.conn == C_AHEAD &&
+	    atomic_read(&mdev->ap_in_flight) == 0 &&
+	    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
+		mdev->start_resync_timer.expires = jiffies + HZ;
+		add_timer(&mdev->start_resync_timer);
+	}
+
+	return true;
 }
 
 static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
@@ -4328,12 +4479,18 @@
 		ov_oos_print(mdev);
 
 	if (!get_ldev(mdev))
-		return TRUE;
+		return true;
 
 	drbd_rs_complete_io(mdev, sector);
 	dec_rs_pending(mdev);
 
-	if (--mdev->ov_left == 0) {
+	--mdev->ov_left;
+
+	/* let's advance progress step marks only for every other megabyte */
+	if ((mdev->ov_left & 0x200) == 0x200)
+		drbd_advance_rs_marks(mdev, mdev->ov_left);
+
+	if (mdev->ov_left == 0) {
 		w = kmalloc(sizeof(*w), GFP_NOIO);
 		if (w) {
 			w->cb = w_ov_finished;
@@ -4345,12 +4502,12 @@
 		}
 	}
 	put_ldev(mdev);
-	return TRUE;
+	return true;
 }
 
 static int got_skip(struct drbd_conf *mdev, struct p_header80 *h)
 {
-	return TRUE;
+	return true;
 }
 
 struct asender_cmd {
@@ -4378,6 +4535,7 @@
 	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
 	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
 	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
+	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply},
 	[P_MAX_CMD]	    = { 0, NULL },
 	};
 	if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index ad3fc62..5c0c8be 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c

@@ -140,9 +140,14 @@
 	struct hlist_node *n;
 	struct hlist_head *slot;
 
-	/* before we can signal completion to the upper layers,
-	 * we may need to close the current epoch */
+	/* Before we can signal completion to the upper layers,
+	 * we may need to close the current epoch.
+	 * We can skip this, if this request has not even been sent, because we
+	 * did not have a fully established connection yet/anymore, during
+	 * bitmap exchange, or while we are C_AHEAD due to congestion policy.
+	 */
 	if (mdev->state.conn >= C_CONNECTED &&
+	    (s & RQ_NET_SENT) != 0 &&
 	    req->epoch == mdev->newest_tle->br_number)
 		queue_barrier(mdev);
 
@@ -440,7 +445,7 @@
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
-		__drbd_chk_io_error(mdev, FALSE);
+		__drbd_chk_io_error(mdev, false);
 		_req_may_be_done_not_susp(req, m);
 		put_ldev(mdev);
 		break;
@@ -461,7 +466,7 @@
 
 		D_ASSERT(!(req->rq_state & RQ_NET_MASK));
 
-		__drbd_chk_io_error(mdev, FALSE);
+		__drbd_chk_io_error(mdev, false);
 		put_ldev(mdev);
 
 		/* no point in retrying if there is no good remote data,
@@ -545,6 +550,14 @@
 
 		break;
 
+	case queue_for_send_oos:
+		req->rq_state |= RQ_NET_QUEUED;
+		req->w.cb =  w_send_oos;
+		drbd_queue_work(&mdev->data.work, &req->w);
+		break;
+
+	case oos_handed_to_network:
+		/* actually the same */
 	case send_canceled:
 		/* treat it the same */
 	case send_failed:
@@ -558,6 +571,9 @@
 
 	case handed_over_to_network:
 		/* assert something? */
+		if (bio_data_dir(req->master_bio) == WRITE)
+			atomic_add(req->size>>9, &mdev->ap_in_flight);
+
 		if (bio_data_dir(req->master_bio) == WRITE &&
 		    mdev->net_conf->wire_protocol == DRBD_PROT_A) {
 			/* this is what is dangerous about protocol A:
@@ -591,6 +607,9 @@
 			dec_ap_pending(mdev);
 		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
 		req->rq_state |= RQ_NET_DONE;
+		if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE)
+			atomic_sub(req->size>>9, &mdev->ap_in_flight);
+
 		/* if it is still queued, we may not complete it here.
 		 * it will be canceled soon. */
 		if (!(req->rq_state & RQ_NET_QUEUED))
@@ -628,14 +647,17 @@
 		req->rq_state |= RQ_NET_OK;
 		D_ASSERT(req->rq_state & RQ_NET_PENDING);
 		dec_ap_pending(mdev);
+		atomic_sub(req->size>>9, &mdev->ap_in_flight);
 		req->rq_state &= ~RQ_NET_PENDING;
 		_req_may_be_done_not_susp(req, m);
 		break;
 
 	case neg_acked:
 		/* assert something? */
-		if (req->rq_state & RQ_NET_PENDING)
+		if (req->rq_state & RQ_NET_PENDING) {
 			dec_ap_pending(mdev);
+			atomic_sub(req->size>>9, &mdev->ap_in_flight);
+		}
 		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
 
 		req->rq_state |= RQ_NET_DONE;
@@ -690,8 +712,11 @@
 			dev_err(DEV, "FIXME (barrier_acked but pending)\n");
 			list_move(&req->tl_requests, &mdev->out_of_sequence_requests);
 		}
-		D_ASSERT(req->rq_state & RQ_NET_SENT);
-		req->rq_state |= RQ_NET_DONE;
+		if ((req->rq_state & RQ_NET_MASK) != 0) {
+			req->rq_state |= RQ_NET_DONE;
+			if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
+				atomic_sub(req->size>>9, &mdev->ap_in_flight);
+		}
 		_req_may_be_done(req, m); /* Allowed while state.susp */
 		break;
 
@@ -738,14 +763,14 @@
 	return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
 }
 
-static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
+static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
 {
 	const int rw = bio_rw(bio);
 	const int size = bio->bi_size;
 	const sector_t sector = bio->bi_sector;
 	struct drbd_tl_epoch *b = NULL;
 	struct drbd_request *req;
-	int local, remote;
+	int local, remote, send_oos = 0;
 	int err = -EIO;
 	int ret = 0;
 
@@ -759,6 +784,7 @@
 		bio_endio(bio, -ENOMEM);
 		return 0;
 	}
+	req->start_time = start_time;
 
 	local = get_ldev(mdev);
 	if (!local) {
@@ -808,9 +834,9 @@
 		drbd_al_begin_io(mdev, sector);
 	}
 
-	remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
-			    (mdev->state.pdsk == D_INCONSISTENT &&
-			     mdev->state.conn >= C_CONNECTED));
+	remote = remote && drbd_should_do_remote(mdev->state);
+	send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
+	D_ASSERT(!(remote && send_oos));
 
 	if (!(local || remote) && !is_susp(mdev->state)) {
 		if (__ratelimit(&drbd_ratelimit_state))
@@ -824,7 +850,7 @@
 	 * but there is a race between testing the bit and pointer outside the
 	 * spinlock, and grabbing the spinlock.
 	 * if we lost that race, we retry.  */
-	if (rw == WRITE && remote &&
+	if (rw == WRITE && (remote || send_oos) &&
 	    mdev->unused_spare_tle == NULL &&
 	    test_bit(CREATE_BARRIER, &mdev->flags)) {
 allocate_barrier:
@@ -842,18 +868,19 @@
 	if (is_susp(mdev->state)) {
 		/* If we got suspended, use the retry mechanism of
 		   generic_make_request() to restart processing of this
-		   bio. In the next call to drbd_make_request_26
+		   bio. In the next call to drbd_make_request
 		   we sleep in inc_ap_bio() */
 		ret = 1;
 		spin_unlock_irq(&mdev->req_lock);
 		goto fail_free_complete;
 	}
 
-	if (remote) {
-		remote = (mdev->state.pdsk == D_UP_TO_DATE ||
-			    (mdev->state.pdsk == D_INCONSISTENT &&
-			     mdev->state.conn >= C_CONNECTED));
-		if (!remote)
+	if (remote || send_oos) {
+		remote = drbd_should_do_remote(mdev->state);
+		send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
+		D_ASSERT(!(remote && send_oos));
+
+		if (!(remote || send_oos))
 			dev_warn(DEV, "lost connection while grabbing the req_lock!\n");
 		if (!(local || remote)) {
 			dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
@@ -866,7 +893,7 @@
 		mdev->unused_spare_tle = b;
 		b = NULL;
 	}
-	if (rw == WRITE && remote &&
+	if (rw == WRITE && (remote || send_oos) &&
 	    mdev->unused_spare_tle == NULL &&
 	    test_bit(CREATE_BARRIER, &mdev->flags)) {
 		/* someone closed the current epoch
@@ -889,7 +916,7 @@
 	 * barrier packet.  To get the write ordering right, we only have to
 	 * make sure that, if this is a write request and it triggered a
 	 * barrier packet, this request is queued within the same spinlock. */
-	if (remote && mdev->unused_spare_tle &&
+	if ((remote || send_oos) && mdev->unused_spare_tle &&
 	    test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
 		_tl_add_barrier(mdev, mdev->unused_spare_tle);
 		mdev->unused_spare_tle = NULL;
@@ -937,6 +964,34 @@
 				? queue_for_net_write
 				: queue_for_net_read);
 	}
+	if (send_oos && drbd_set_out_of_sync(mdev, sector, size))
+		_req_mod(req, queue_for_send_oos);
+
+	if (remote &&
+	    mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) {
+		int congested = 0;
+
+		if (mdev->net_conf->cong_fill &&
+		    atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) {
+			dev_info(DEV, "Congestion-fill threshold reached\n");
+			congested = 1;
+		}
+
+		if (mdev->act_log->used >= mdev->net_conf->cong_extents) {
+			dev_info(DEV, "Congestion-extents threshold reached\n");
+			congested = 1;
+		}
+
+		if (congested) {
+			queue_barrier(mdev); /* last barrier, after mirrored writes */
+
+			if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
+				_drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
+			else  /*mdev->net_conf->on_congestion == OC_DISCONNECT */
+				_drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
+		}
+	}
+
 	spin_unlock_irq(&mdev->req_lock);
 	kfree(b); /* if someone else has beaten us to it... */
 
@@ -949,9 +1004,9 @@
 		 * stable storage, and this is a WRITE, we may not even submit
 		 * this bio. */
 		if (get_ldev(mdev)) {
-			if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
-					     : rw == READ  ? DRBD_FAULT_DT_RD
-					     :               DRBD_FAULT_DT_RA))
+			if (drbd_insert_fault(mdev,   rw == WRITE ? DRBD_FAULT_DT_WR
+						    : rw == READ  ? DRBD_FAULT_DT_RD
+						    :               DRBD_FAULT_DT_RA))
 				bio_endio(req->private_bio, -EIO);
 			else
 				generic_make_request(req->private_bio);
@@ -1018,16 +1073,19 @@
 	return 0;
 }
 
-int drbd_make_request_26(struct request_queue *q, struct bio *bio)
+int drbd_make_request(struct request_queue *q, struct bio *bio)
 {
 	unsigned int s_enr, e_enr;
 	struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
+	unsigned long start_time;
 
 	if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) {
 		bio_endio(bio, -EPERM);
 		return 0;
 	}
 
+	start_time = jiffies;
+
 	/*
 	 * what we "blindly" assume:
 	 */
@@ -1042,12 +1100,12 @@
 
 	if (likely(s_enr == e_enr)) {
 		inc_ap_bio(mdev, 1);
-		return drbd_make_request_common(mdev, bio);
+		return drbd_make_request_common(mdev, bio, start_time);
 	}
 
 	/* can this bio be split generically?
 	 * Maybe add our own split-arbitrary-bios function. */
-	if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) {
+	if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
 		/* rather error out here than BUG in bio_split */
 		dev_err(DEV, "bio would need to, but cannot, be split: "
 		    "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n",
@@ -1069,11 +1127,7 @@
 		const int sps = 1 << HT_SHIFT; /* sectors per slot */
 		const int mask = sps - 1;
 		const sector_t first_sectors = sps - (sect & mask);
-		bp = bio_split(bio,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
-				bio_split_pool,
-#endif
-				first_sectors);
+		bp = bio_split(bio, first_sectors);
 
 		/* we need to get a "reference count" (ap_bio_cnt)
 		 * to avoid races with the disconnect/reconnect/suspend code.
@@ -1084,10 +1138,10 @@
 
 		D_ASSERT(e_enr == s_enr + 1);
 
-		while (drbd_make_request_common(mdev, &bp->bio1))
+		while (drbd_make_request_common(mdev, &bp->bio1, start_time))
 			inc_ap_bio(mdev, 1);
 
-		while (drbd_make_request_common(mdev, &bp->bio2))
+		while (drbd_make_request_common(mdev, &bp->bio2, start_time))
 			inc_ap_bio(mdev, 1);
 
 		dec_ap_bio(mdev);
@@ -1098,7 +1152,7 @@
 }
 
 /* This is called by bio_add_page().  With this function we reduce
- * the number of BIOs that span over multiple DRBD_MAX_SEGMENT_SIZEs
+ * the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs
  * units (was AL_EXTENTs).
  *
  * we do the calculation within the lower 32bit of the byte offsets,
@@ -1108,7 +1162,7 @@
  * As long as the BIO is empty we have to allow at least one bvec,
  * regardless of size and offset.  so the resulting bio may still
  * cross extent boundaries.  those are dealt with (bio_split) in
- * drbd_make_request_26.
+ * drbd_make_request.
  */
 int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
 {
@@ -1118,8 +1172,8 @@
 	unsigned int bio_size = bvm->bi_size;
 	int limit, backing_limit;
 
-	limit = DRBD_MAX_SEGMENT_SIZE
-	      - ((bio_offset & (DRBD_MAX_SEGMENT_SIZE-1)) + bio_size);
+	limit = DRBD_MAX_BIO_SIZE
+	      - ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size);
 	if (limit < 0)
 		limit = 0;
 	if (bio_size == 0) {
@@ -1136,3 +1190,42 @@
 	}
 	return limit;
 }
+
+void request_timer_fn(unsigned long data)
+{
+	struct drbd_conf *mdev = (struct drbd_conf *) data;
+	struct drbd_request *req; /* oldest request */
+	struct list_head *le;
+	unsigned long et = 0; /* effective timeout = ko_count * timeout */
+
+	if (get_net_conf(mdev)) {
+		et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
+		put_net_conf(mdev);
+	}
+	if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
+		return; /* Recurring timer stopped */
+
+	spin_lock_irq(&mdev->req_lock);
+	le = &mdev->oldest_tle->requests;
+	if (list_empty(le)) {
+		spin_unlock_irq(&mdev->req_lock);
+		mod_timer(&mdev->request_timer, jiffies + et);
+		return;
+	}
+
+	le = le->prev;
+	req = list_entry(le, struct drbd_request, tl_requests);
+	if (time_is_before_eq_jiffies(req->start_time + et)) {
+		if (req->rq_state & RQ_NET_PENDING) {
+			dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
+			_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
+		} else {
+			dev_warn(DEV, "Local backing block device frozen?\n");
+			mod_timer(&mdev->request_timer, jiffies + et);
+		}
+	} else {
+		mod_timer(&mdev->request_timer, req->start_time + et);
+	}
+
+	spin_unlock_irq(&mdev->req_lock);
+}

diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index ab2bd09..32e2c3e 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h

@@ -82,14 +82,16 @@
 	to_be_submitted,
 
 	/* XXX yes, now I am inconsistent...
-	 * these two are not "events" but "actions"
+	 * these are not "events" but "actions"
 	 * oh, well... */
 	queue_for_net_write,
 	queue_for_net_read,
+	queue_for_send_oos,
 
 	send_canceled,
 	send_failed,
 	handed_over_to_network,
+	oos_handed_to_network,
 	connection_lost_while_pending,
 	read_retry_remote_canceled,
 	recv_acked_by_peer,
@@ -289,7 +291,6 @@
 		req->epoch       = 0;
 		req->sector      = bio_src->bi_sector;
 		req->size        = bio_src->bi_size;
-		req->start_time  = jiffies;
 		INIT_HLIST_NODE(&req->colision);
 		INIT_LIST_HEAD(&req->tl_requests);
 		INIT_LIST_HEAD(&req->w.list);
@@ -321,6 +322,7 @@
 		struct bio_and_error *m);
 extern void complete_master_bio(struct drbd_conf *mdev,
 		struct bio_and_error *m);
+extern void request_timer_fn(unsigned long data);
 
 /* use this if you don't want to deal with calling complete_master_bio()
  * outside the spinlock, e.g. when walking some list on cleanup. */
@@ -338,23 +340,43 @@
 	return rv;
 }
 
-/* completion of master bio is outside of spinlock.
- * If you need it irqsave, do it your self!
- * Which means: don't use from bio endio callback. */
+/* completion of master bio is outside of our spinlock.
+ * We still may or may not be inside some irqs disabled section
+ * of the lower level driver completion callback, so we need to
+ * spin_lock_irqsave here. */
 static inline int req_mod(struct drbd_request *req,
 		enum drbd_req_event what)
 {
+	unsigned long flags;
 	struct drbd_conf *mdev = req->mdev;
 	struct bio_and_error m;
 	int rv;
 
-	spin_lock_irq(&mdev->req_lock);
+	spin_lock_irqsave(&mdev->req_lock, flags);
 	rv = __req_mod(req, what, &m);
-	spin_unlock_irq(&mdev->req_lock);
+	spin_unlock_irqrestore(&mdev->req_lock, flags);
 
 	if (m.bio)
 		complete_master_bio(mdev, &m);
 
 	return rv;
 }
+
+static inline bool drbd_should_do_remote(union drbd_state s)
+{
+	return s.pdsk == D_UP_TO_DATE ||
+		(s.pdsk >= D_INCONSISTENT &&
+		 s.conn >= C_WF_BITMAP_T &&
+		 s.conn < C_AHEAD);
+	/* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
+	   That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
+	   states. */
+}
+static inline bool drbd_should_send_oos(union drbd_state s)
+{
+	return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
+	/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
+	   since we enter state C_AHEAD only if proto >= 96 */
+}
+
 #endif

diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c
index 85179e1..c44a2a6 100644
--- a/drivers/block/drbd/drbd_strings.c
+++ b/drivers/block/drbd/drbd_strings.c

@@ -48,6 +48,8 @@
 	[C_PAUSED_SYNC_T]    = "PausedSyncT",
 	[C_VERIFY_S]         = "VerifyS",
 	[C_VERIFY_T]         = "VerifyT",
+	[C_AHEAD]            = "Ahead",
+	[C_BEHIND]           = "Behind",
 };
 
 static const char *drbd_role_s_names[] = {
@@ -92,7 +94,7 @@
 const char *drbd_conn_str(enum drbd_conns s)
 {
 	/* enums are unsigned... */
-	return s > C_PAUSED_SYNC_T ? "TOO_LARGE" : drbd_conn_s_names[s];
+	return s > C_BEHIND ? "TOO_LARGE" : drbd_conn_s_names[s];
 }
 
 const char *drbd_role_str(enum drbd_role s)
@@ -105,7 +107,7 @@
 	return s > D_UP_TO_DATE    ? "TOO_LARGE" : drbd_disk_s_names[s];
 }
 
-const char *drbd_set_st_err_str(enum drbd_state_ret_codes err)
+const char *drbd_set_st_err_str(enum drbd_state_rv err)
 {
 	return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" :
 	       err > SS_TWO_PRIMARIES ? "TOO_LARGE"

diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index e027446..f7e6c92 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c

@@ -39,18 +39,17 @@
 #include "drbd_req.h"
 
 static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
+static int w_make_resync_request(struct drbd_conf *mdev,
+				 struct drbd_work *w, int cancel);
 
 
 
-/* defined here:
-   drbd_md_io_complete
-   drbd_endio_sec
-   drbd_endio_pri
-
- * more endio handlers:
-   atodb_endio in drbd_actlog.c
-   drbd_bm_async_io_complete in drbd_bitmap.c
-
+/* endio handlers:
+ *   drbd_md_io_complete (defined here)
+ *   drbd_endio_pri (defined here)
+ *   drbd_endio_sec (defined here)
+ *   bm_async_io_complete (defined in drbd_bitmap.c)
+ *
  * For all these callbacks, note the following:
  * The callbacks will be called in irq context by the IDE drivers,
  * and in Softirqs/Tasklets/BH context by the SCSI drivers.
@@ -94,7 +93,7 @@
 	if (list_empty(&mdev->read_ee))
 		wake_up(&mdev->ee_wait);
 	if (test_bit(__EE_WAS_ERROR, &e->flags))
-		__drbd_chk_io_error(mdev, FALSE);
+		__drbd_chk_io_error(mdev, false);
 	spin_unlock_irqrestore(&mdev->req_lock, flags);
 
 	drbd_queue_work(&mdev->data.work, &e->w);
@@ -137,7 +136,7 @@
 		: list_empty(&mdev->active_ee);
 
 	if (test_bit(__EE_WAS_ERROR, &e->flags))
-		__drbd_chk_io_error(mdev, FALSE);
+		__drbd_chk_io_error(mdev, false);
 	spin_unlock_irqrestore(&mdev->req_lock, flags);
 
 	if (is_syncer_req)
@@ -163,14 +162,15 @@
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 	int is_write = bio_data_dir(bio) == WRITE;
 
-	if (error)
+	if (error && __ratelimit(&drbd_ratelimit_state))
 		dev_warn(DEV, "%s: error=%d s=%llus\n",
 				is_write ? "write" : "read", error,
 				(unsigned long long)e->sector);
 	if (!error && !uptodate) {
-		dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
-				is_write ? "write" : "read",
-				(unsigned long long)e->sector);
+		if (__ratelimit(&drbd_ratelimit_state))
+			dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
+					is_write ? "write" : "read",
+					(unsigned long long)e->sector);
 		/* strange behavior of some lower level drivers...
 		 * fail the request by clearing the uptodate flag,
 		 * but do not return any error?! */
@@ -250,13 +250,6 @@
 	return w_send_read_req(mdev, w, 0);
 }
 
-int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-	ERR_IF(cancel) return 1;
-	dev_err(DEV, "resync inactive, but callback triggered??\n");
-	return 1; /* Simply ignore this! */
-}
-
 void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest)
 {
 	struct hash_desc desc;
@@ -355,7 +348,7 @@
 	if (!get_ldev(mdev))
 		return -EIO;
 
-	if (drbd_rs_should_slow_down(mdev))
+	if (drbd_rs_should_slow_down(mdev, sector))
 		goto defer;
 
 	/* GFP_TRY, because if there is no memory available right now, this may
@@ -373,9 +366,10 @@
 	if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
 		return 0;
 
-	/* drbd_submit_ee currently fails for one reason only:
-	 * not being able to allocate enough bios.
-	 * Is dropping the connection going to help? */
+	/* If it failed because of ENOMEM, retry should help.  If it failed
+	 * because bio_add_page failed (probably broken lower level driver),
+	 * retry may or may not help.
+	 * If it does not, you may need to force disconnect. */
 	spin_lock_irq(&mdev->req_lock);
 	list_del(&e->w.list);
 	spin_unlock_irq(&mdev->req_lock);
@@ -386,26 +380,25 @@
 	return -EAGAIN;
 }
 
+int w_resync_timer(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	switch (mdev->state.conn) {
+	case C_VERIFY_S:
+		w_make_ov_request(mdev, w, cancel);
+		break;
+	case C_SYNC_TARGET:
+		w_make_resync_request(mdev, w, cancel);
+		break;
+	}
+
+	return 1;
+}
+
 void resync_timer_fn(unsigned long data)
 {
 	struct drbd_conf *mdev = (struct drbd_conf *) data;
-	int queue;
 
-	queue = 1;
-	switch (mdev->state.conn) {
-	case C_VERIFY_S:
-		mdev->resync_work.cb = w_make_ov_request;
-		break;
-	case C_SYNC_TARGET:
-		mdev->resync_work.cb = w_make_resync_request;
-		break;
-	default:
-		queue = 0;
-		mdev->resync_work.cb = w_resync_inactive;
-	}
-
-	/* harmless race: list_empty outside data.work.q_lock */
-	if (list_empty(&mdev->resync_work.list) && queue)
+	if (list_empty(&mdev->resync_work.list))
 		drbd_queue_work(&mdev->data.work, &mdev->resync_work);
 }
 
@@ -438,7 +431,7 @@
 		fb->values[i] += value;
 }
 
-int drbd_rs_controller(struct drbd_conf *mdev)
+static int drbd_rs_controller(struct drbd_conf *mdev)
 {
 	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
 	unsigned int want;     /* The number of sectors we want in the proxy */
@@ -492,29 +485,36 @@
 	return req_sect;
 }
 
-int w_make_resync_request(struct drbd_conf *mdev,
-		struct drbd_work *w, int cancel)
+static int drbd_rs_number_requests(struct drbd_conf *mdev)
+{
+	int number;
+	if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
+		number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
+		mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
+	} else {
+		mdev->c_sync_rate = mdev->sync_conf.rate;
+		number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
+	}
+
+	/* ignore the amount of pending requests, the resync controller should
+	 * throttle down to incoming reply rate soon enough anyways. */
+	return number;
+}
+
+static int w_make_resync_request(struct drbd_conf *mdev,
+				 struct drbd_work *w, int cancel)
 {
 	unsigned long bit;
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
-	int max_segment_size;
-	int number, rollback_i, size, pe, mx;
+	int max_bio_size;
+	int number, rollback_i, size;
 	int align, queued, sndbuf;
 	int i = 0;
 
 	if (unlikely(cancel))
 		return 1;
 
-	if (unlikely(mdev->state.conn < C_CONNECTED)) {
-		dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected");
-		return 0;
-	}
-
-	if (mdev->state.conn != C_SYNC_TARGET)
-		dev_err(DEV, "%s in w_make_resync_request\n",
-			drbd_conn_str(mdev->state.conn));
-
 	if (mdev->rs_total == 0) {
 		/* empty resync? */
 		drbd_resync_finished(mdev);
@@ -527,49 +527,19 @@
 		   to continue resync with a broken disk makes no sense at
 		   all */
 		dev_err(DEV, "Disk broke down during resync!\n");
-		mdev->resync_work.cb = w_resync_inactive;
 		return 1;
 	}
 
 	/* starting with drbd 8.3.8, we can handle multi-bio EEs,
 	 * if it should be necessary */
-	max_segment_size =
-		mdev->agreed_pro_version < 94 ? queue_max_segment_size(mdev->rq_queue) :
-		mdev->agreed_pro_version < 95 ?	DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_SEGMENT_SIZE;
+	max_bio_size =
+		mdev->agreed_pro_version < 94 ? queue_max_hw_sectors(mdev->rq_queue) << 9 :
+		mdev->agreed_pro_version < 95 ?	DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_BIO_SIZE;
 
-	if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
-		number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
-		mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
-	} else {
-		mdev->c_sync_rate = mdev->sync_conf.rate;
-		number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
-	}
-
-	/* Throttle resync on lower level disk activity, which may also be
-	 * caused by application IO on Primary/SyncTarget.
-	 * Keep this after the call to drbd_rs_controller, as that assumes
-	 * to be called as precisely as possible every SLEEP_TIME,
-	 * and would be confused otherwise. */
-	if (drbd_rs_should_slow_down(mdev))
+	number = drbd_rs_number_requests(mdev);
+	if (number == 0)
 		goto requeue;
 
-	mutex_lock(&mdev->data.mutex);
-	if (mdev->data.socket)
-		mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req);
-	else
-		mx = 1;
-	mutex_unlock(&mdev->data.mutex);
-
-	/* For resync rates >160MB/sec, allow more pending RS requests */
-	if (number > mx)
-		mx = number;
-
-	/* Limit the number of pending RS requests to no more than the peer's receive buffer */
-	pe = atomic_read(&mdev->rs_pending_cnt);
-	if ((pe + number) > mx) {
-		number = mx - pe;
-	}
-
 	for (i = 0; i < number; i++) {
 		/* Stop generating RS requests, when half of the send buffer is filled */
 		mutex_lock(&mdev->data.mutex);
@@ -588,16 +558,16 @@
 		size = BM_BLOCK_SIZE;
 		bit  = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
 
-		if (bit == -1UL) {
+		if (bit == DRBD_END_OF_BITMAP) {
 			mdev->bm_resync_fo = drbd_bm_bits(mdev);
-			mdev->resync_work.cb = w_resync_inactive;
 			put_ldev(mdev);
 			return 1;
 		}
 
 		sector = BM_BIT_TO_SECT(bit);
 
-		if (drbd_try_rs_begin_io(mdev, sector)) {
+		if (drbd_rs_should_slow_down(mdev, sector) ||
+		    drbd_try_rs_begin_io(mdev, sector)) {
 			mdev->bm_resync_fo = bit;
 			goto requeue;
 		}
@@ -608,7 +578,7 @@
 			goto next_sector;
 		}
 
-#if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE
+#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
 		/* try to find some adjacent bits.
 		 * we stop if we have already the maximum req size.
 		 *
@@ -618,7 +588,7 @@
 		align = 1;
 		rollback_i = i;
 		for (;;) {
-			if (size + BM_BLOCK_SIZE > max_segment_size)
+			if (size + BM_BLOCK_SIZE > max_bio_size)
 				break;
 
 			/* Be always aligned */
@@ -685,7 +655,6 @@
 		 * resync data block, and the last bit is cleared.
 		 * until then resync "work" is "inactive" ...
 		 */
-		mdev->resync_work.cb = w_resync_inactive;
 		put_ldev(mdev);
 		return 1;
 	}
@@ -706,27 +675,18 @@
 	if (unlikely(cancel))
 		return 1;
 
-	if (unlikely(mdev->state.conn < C_CONNECTED)) {
-		dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected");
-		return 0;
-	}
-
-	number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
-	if (atomic_read(&mdev->rs_pending_cnt) > number)
-		goto requeue;
-
-	number -= atomic_read(&mdev->rs_pending_cnt);
+	number = drbd_rs_number_requests(mdev);
 
 	sector = mdev->ov_position;
 	for (i = 0; i < number; i++) {
 		if (sector >= capacity) {
-			mdev->resync_work.cb = w_resync_inactive;
 			return 1;
 		}
 
 		size = BM_BLOCK_SIZE;
 
-		if (drbd_try_rs_begin_io(mdev, sector)) {
+		if (drbd_rs_should_slow_down(mdev, sector) ||
+		    drbd_try_rs_begin_io(mdev, sector)) {
 			mdev->ov_position = sector;
 			goto requeue;
 		}
@@ -744,11 +704,33 @@
 	mdev->ov_position = sector;
 
  requeue:
+	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
 	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
 	return 1;
 }
 
 
+void start_resync_timer_fn(unsigned long data)
+{
+	struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+	drbd_queue_work(&mdev->data.work, &mdev->start_resync_work);
+}
+
+int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
+		dev_warn(DEV, "w_start_resync later...\n");
+		mdev->start_resync_timer.expires = jiffies + HZ/10;
+		add_timer(&mdev->start_resync_timer);
+		return 1;
+	}
+
+	drbd_start_resync(mdev, C_SYNC_SOURCE);
+	clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags);
+	return 1;
+}
+
 int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
 	kfree(w);
@@ -782,6 +764,7 @@
 	union drbd_state os, ns;
 	struct drbd_work *w;
 	char *khelper_cmd = NULL;
+	int verify_done = 0;
 
 	/* Remove all elements from the resync LRU. Since future actions
 	 * might set bits in the (main) bitmap, then the entries in the
@@ -792,8 +775,7 @@
 		 * queue (or even the read operations for those packets
 		 * is not finished by now).   Retry in 100ms. */
 
-		__set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ / 10);
+		schedule_timeout_interruptible(HZ / 10);
 		w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
 		if (w) {
 			w->cb = w_resync_finished;
@@ -818,6 +800,8 @@
 	spin_lock_irq(&mdev->req_lock);
 	os = mdev->state;
 
+	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
+
 	/* This protects us against multiple calls (that can happen in the presence
 	   of application IO), and against connectivity loss just before we arrive here. */
 	if (os.conn <= C_CONNECTED)
@@ -827,8 +811,7 @@
 	ns.conn = C_CONNECTED;
 
 	dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
-	     (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) ?
-	     "Online verify " : "Resync",
+	     verify_done ? "Online verify " : "Resync",
 	     dt + mdev->rs_paused, mdev->rs_paused, dbdt);
 
 	n_oos = drbd_bm_total_weight(mdev);
@@ -886,14 +869,18 @@
 			}
 		}
 
-		drbd_uuid_set_bm(mdev, 0UL);
-
-		if (mdev->p_uuid) {
-			/* Now the two UUID sets are equal, update what we
-			 * know of the peer. */
-			int i;
-			for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
-				mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
+		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
+			/* for verify runs, we don't update uuids here,
+			 * so there would be nothing to report. */
+			drbd_uuid_set_bm(mdev, 0UL);
+			drbd_print_uuids(mdev, "updated UUIDs");
+			if (mdev->p_uuid) {
+				/* Now the two UUID sets are equal, update what we
+				 * know of the peer. */
+				int i;
+				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
+					mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
+			}
 		}
 	}
 
@@ -905,15 +892,11 @@
 	mdev->rs_total  = 0;
 	mdev->rs_failed = 0;
 	mdev->rs_paused = 0;
-	mdev->ov_start_sector = 0;
+	if (verify_done)
+		mdev->ov_start_sector = 0;
 
 	drbd_md_sync(mdev);
 
-	if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
-		dev_info(DEV, "Writing the whole bitmap\n");
-		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
-	}
-
 	if (khelper_cmd)
 		drbd_khelper(mdev, khelper_cmd);
 
@@ -994,7 +977,9 @@
 		put_ldev(mdev);
 	}
 
-	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+	if (mdev->state.conn == C_AHEAD) {
+		ok = drbd_send_ack(mdev, P_RS_CANCEL, e);
+	} else if (likely((e->flags & EE_WAS_ERROR) == 0)) {
 		if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
 			inc_rs_pending(mdev);
 			ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
@@ -1096,25 +1081,27 @@
 	if (unlikely(cancel))
 		goto out;
 
-	if (unlikely((e->flags & EE_WAS_ERROR) != 0))
-		goto out;
-
 	digest_size = crypto_hash_digestsize(mdev->verify_tfm);
-	/* FIXME if this allocation fails, online verify will not terminate! */
 	digest = kmalloc(digest_size, GFP_NOIO);
-	if (digest) {
-		drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
-		inc_rs_pending(mdev);
-		ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
-					     digest, digest_size, P_OV_REPLY);
-		if (!ok)
-			dec_rs_pending(mdev);
-		kfree(digest);
+	if (!digest) {
+		ok = 0;	/* terminate the connection in case the allocation failed */
+		goto out;
 	}
 
+	if (likely(!(e->flags & EE_WAS_ERROR)))
+		drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
+	else
+		memset(digest, 0, digest_size);
+
+	inc_rs_pending(mdev);
+	ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
+				     digest, digest_size, P_OV_REPLY);
+	if (!ok)
+		dec_rs_pending(mdev);
+	kfree(digest);
+
 out:
 	drbd_free_ee(mdev, e);
-
 	dec_unacked(mdev);
 
 	return ok;
@@ -1129,7 +1116,6 @@
 		mdev->ov_last_oos_size = size>>9;
 	}
 	drbd_set_out_of_sync(mdev, sector, size);
-	set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
 }
 
 int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
@@ -1165,10 +1151,6 @@
 			eq = !memcmp(digest, di->digest, digest_size);
 			kfree(digest);
 		}
-	} else {
-		ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
-		if (__ratelimit(&drbd_ratelimit_state))
-			dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
 	}
 
 	dec_unacked(mdev);
@@ -1182,7 +1164,13 @@
 
 	drbd_free_ee(mdev, e);
 
-	if (--mdev->ov_left == 0) {
+	--mdev->ov_left;
+
+	/* let's advance progress step marks only for every other megabyte */
+	if ((mdev->ov_left & 0x200) == 0x200)
+		drbd_advance_rs_marks(mdev, mdev->ov_left);
+
+	if (mdev->ov_left == 0) {
 		ov_oos_print(mdev);
 		drbd_resync_finished(mdev);
 	}
@@ -1235,6 +1223,22 @@
 	return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE);
 }
 
+int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_request *req = container_of(w, struct drbd_request, w);
+	int ok;
+
+	if (unlikely(cancel)) {
+		req_mod(req, send_canceled);
+		return 1;
+	}
+
+	ok = drbd_send_oos(mdev, req);
+	req_mod(req, oos_handed_to_network);
+
+	return ok;
+}
+
 /**
  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
  * @mdev:	DRBD device.
@@ -1430,6 +1434,17 @@
 	return retcode;
 }
 
+void drbd_rs_controller_reset(struct drbd_conf *mdev)
+{
+	atomic_set(&mdev->rs_sect_in, 0);
+	atomic_set(&mdev->rs_sect_ev, 0);
+	mdev->rs_in_flight = 0;
+	mdev->rs_planed = 0;
+	spin_lock(&mdev->peer_seq_lock);
+	fifo_set(&mdev->rs_plan_s, 0);
+	spin_unlock(&mdev->peer_seq_lock);
+}
+
 /**
  * drbd_start_resync() - Start the resync process
  * @mdev:	DRBD device.
@@ -1443,13 +1458,18 @@
 	union drbd_state ns;
 	int r;
 
-	if (mdev->state.conn >= C_SYNC_SOURCE) {
+	if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
 		dev_err(DEV, "Resync already running!\n");
 		return;
 	}
 
-	/* In case a previous resync run was aborted by an IO error/detach on the peer. */
-	drbd_rs_cancel_all(mdev);
+	if (mdev->state.conn < C_AHEAD) {
+		/* In case a previous resync run was aborted by an IO error/detach on the peer. */
+		drbd_rs_cancel_all(mdev);
+		/* This should be done when we abort the resync. We definitely do not
+		   want to have this for connections going back and forth between
+		   Ahead/Behind and SyncSource/SyncTarget */
+	}
 
 	if (side == C_SYNC_TARGET) {
 		/* Since application IO was locked out during C_WF_BITMAP_T and
@@ -1463,6 +1483,20 @@
 			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
 			return;
 		}
+	} else /* C_SYNC_SOURCE */ {
+		r = drbd_khelper(mdev, "before-resync-source");
+		r = (r >> 8) & 0xff;
+		if (r > 0) {
+			if (r == 3) {
+				dev_info(DEV, "before-resync-source handler returned %d, "
+					 "ignoring. Old userland tools?", r);
+			} else {
+				dev_info(DEV, "before-resync-source handler returned %d, "
+					 "dropping connection.\n", r);
+				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+				return;
+			}
+		}
 	}
 
 	drbd_state_lock(mdev);
@@ -1472,18 +1506,6 @@
 		return;
 	}
 
-	if (side == C_SYNC_TARGET) {
-		mdev->bm_resync_fo = 0;
-	} else /* side == C_SYNC_SOURCE */ {
-		u64 uuid;
-
-		get_random_bytes(&uuid, sizeof(u64));
-		drbd_uuid_set(mdev, UI_BITMAP, uuid);
-		drbd_send_sync_uuid(mdev, uuid);
-
-		D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
-	}
-
 	write_lock_irq(&global_state_lock);
 	ns = mdev->state;
 
@@ -1521,13 +1543,24 @@
 		_drbd_pause_after(mdev);
 	}
 	write_unlock_irq(&global_state_lock);
-	put_ldev(mdev);
 
 	if (r == SS_SUCCESS) {
 		dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
 		     drbd_conn_str(ns.conn),
 		     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
 		     (unsigned long) mdev->rs_total);
+		if (side == C_SYNC_TARGET)
+			mdev->bm_resync_fo = 0;
+
+		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
+		 * with w_send_oos, or the sync target will get confused as to
+		 * how much bits to resync.  We cannot do that always, because for an
+		 * empty resync and protocol < 95, we need to do it here, as we call
+		 * drbd_resync_finished from here in that case.
+		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
+		 * and from after_state_ch otherwise. */
+		if (side == C_SYNC_SOURCE && mdev->agreed_pro_version < 96)
+			drbd_gen_and_send_sync_uuid(mdev);
 
 		if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) {
 			/* This still has a race (about when exactly the peers
@@ -1547,13 +1580,7 @@
 			drbd_resync_finished(mdev);
 		}
 
-		atomic_set(&mdev->rs_sect_in, 0);
-		atomic_set(&mdev->rs_sect_ev, 0);
-		mdev->rs_in_flight = 0;
-		mdev->rs_planed = 0;
-		spin_lock(&mdev->peer_seq_lock);
-		fifo_set(&mdev->rs_plan_s, 0);
-		spin_unlock(&mdev->peer_seq_lock);
+		drbd_rs_controller_reset(mdev);
 		/* ns.conn may already be != mdev->state.conn,
 		 * we may have been paused in between, or become paused until
 		 * the timer triggers.
@@ -1563,6 +1590,7 @@
 
 		drbd_md_sync(mdev);
 	}
+	put_ldev(mdev);
 	drbd_state_unlock(mdev);
 }
 

diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h
index 53586fa..151f1a3 100644
--- a/drivers/block/drbd/drbd_wrappers.h
+++ b/drivers/block/drbd/drbd_wrappers.h

@@ -39,7 +39,7 @@
 		return;
 	}
 
-	if (FAULT_ACTIVE(mdev, fault_type))
+	if (drbd_insert_fault(mdev, fault_type))
 		bio_endio(bio, -EIO);
 	else
 		generic_make_request(bio);

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 0f17ad8..b03771d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c

@@ -28,6 +28,7 @@
 #include <linux/cpu.h>
 #include <linux/completion.h>
 #include <linux/mutex.h>
+#include <linux/syscore_ops.h>
 
 #include <trace/events/power.h>
 
@@ -1340,35 +1341,31 @@
 }
 EXPORT_SYMBOL(cpufreq_get);
 
+static struct sysdev_driver cpufreq_sysdev_driver = {
+	.add		= cpufreq_add_dev,
+	.remove		= cpufreq_remove_dev,
+};
+
 
 /**
- *	cpufreq_suspend - let the low level driver prepare for suspend
+ * cpufreq_bp_suspend - Prepare the boot CPU for system suspend.
+ *
+ * This function is only executed for the boot processor.  The other CPUs
+ * have been put offline by means of CPU hotplug.
  */
-
-static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg)
+static int cpufreq_bp_suspend(void)
 {
 	int ret = 0;
 
-	int cpu = sysdev->id;
+	int cpu = smp_processor_id();
 	struct cpufreq_policy *cpu_policy;
 
 	dprintk("suspending cpu %u\n", cpu);
 
-	if (!cpu_online(cpu))
-		return 0;
-
-	/* we may be lax here as interrupts are off. Nonetheless
-	 * we need to grab the correct cpu policy, as to check
-	 * whether we really run on this CPU.
-	 */
-
+	/* If there's no policy for the boot CPU, we have nothing to do. */
 	cpu_policy = cpufreq_cpu_get(cpu);
 	if (!cpu_policy)
-		return -EINVAL;
-
-	/* only handle each CPU group once */
-	if (unlikely(cpu_policy->cpu != cpu))
-		goto out;
+		return 0;
 
 	if (cpufreq_driver->suspend) {
 		ret = cpufreq_driver->suspend(cpu_policy);
@@ -1377,13 +1374,12 @@
 					"step on CPU %u\n", cpu_policy->cpu);
 	}
 
-out:
 	cpufreq_cpu_put(cpu_policy);
 	return ret;
 }
 
 /**
- *	cpufreq_resume -  restore proper CPU frequency handling after resume
+ * cpufreq_bp_resume - Restore proper frequency handling of the boot CPU.
  *
  *	1.) resume CPUfreq hardware support (cpufreq_driver->resume())
  *	2.) schedule call cpufreq_update_policy() ASAP as interrupts are
@@ -1391,31 +1387,23 @@
  *	    what we believe it to be. This is a bit later than when it
  *	    should be, but nonethteless it's better than calling
  *	    cpufreq_driver->get() here which might re-enable interrupts...
+ *
+ * This function is only executed for the boot CPU.  The other CPUs have not
+ * been turned on yet.
  */
-static int cpufreq_resume(struct sys_device *sysdev)
+static void cpufreq_bp_resume(void)
 {
 	int ret = 0;
 
-	int cpu = sysdev->id;
+	int cpu = smp_processor_id();
 	struct cpufreq_policy *cpu_policy;
 
 	dprintk("resuming cpu %u\n", cpu);
 
-	if (!cpu_online(cpu))
-		return 0;
-
-	/* we may be lax here as interrupts are off. Nonetheless
-	 * we need to grab the correct cpu policy, as to check
-	 * whether we really run on this CPU.
-	 */
-
+	/* If there's no policy for the boot CPU, we have nothing to do. */
 	cpu_policy = cpufreq_cpu_get(cpu);
 	if (!cpu_policy)
-		return -EINVAL;
-
-	/* only handle each CPU group once */
-	if (unlikely(cpu_policy->cpu != cpu))
-		goto fail;
+		return;
 
 	if (cpufreq_driver->resume) {
 		ret = cpufreq_driver->resume(cpu_policy);
@@ -1430,14 +1418,11 @@
 
 fail:
 	cpufreq_cpu_put(cpu_policy);
-	return ret;
 }
 
-static struct sysdev_driver cpufreq_sysdev_driver = {
-	.add		= cpufreq_add_dev,
-	.remove		= cpufreq_remove_dev,
-	.suspend	= cpufreq_suspend,
-	.resume		= cpufreq_resume,
+static struct syscore_ops cpufreq_syscore_ops = {
+	.suspend	= cpufreq_bp_suspend,
+	.resume		= cpufreq_bp_resume,
 };
 
 
@@ -2002,6 +1987,7 @@
 	cpufreq_global_kobject = kobject_create_and_add("cpufreq",
 						&cpu_sysdev_class.kset.kobj);
 	BUG_ON(!cpufreq_global_kobject);
+	register_syscore_ops(&cpufreq_syscore_ops);
 
 	return 0;
 }

diff --git a/drivers/gpio/adp5588-gpio.c b/drivers/gpio/adp5588-gpio.c
index 33fc685..3525ad9 100644
--- a/drivers/gpio/adp5588-gpio.c
+++ b/drivers/gpio/adp5588-gpio.c

@@ -289,10 +289,10 @@
 
 	for (gpio = 0; gpio < dev->gpio_chip.ngpio; gpio++) {
 		int irq = gpio + dev->irq_base;
-		set_irq_chip_data(irq, dev);
-		set_irq_chip_and_handler(irq, &adp5588_irq_chip,
+		irq_set_chip_data(irq, dev);
+		irq_set_chip_and_handler(irq, &adp5588_irq_chip,
 					 handle_level_irq);
-		set_irq_nested_thread(irq, 1);
+		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 		/*
 		 * ARM needs us to explicitly flag the IRQ as VALID,
@@ -300,7 +300,7 @@
 		 */
 		set_irq_flags(irq, IRQF_VALID);
 #else
-		set_irq_noprobe(irq);
+		irq_set_noprobe(irq);
 #endif
 	}
 

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 649550e..36a2974 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c

@@ -1656,51 +1656,6 @@
 			chip->get
 				? (chip->get(chip, i) ? "hi" : "lo")
 				: "?  ");
-
-		if (!is_out) {
-			int		irq = gpio_to_irq(gpio);
-			struct irq_desc	*desc = irq_to_desc(irq);
-
-			/* This races with request_irq(), set_irq_type(),
-			 * and set_irq_wake() ... but those are "rare".
-			 *
-			 * More significantly, trigger type flags aren't
-			 * currently maintained by genirq.
-			 */
-			if (irq >= 0 && desc->action) {
-				char *trigger;
-
-				switch (desc->status & IRQ_TYPE_SENSE_MASK) {
-				case IRQ_TYPE_NONE:
-					trigger = "(default)";
-					break;
-				case IRQ_TYPE_EDGE_FALLING:
-					trigger = "edge-falling";
-					break;
-				case IRQ_TYPE_EDGE_RISING:
-					trigger = "edge-rising";
-					break;
-				case IRQ_TYPE_EDGE_BOTH:
-					trigger = "edge-both";
-					break;
-				case IRQ_TYPE_LEVEL_HIGH:
-					trigger = "level-high";
-					break;
-				case IRQ_TYPE_LEVEL_LOW:
-					trigger = "level-low";
-					break;
-				default:
-					trigger = "?trigger?";
-					break;
-				}
-
-				seq_printf(s, " irq-%d %s%s",
-					irq, trigger,
-					(desc->status & IRQ_WAKEUP)
-						? " wakeup" : "");
-			}
-		}
-
 		seq_printf(s, "\n");
 	}
 }

diff --git a/drivers/gpio/max732x.c b/drivers/gpio/max732x.c
index 9e1d01f..ad6951e 100644
--- a/drivers/gpio/max732x.c
+++ b/drivers/gpio/max732x.c

@@ -470,14 +470,14 @@
 			if (!(chip->dir_input & (1 << lvl)))
 				continue;
 
-			set_irq_chip_data(irq, chip);
-			set_irq_chip_and_handler(irq, &max732x_irq_chip,
+			irq_set_chip_data(irq, chip);
+			irq_set_chip_and_handler(irq, &max732x_irq_chip,
 						 handle_edge_irq);
-			set_irq_nested_thread(irq, 1);
+			irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 			set_irq_flags(irq, IRQF_VALID);
 #else
-			set_irq_noprobe(irq);
+			irq_set_noprobe(irq);
 #endif
 		}
 

diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c
index 2fc25de..583e925 100644
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c

@@ -395,13 +395,13 @@
 		for (lvl = 0; lvl < chip->gpio_chip.ngpio; lvl++) {
 			int irq = lvl + chip->irq_base;
 
-			set_irq_chip_data(irq, chip);
-			set_irq_chip_and_handler(irq, &pca953x_irq_chip,
+			irq_set_chip_data(irq, chip);
+			irq_set_chip_and_handler(irq, &pca953x_irq_chip,
 						 handle_edge_irq);
 #ifdef CONFIG_ARM
 			set_irq_flags(irq, IRQF_VALID);
 #else
-			set_irq_noprobe(irq);
+			irq_set_noprobe(irq);
 #endif
 		}
 

diff --git a/drivers/gpio/pl061.c b/drivers/gpio/pl061.c
index 838ddbd..6fcb28c 100644
--- a/drivers/gpio/pl061.c
+++ b/drivers/gpio/pl061.c

@@ -210,7 +210,7 @@
 
 static void pl061_irq_handler(unsigned irq, struct irq_desc *desc)
 {
-	struct list_head *chip_list = get_irq_data(irq);
+	struct list_head *chip_list = irq_get_handler_data(irq);
 	struct list_head *ptr;
 	struct pl061_gpio *chip;
 
@@ -294,7 +294,7 @@
 		ret = -ENODEV;
 		goto iounmap;
 	}
-	set_irq_chained_handler(irq, pl061_irq_handler);
+	irq_set_chained_handler(irq, pl061_irq_handler);
 	if (!test_and_set_bit(irq, init_irq)) { /* list initialized? */
 		chip_list = kmalloc(sizeof(*chip_list), GFP_KERNEL);
 		if (chip_list == NULL) {
@@ -303,9 +303,9 @@
 			goto iounmap;
 		}
 		INIT_LIST_HEAD(chip_list);
-		set_irq_data(irq, chip_list);
+		irq_set_handler_data(irq, chip_list);
 	} else
-		chip_list = get_irq_data(irq);
+		chip_list = irq_get_handler_data(irq);
 	list_add(&chip->list, chip_list);
 
 	for (i = 0; i < PL061_GPIO_NR; i++) {
@@ -315,10 +315,10 @@
 		else
 			pl061_direction_input(&chip->gc, i);
 
-		set_irq_chip(i+chip->irq_base, &pl061_irqchip);
-		set_irq_handler(i+chip->irq_base, handle_simple_irq);
+		irq_set_chip_and_handler(i + chip->irq_base, &pl061_irqchip,
+					 handle_simple_irq);
 		set_irq_flags(i+chip->irq_base, IRQF_VALID);
-		set_irq_chip_data(i+chip->irq_base, chip);
+		irq_set_chip_data(i + chip->irq_base, chip);
 	}
 
 	return 0;

diff --git a/drivers/gpio/stmpe-gpio.c b/drivers/gpio/stmpe-gpio.c
index eb2901f..4c980b5 100644
--- a/drivers/gpio/stmpe-gpio.c
+++ b/drivers/gpio/stmpe-gpio.c

@@ -254,14 +254,14 @@
 	int irq;
 
 	for (irq = base; irq < base + stmpe_gpio->chip.ngpio; irq++) {
-		set_irq_chip_data(irq, stmpe_gpio);
-		set_irq_chip_and_handler(irq, &stmpe_gpio_irq_chip,
+		irq_set_chip_data(irq, stmpe_gpio);
+		irq_set_chip_and_handler(irq, &stmpe_gpio_irq_chip,
 					 handle_simple_irq);
-		set_irq_nested_thread(irq, 1);
+		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, IRQF_VALID);
 #else
-		set_irq_noprobe(irq);
+		irq_set_noprobe(irq);
 #endif
 	}
 
@@ -277,8 +277,8 @@
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, 0);
 #endif
-		set_irq_chip_and_handler(irq, NULL, NULL);
-		set_irq_chip_data(irq, NULL);
+		irq_set_chip_and_handler(irq, NULL, NULL);
+		irq_set_chip_data(irq, NULL);
 	}
 }
 

diff --git a/drivers/gpio/sx150x.c b/drivers/gpio/sx150x.c
index d2f874c..a4f7353 100644
--- a/drivers/gpio/sx150x.c
+++ b/drivers/gpio/sx150x.c

@@ -551,12 +551,12 @@
 
 	for (n = 0; n < chip->dev_cfg->ngpios; ++n) {
 		irq = irq_base + n;
-		set_irq_chip_and_handler(irq, &chip->irq_chip, handle_edge_irq);
-		set_irq_nested_thread(irq, 1);
+		irq_set_chip_and_handler(irq, &chip->irq_chip, handle_edge_irq);
+		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, IRQF_VALID);
 #else
-		set_irq_noprobe(irq);
+		irq_set_noprobe(irq);
 #endif
 	}
 
@@ -583,8 +583,7 @@
 
 	for (n = 0; n < chip->dev_cfg->ngpios; ++n) {
 		irq = chip->irq_base + n;
-		set_irq_handler(irq, NULL);
-		set_irq_chip(irq, NULL);
+		irq_set_chip_and_handler(irq, NULL, NULL);
 	}
 }
 

diff --git a/drivers/gpio/tc3589x-gpio.c b/drivers/gpio/tc3589x-gpio.c
index 27200af..2a82e89 100644
--- a/drivers/gpio/tc3589x-gpio.c
+++ b/drivers/gpio/tc3589x-gpio.c

@@ -239,14 +239,14 @@
 	int irq;
 
 	for (irq = base; irq < base + tc3589x_gpio->chip.ngpio; irq++) {
-		set_irq_chip_data(irq, tc3589x_gpio);
-		set_irq_chip_and_handler(irq, &tc3589x_gpio_irq_chip,
+		irq_set_chip_data(irq, tc3589x_gpio);
+		irq_set_chip_and_handler(irq, &tc3589x_gpio_irq_chip,
 					 handle_simple_irq);
-		set_irq_nested_thread(irq, 1);
+		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, IRQF_VALID);
 #else
-		set_irq_noprobe(irq);
+		irq_set_noprobe(irq);
 #endif
 	}
 
@@ -262,8 +262,8 @@
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, 0);
 #endif
-		set_irq_chip_and_handler(irq, NULL, NULL);
-		set_irq_chip_data(irq, NULL);
+		irq_set_chip_and_handler(irq, NULL, NULL);
+		irq_set_chip_data(irq, NULL);
 	}
 }
 

diff --git a/drivers/gpio/timbgpio.c b/drivers/gpio/timbgpio.c
index ffcd815..edbe1ea 100644
--- a/drivers/gpio/timbgpio.c
+++ b/drivers/gpio/timbgpio.c

@@ -196,7 +196,7 @@
 
 static void timbgpio_irq(unsigned int irq, struct irq_desc *desc)
 {
-	struct timbgpio *tgpio = get_irq_data(irq);
+	struct timbgpio *tgpio = irq_get_handler_data(irq);
 	unsigned long ipr;
 	int offset;
 
@@ -292,16 +292,16 @@
 		return 0;
 
 	for (i = 0; i < pdata->nr_pins; i++) {
-		set_irq_chip_and_handler_name(tgpio->irq_base + i,
+		irq_set_chip_and_handler_name(tgpio->irq_base + i,
 			&timbgpio_irqchip, handle_simple_irq, "mux");
-		set_irq_chip_data(tgpio->irq_base + i, tgpio);
+		irq_set_chip_data(tgpio->irq_base + i, tgpio);
 #ifdef CONFIG_ARM
 		set_irq_flags(tgpio->irq_base + i, IRQF_VALID | IRQF_PROBE);
 #endif
 	}
 
-	set_irq_data(irq, tgpio);
-	set_irq_chained_handler(irq, timbgpio_irq);
+	irq_set_handler_data(irq, tgpio);
+	irq_set_chained_handler(irq, timbgpio_irq);
 
 	return 0;
 
@@ -327,12 +327,12 @@
 	if (irq >= 0 && tgpio->irq_base > 0) {
 		int i;
 		for (i = 0; i < tgpio->gpio.ngpio; i++) {
-			set_irq_chip(tgpio->irq_base + i, NULL);
-			set_irq_chip_data(tgpio->irq_base + i, NULL);
+			irq_set_chip(tgpio->irq_base + i, NULL);
+			irq_set_chip_data(tgpio->irq_base + i, NULL);
 		}
 
-		set_irq_handler(irq, NULL);
-		set_irq_data(irq, NULL);
+		irq_set_handler(irq, NULL);
+		irq_set_handler_data(irq, NULL);
 	}
 
 	err = gpiochip_remove(&tgpio->gpio);

diff --git a/drivers/gpio/vr41xx_giu.c b/drivers/gpio/vr41xx_giu.c
index cffa3bd..a365be0 100644
--- a/drivers/gpio/vr41xx_giu.c
+++ b/drivers/gpio/vr41xx_giu.c

@@ -238,13 +238,13 @@
 					break;
 				}
 			}
-			set_irq_chip_and_handler(GIU_IRQ(pin),
+			irq_set_chip_and_handler(GIU_IRQ(pin),
 						 &giuint_low_irq_chip,
 						 handle_edge_irq);
 		} else {
 			giu_clear(GIUINTTYPL, mask);
 			giu_clear(GIUINTHTSELL, mask);
-			set_irq_chip_and_handler(GIU_IRQ(pin),
+			irq_set_chip_and_handler(GIU_IRQ(pin),
 						 &giuint_low_irq_chip,
 						 handle_level_irq);
 		}
@@ -273,13 +273,13 @@
 					break;
 				}
 			}
-			set_irq_chip_and_handler(GIU_IRQ(pin),
+			irq_set_chip_and_handler(GIU_IRQ(pin),
 						 &giuint_high_irq_chip,
 						 handle_edge_irq);
 		} else {
 			giu_clear(GIUINTTYPH, mask);
 			giu_clear(GIUINTHTSELH, mask);
-			set_irq_chip_and_handler(GIU_IRQ(pin),
+			irq_set_chip_and_handler(GIU_IRQ(pin),
 						 &giuint_high_irq_chip,
 						 handle_level_irq);
 		}
@@ -539,9 +539,9 @@
 			chip = &giuint_high_irq_chip;
 
 		if (trigger & (1 << pin))
-			set_irq_chip_and_handler(i, chip, handle_edge_irq);
+			irq_set_chip_and_handler(i, chip, handle_edge_irq);
 		else
-			set_irq_chip_and_handler(i, chip, handle_level_irq);
+			irq_set_chip_and_handler(i, chip, handle_level_irq);
 
 	}
 

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 4c95b5f..799e149 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c

@@ -1073,6 +1073,9 @@
 	uint32_t __user *encoder_id;
 	struct drm_mode_group *mode_group;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 
 	/*
@@ -1244,6 +1247,9 @@
 	struct drm_mode_object *obj;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 
 	obj = drm_mode_object_find(dev, crtc_resp->crtc_id,
@@ -1312,6 +1318,9 @@
 	uint64_t __user *prop_values;
 	uint32_t __user *encoder_ptr;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	memset(&u_mode, 0, sizeof(struct drm_mode_modeinfo));
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:?]\n", out_resp->connector_id);
@@ -1431,6 +1440,9 @@
 	struct drm_encoder *encoder;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, enc_resp->encoder_id,
 				   DRM_MODE_OBJECT_ENCODER);
@@ -1486,6 +1498,9 @@
 	int ret = 0;
 	int i;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, crtc_req->crtc_id,
 				   DRM_MODE_OBJECT_CRTC);
@@ -1603,6 +1618,9 @@
 	struct drm_crtc *crtc;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if (!req->flags) {
 		DRM_ERROR("no operation set\n");
 		return -EINVAL;
@@ -1667,6 +1685,9 @@
 	struct drm_framebuffer *fb;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if ((config->min_width > r->width) || (r->width > config->max_width)) {
 		DRM_ERROR("mode new framebuffer width not within limits\n");
 		return -EINVAL;
@@ -1724,6 +1745,9 @@
 	int ret = 0;
 	int found = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, *id, DRM_MODE_OBJECT_FB);
 	/* TODO check that we realy get a framebuffer back. */
@@ -1780,6 +1804,9 @@
 	struct drm_framebuffer *fb;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB);
 	if (!obj) {
@@ -1813,6 +1840,9 @@
 	int num_clips;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, r->fb_id, DRM_MODE_OBJECT_FB);
 	if (!obj) {
@@ -1996,6 +2026,9 @@
 	struct drm_mode_modeinfo *umode = &mode_cmd->mode;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 
 	obj = drm_mode_object_find(dev, mode_cmd->connector_id, DRM_MODE_OBJECT_CONNECTOR);
@@ -2042,6 +2075,9 @@
 	struct drm_mode_modeinfo *umode = &mode_cmd->mode;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 
 	obj = drm_mode_object_find(dev, mode_cmd->connector_id, DRM_MODE_OBJECT_CONNECTOR);
@@ -2211,6 +2247,9 @@
 	uint64_t __user *values_ptr;
 	uint32_t __user *blob_length_ptr;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, out_resp->prop_id, DRM_MODE_OBJECT_PROPERTY);
 	if (!obj) {
@@ -2333,6 +2372,9 @@
 	int ret = 0;
 	void *blob_ptr;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, out_resp->blob_id, DRM_MODE_OBJECT_BLOB);
 	if (!obj) {
@@ -2393,6 +2435,9 @@
 	int ret = -EINVAL;
 	int i;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 
 	obj = drm_mode_object_find(dev, out_resp->connector_id, DRM_MODE_OBJECT_CONNECTOR);
@@ -2509,6 +2554,9 @@
 	int size;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
 	if (!obj) {
@@ -2560,6 +2608,9 @@
 	int size;
 	int ret = 0;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->mode_config.mutex);
 	obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
 	if (!obj) {

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 57ce27c..74e4ff5 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c

@@ -499,11 +499,12 @@
 void drm_gem_vm_close(struct vm_area_struct *vma)
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
+	struct drm_device *dev = obj->dev;
 
-	mutex_lock(&obj->dev->struct_mutex);
+	mutex_lock(&dev->struct_mutex);
 	drm_vm_close_locked(vma);
 	drm_gem_object_unreference(obj);
-	mutex_unlock(&obj->dev->struct_mutex);
+	mutex_unlock(&dev->struct_mutex);
 }
 EXPORT_SYMBOL(drm_gem_vm_close);
 

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 7f6912a..904d7e9 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c

@@ -280,6 +280,9 @@
 		if (dev->driver->dumb_create)
 			req->value = 1;
 		break;
+	case DRM_CAP_VBLANK_HIGH_CRTC:
+		req->value = 1;
+		break;
 	default:
 		return -EINVAL;
 	}

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index a34ef97..741457b 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c

@@ -1125,7 +1125,7 @@
 {
 	union drm_wait_vblank *vblwait = data;
 	int ret = 0;
-	unsigned int flags, seq, crtc;
+	unsigned int flags, seq, crtc, high_crtc;
 
 	if ((!drm_dev_to_irq(dev)) || (!dev->irq_enabled))
 		return -EINVAL;
@@ -1134,16 +1134,21 @@
 		return -EINVAL;
 
 	if (vblwait->request.type &
-	    ~(_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK)) {
+	    ~(_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK |
+	      _DRM_VBLANK_HIGH_CRTC_MASK)) {
 		DRM_ERROR("Unsupported type value 0x%x, supported mask 0x%x\n",
 			  vblwait->request.type,
-			  (_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK));
+			  (_DRM_VBLANK_TYPES_MASK | _DRM_VBLANK_FLAGS_MASK |
+			   _DRM_VBLANK_HIGH_CRTC_MASK));
 		return -EINVAL;
 	}
 
 	flags = vblwait->request.type & _DRM_VBLANK_FLAGS_MASK;
-	crtc = flags & _DRM_VBLANK_SECONDARY ? 1 : 0;
-
+	high_crtc = (vblwait->request.type & _DRM_VBLANK_HIGH_CRTC_MASK);
+	if (high_crtc)
+		crtc = high_crtc >> _DRM_VBLANK_HIGH_CRTC_SHIFT;
+	else
+		crtc = flags & _DRM_VBLANK_SECONDARY ? 1 : 0;
 	if (crtc >= dev->num_crtcs)
 		return -EINVAL;
 

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 09e0327..87c8e29 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c

@@ -892,7 +892,7 @@
 		seq_printf(m, "Render p-state limit: %d\n",
 			   rp_state_limits & 0xff);
 		seq_printf(m, "CAGF: %dMHz\n", ((rpstat & GEN6_CAGF_MASK) >>
-						GEN6_CAGF_SHIFT) * 100);
+						GEN6_CAGF_SHIFT) * 50);
 		seq_printf(m, "RP CUR UP EI: %dus\n", rpupei &
 			   GEN6_CURICONT_MASK);
 		seq_printf(m, "RP CUR UP: %dus\n", rpcurup &
@@ -908,15 +908,15 @@
 
 		max_freq = (rp_state_cap & 0xff0000) >> 16;
 		seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
-			   max_freq * 100);
+			   max_freq * 50);
 
 		max_freq = (rp_state_cap & 0xff00) >> 8;
 		seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
-			   max_freq * 100);
+			   max_freq * 50);
 
 		max_freq = rp_state_cap & 0xff;
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
-			   max_freq * 100);
+			   max_freq * 50);
 
 		__gen6_gt_force_wake_put(dev_priv);
 	} else {

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c4c2855..7ce3f35 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c

@@ -224,7 +224,7 @@
 		     struct drm_mode_create_dumb *args)
 {
 	/* have to work out size/pitch and return them */
-	args->pitch = ALIGN(args->width & ((args->bpp + 1) / 8), 64);
+	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
 	args->size = args->pitch * args->height;
 	return i915_gem_create(file, dev,
 			       args->size, &args->handle);
@@ -1356,9 +1356,10 @@
 	if (!obj->fault_mappable)
 		return;
 
-	unmap_mapping_range(obj->base.dev->dev_mapping,
-			    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
-			    obj->base.size, 1);
+	if (obj->base.dev->dev_mapping)
+		unmap_mapping_range(obj->base.dev->dev_mapping,
+				    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
+				    obj->base.size, 1);
 
 	obj->fault_mappable = false;
 }
@@ -1796,8 +1797,10 @@
 		return;
 
 	spin_lock(&file_priv->mm.lock);
-	list_del(&request->client_list);
-	request->file_priv = NULL;
+	if (request->file_priv) {
+		list_del(&request->client_list);
+		request->file_priv = NULL;
+	}
 	spin_unlock(&file_priv->mm.lock);
 }
 
@@ -2217,13 +2220,18 @@
 {
 	int ret;
 
+	if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
+		return 0;
+
 	trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);
 
 	ret = ring->flush(ring, invalidate_domains, flush_domains);
 	if (ret)
 		return ret;
 
-	i915_gem_process_flushing_list(ring, flush_domains);
+	if (flush_domains & I915_GEM_GPU_DOMAINS)
+		i915_gem_process_flushing_list(ring, flush_domains);
+
 	return 0;
 }
 
@@ -2579,8 +2587,23 @@
 		reg = &dev_priv->fence_regs[obj->fence_reg];
 		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
 
-		if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
-			pipelined = NULL;
+		if (obj->tiling_changed) {
+			ret = i915_gem_object_flush_fence(obj, pipelined);
+			if (ret)
+				return ret;
+
+			if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
+				pipelined = NULL;
+
+			if (pipelined) {
+				reg->setup_seqno =
+					i915_gem_next_request_seqno(pipelined);
+				obj->last_fenced_seqno = reg->setup_seqno;
+				obj->last_fenced_ring = pipelined;
+			}
+
+			goto update;
+		}
 
 		if (!pipelined) {
 			if (reg->setup_seqno) {
@@ -2599,31 +2622,6 @@
 			ret = i915_gem_object_flush_fence(obj, pipelined);
 			if (ret)
 				return ret;
-		} else if (obj->tiling_changed) {
-			if (obj->fenced_gpu_access) {
-				if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
-					ret = i915_gem_flush_ring(obj->ring,
-								  0, obj->base.write_domain);
-					if (ret)
-						return ret;
-				}
-
-				obj->fenced_gpu_access = false;
-			}
-		}
-
-		if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
-			pipelined = NULL;
-		BUG_ON(!pipelined && reg->setup_seqno);
-
-		if (obj->tiling_changed) {
-			if (pipelined) {
-				reg->setup_seqno =
-					i915_gem_next_request_seqno(pipelined);
-				obj->last_fenced_seqno = reg->setup_seqno;
-				obj->last_fenced_ring = pipelined;
-			}
-			goto update;
 		}
 
 		return 0;
@@ -3606,6 +3604,8 @@
 		return;
 	}
 
+	trace_i915_gem_object_destroy(obj);
+
 	if (obj->base.map_list.map)
 		i915_gem_free_mmap_offset(obj);
 
@@ -3615,8 +3615,6 @@
 	kfree(obj->page_cpu_valid);
 	kfree(obj->bit_17);
 	kfree(obj);
-
-	trace_i915_gem_object_destroy(obj);
 }
 
 void i915_gem_free_object(struct drm_gem_object *gem_obj)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7ff7f93..20a4cc5 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c

@@ -367,6 +367,10 @@
 		uint32_t __iomem *reloc_entry;
 		void __iomem *reloc_page;
 
+		/* We can't wait for rendering with pagefaults disabled */
+		if (obj->active && in_atomic())
+			return -EFAULT;
+
 		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
 		if (ret)
 			return ret;
@@ -440,15 +444,24 @@
 			     struct list_head *objects)
 {
 	struct drm_i915_gem_object *obj;
-	int ret;
+	int ret = 0;
 
+	/* This is the fast path and we cannot handle a pagefault whilst
+	 * holding the struct mutex lest the user pass in the relocations
+	 * contained within a mmaped bo. For in such a case we, the page
+	 * fault handler would call i915_gem_fault() and we would try to
+	 * acquire the struct mutex again. Obviously this is bad and so
+	 * lockdep complains vehemently.
+	 */
+	pagefault_disable();
 	list_for_each_entry(obj, objects, exec_list) {
 		ret = i915_gem_execbuffer_relocate_object(obj, eb);
 		if (ret)
-			return ret;
+			break;
 	}
+	pagefault_enable();
 
-	return 0;
+	return ret;
 }
 
 static int

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3106c0d..432fc04 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c

@@ -1516,9 +1516,10 @@
 
 	reg = PIPECONF(pipe);
 	val = I915_READ(reg);
-	val |= PIPECONF_ENABLE;
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
+	if (val & PIPECONF_ENABLE)
+		return;
+
+	I915_WRITE(reg, val | PIPECONF_ENABLE);
 	intel_wait_for_vblank(dev_priv->dev, pipe);
 }
 
@@ -1552,9 +1553,10 @@
 
 	reg = PIPECONF(pipe);
 	val = I915_READ(reg);
-	val &= ~PIPECONF_ENABLE;
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
+	if ((val & PIPECONF_ENABLE) == 0)
+		return;
+
+	I915_WRITE(reg, val & ~PIPECONF_ENABLE);
 	intel_wait_for_pipe_off(dev_priv->dev, pipe);
 }
 
@@ -1577,9 +1579,10 @@
 
 	reg = DSPCNTR(plane);
 	val = I915_READ(reg);
-	val |= DISPLAY_PLANE_ENABLE;
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
+	if (val & DISPLAY_PLANE_ENABLE)
+		return;
+
+	I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE);
 	intel_wait_for_vblank(dev_priv->dev, pipe);
 }
 
@@ -1610,9 +1613,10 @@
 
 	reg = DSPCNTR(plane);
 	val = I915_READ(reg);
-	val &= ~DISPLAY_PLANE_ENABLE;
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
+	if ((val & DISPLAY_PLANE_ENABLE) == 0)
+		return;
+
+	I915_WRITE(reg, val & ~DISPLAY_PLANE_ENABLE);
 	intel_flush_display_plane(dev_priv, plane);
 	intel_wait_for_vblank(dev_priv->dev, pipe);
 }
@@ -1769,7 +1773,6 @@
 			return;
 
 		I915_WRITE(DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN);
-		POSTING_READ(DPFC_CONTROL);
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
 	}
 
@@ -1861,7 +1864,6 @@
 			return;
 
 		I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl & ~DPFC_CTL_EN);
-		POSTING_READ(ILK_DPFC_CONTROL);
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
 	}
 
@@ -3883,10 +3885,7 @@
 			      display, cursor);
 }
 
-static inline bool single_plane_enabled(unsigned int mask)
-{
-	return mask && (mask & -mask) == 0;
-}
+#define single_plane_enabled(mask) is_power_of_2(mask)
 
 static void g4x_update_wm(struct drm_device *dev)
 {
@@ -5777,7 +5776,6 @@
 
 		dpll &= ~DISPLAY_RATE_SELECT_FPA1;
 		I915_WRITE(dpll_reg, dpll);
-		POSTING_READ(dpll_reg);
 		intel_wait_for_vblank(dev, pipe);
 
 		dpll = I915_READ(dpll_reg);
@@ -5821,7 +5819,6 @@
 
 		dpll |= DISPLAY_RATE_SELECT_FPA1;
 		I915_WRITE(dpll_reg, dpll);
-		dpll = I915_READ(dpll_reg);
 		intel_wait_for_vblank(dev, pipe);
 		dpll = I915_READ(dpll_reg);
 		if (!(dpll & DISPLAY_RATE_SELECT_FPA1))
@@ -6933,7 +6930,7 @@
 		DRM_ERROR("timeout waiting for pcode mailbox to finish\n");
 	if (pcu_mbox & (1<<31)) { /* OC supported */
 		max_freq = pcu_mbox & 0xff;
-		DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 100);
+		DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50);
 	}
 
 	/* In units of 100MHz */

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index d29e33f..0daefca 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c

@@ -1957,9 +1957,9 @@
 					DP_NO_AUX_HANDSHAKE_LINK_TRAINING;
 		} else {
 			/* if this fails, presume the device is a ghost */
-			DRM_ERROR("failed to retrieve link info\n");
-			intel_dp_destroy(&intel_connector->base);
+			DRM_INFO("failed to retrieve link info, disabling eDP\n");
 			intel_dp_encoder_destroy(&intel_dp->base.base);
+			intel_dp_destroy(&intel_connector->base);
 			return;
 		}
 	}

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 789c478..e9e6f71 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c

@@ -65,62 +65,60 @@
 	u32 cmd;
 	int ret;
 
-	if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
+	/*
+	 * read/write caches:
+	 *
+	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
+	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
+	 * also flushed at 2d versus 3d pipeline switches.
+	 *
+	 * read-only caches:
+	 *
+	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
+	 * MI_READ_FLUSH is set, and is always flushed on 965.
+	 *
+	 * I915_GEM_DOMAIN_COMMAND may not exist?
+	 *
+	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
+	 * invalidated when MI_EXE_FLUSH is set.
+	 *
+	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
+	 * invalidated with every MI_FLUSH.
+	 *
+	 * TLBs:
+	 *
+	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
+	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
+	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
+	 * are flushed at any MI_FLUSH.
+	 */
+
+	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
+	if ((invalidate_domains|flush_domains) &
+	    I915_GEM_DOMAIN_RENDER)
+		cmd &= ~MI_NO_WRITE_FLUSH;
+	if (INTEL_INFO(dev)->gen < 4) {
 		/*
-		 * read/write caches:
-		 *
-		 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
-		 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
-		 * also flushed at 2d versus 3d pipeline switches.
-		 *
-		 * read-only caches:
-		 *
-		 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
-		 * MI_READ_FLUSH is set, and is always flushed on 965.
-		 *
-		 * I915_GEM_DOMAIN_COMMAND may not exist?
-		 *
-		 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
-		 * invalidated when MI_EXE_FLUSH is set.
-		 *
-		 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
-		 * invalidated with every MI_FLUSH.
-		 *
-		 * TLBs:
-		 *
-		 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
-		 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
-		 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
-		 * are flushed at any MI_FLUSH.
+		 * On the 965, the sampler cache always gets flushed
+		 * and this bit is reserved.
 		 */
-
-		cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
-		if ((invalidate_domains|flush_domains) &
-		    I915_GEM_DOMAIN_RENDER)
-			cmd &= ~MI_NO_WRITE_FLUSH;
-		if (INTEL_INFO(dev)->gen < 4) {
-			/*
-			 * On the 965, the sampler cache always gets flushed
-			 * and this bit is reserved.
-			 */
-			if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
-				cmd |= MI_READ_FLUSH;
-		}
-		if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
-			cmd |= MI_EXE_FLUSH;
-
-		if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
-		    (IS_G4X(dev) || IS_GEN5(dev)))
-			cmd |= MI_INVALIDATE_ISP;
-
-		ret = intel_ring_begin(ring, 2);
-		if (ret)
-			return ret;
-
-		intel_ring_emit(ring, cmd);
-		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_advance(ring);
+		if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
+			cmd |= MI_READ_FLUSH;
 	}
+	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
+		cmd |= MI_EXE_FLUSH;
+
+	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
+	    (IS_G4X(dev) || IS_GEN5(dev)))
+		cmd |= MI_INVALIDATE_ISP;
+
+	ret = intel_ring_begin(ring, 2);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, cmd);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
 
 	return 0;
 }
@@ -568,9 +566,6 @@
 {
 	int ret;
 
-	if ((flush_domains & I915_GEM_DOMAIN_RENDER) == 0)
-		return 0;
-
 	ret = intel_ring_begin(ring, 2);
 	if (ret)
 		return ret;
@@ -1056,9 +1051,6 @@
 	uint32_t cmd;
 	int ret;
 
-	if (((invalidate | flush) & I915_GEM_GPU_DOMAINS) == 0)
-		return 0;
-
 	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		return ret;
@@ -1230,9 +1222,6 @@
 	uint32_t cmd;
 	int ret;
 
-	if (((invalidate | flush) & I915_GEM_DOMAIN_RENDER) == 0)
-		return 0;
-
 	ret = blt_ring_begin(ring, 4);
 	if (ret)
 		return ret;

diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 3cd3234..10e41af 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c

@@ -957,7 +957,11 @@
 	/* adjust pixel clock as needed */
 	adjusted_clock = atombios_adjust_pll(crtc, mode, pll, ss_enabled, &ss);
 
-	if (ASIC_IS_AVIVO(rdev))
+	if (radeon_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+		/* TV seems to prefer the legacy algo on some boards */
+		radeon_compute_pll_legacy(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div,
+					  &ref_div, &post_div);
+	else if (ASIC_IS_AVIVO(rdev))
 		radeon_compute_pll_avivo(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div,
 					 &ref_div, &post_div);
 	else

diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index cf7c8d5..cf602e2 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c

@@ -448,7 +448,7 @@
 
 bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev)
 {
-	int edid_info;
+	int edid_info, size;
 	struct edid *edid;
 	unsigned char *raw;
 	edid_info = combios_get_table_offset(rdev->ddev, COMBIOS_HARDCODED_EDID_TABLE);
@@ -456,11 +456,12 @@
 		return false;
 
 	raw = rdev->bios + edid_info;
-	edid = kmalloc(EDID_LENGTH * (raw[0x7e] + 1), GFP_KERNEL);
+	size = EDID_LENGTH * (raw[0x7e] + 1);
+	edid = kmalloc(size, GFP_KERNEL);
 	if (edid == NULL)
 		return false;
 
-	memcpy((unsigned char *)edid, raw, EDID_LENGTH * (raw[0x7e] + 1));
+	memcpy((unsigned char *)edid, raw, size);
 
 	if (!drm_edid_is_valid(edid)) {
 		kfree(edid);
@@ -468,6 +469,7 @@
 	}
 
 	rdev->mode_info.bios_hardcoded_edid = edid;
+	rdev->mode_info.bios_hardcoded_edid_size = size;
 	return true;
 }
 
@@ -475,8 +477,17 @@
 struct edid *
 radeon_bios_get_hardcoded_edid(struct radeon_device *rdev)
 {
-	if (rdev->mode_info.bios_hardcoded_edid)
-		return rdev->mode_info.bios_hardcoded_edid;
+	struct edid *edid;
+
+	if (rdev->mode_info.bios_hardcoded_edid) {
+		edid = kmalloc(rdev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL);
+		if (edid) {
+			memcpy((unsigned char *)edid,
+			       (unsigned char *)rdev->mode_info.bios_hardcoded_edid,
+			       rdev->mode_info.bios_hardcoded_edid_size);
+			return edid;
+		}
+	}
 	return NULL;
 }
 

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 28c7961..2ef6d51 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c

@@ -633,6 +633,8 @@
 static enum drm_connector_status
 radeon_vga_detect(struct drm_connector *connector, bool force)
 {
+	struct drm_device *dev = connector->dev;
+	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct drm_encoder *encoder;
 	struct drm_encoder_helper_funcs *encoder_funcs;
@@ -683,6 +685,17 @@
 
 	if (ret == connector_status_connected)
 		ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, true);
+
+	/* RN50 and some RV100 asics in servers often have a hardcoded EDID in the
+	 * vbios to deal with KVMs. If we have one and are not able to detect a monitor
+	 * by other means, assume the CRT is connected and use that EDID.
+	 */
+	if ((!rdev->is_atom_bios) &&
+	    (ret == connector_status_disconnected) &&
+	    rdev->mode_info.bios_hardcoded_edid_size) {
+		ret = connector_status_connected;
+	}
+
 	radeon_connector_update_scratch_regs(connector, ret);
 	return ret;
 }
@@ -794,6 +807,8 @@
 static enum drm_connector_status
 radeon_dvi_detect(struct drm_connector *connector, bool force)
 {
+	struct drm_device *dev = connector->dev;
+	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct drm_encoder *encoder = NULL;
 	struct drm_encoder_helper_funcs *encoder_funcs;
@@ -833,8 +848,6 @@
 			 * you don't really know what's connected to which port as both are digital.
 			 */
 			if (radeon_connector->shared_ddc && (ret == connector_status_connected)) {
-				struct drm_device *dev = connector->dev;
-				struct radeon_device *rdev = dev->dev_private;
 				struct drm_connector *list_connector;
 				struct radeon_connector *list_radeon_connector;
 				list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) {
@@ -899,6 +912,19 @@
 		ret = radeon_connector_analog_encoder_conflict_solve(connector, encoder, ret, true);
 	}
 
+	/* RN50 and some RV100 asics in servers often have a hardcoded EDID in the
+	 * vbios to deal with KVMs. If we have one and are not able to detect a monitor
+	 * by other means, assume the DFP is connected and use that EDID.  In most
+	 * cases the DVI port is actually a virtual KVM port connected to the service
+	 * processor.
+	 */
+	if ((!rdev->is_atom_bios) &&
+	    (ret == connector_status_disconnected) &&
+	    rdev->mode_info.bios_hardcoded_edid_size) {
+		radeon_connector->use_digital = true;
+		ret = connector_status_connected;
+	}
+
 out:
 	/* updated in get modes as well since we need to know if it's analog or digital */
 	radeon_connector_update_scratch_regs(connector, ret);

diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index e458281..9c57538 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h

@@ -239,6 +239,7 @@
 	struct drm_property *underscan_vborder_property;
 	/* hardcoded DFP edid from BIOS */
 	struct edid *bios_hardcoded_edid;
+	int bios_hardcoded_edid_size;
 
 	/* pointer to fbdev info structure */
 	struct radeon_fbdev *rfbdev;

diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 2aed03b..08de669 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c

@@ -365,12 +365,14 @@
 		else if (strncmp("high", buf, strlen("high")) == 0)
 			rdev->pm.profile = PM_PROFILE_HIGH;
 		else {
-			DRM_ERROR("invalid power profile!\n");
+			count = -EINVAL;
 			goto fail;
 		}
 		radeon_pm_update_profile(rdev);
 		radeon_pm_set_clocks(rdev);
-	}
+	} else
+		count = -EINVAL;
+
 fail:
 	mutex_unlock(&rdev->pm.mutex);
 
@@ -413,7 +415,7 @@
 		mutex_unlock(&rdev->pm.mutex);
 		cancel_delayed_work_sync(&rdev->pm.dynpm_idle_work);
 	} else {
-		DRM_ERROR("invalid power method!\n");
+		count = -EINVAL;
 		goto fail;
 	}
 	radeon_pm_compute_clocks(rdev);

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 81131ed..060ef63 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig

@@ -315,11 +315,22 @@
 	  will be called f71805f.
 
 config SENSORS_F71882FG
-	tristate "Fintek F71858FG, F71862FG, F71882FG, F71889FG and F8000"
+	tristate "Fintek F71882FG and compatibles"
 	help
 	  If you say yes here you get support for hardware monitoring
-	  features of the Fintek F71858FG, F71862FG/71863FG, F71882FG/F71883FG,
-	  F71889FG and F8000 Super-I/O chips.
+	  features of many Fintek Super-I/O (LPC) chips. The currently
+	  supported chips are:
+	    F71808E
+	    F71858FG
+	    F71862FG
+	    F71863FG
+	    F71869F/E
+	    F71882FG
+	    F71883FG
+	    F71889FG/ED/A
+	    F8000
+	    F81801U
+	    F81865F
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called f71882fg.

diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c
index a4d430e..ca07a32 100644
--- a/drivers/hwmon/f71882fg.c
+++ b/drivers/hwmon/f71882fg.c

@@ -54,7 +54,9 @@
 #define SIO_F71882_ID		0x0541	/* Chipset ID */
 #define SIO_F71889_ID		0x0723	/* Chipset ID */
 #define SIO_F71889E_ID		0x0909	/* Chipset ID */
+#define SIO_F71889A_ID		0x1005	/* Chipset ID */
 #define SIO_F8000_ID		0x0581	/* Chipset ID */
+#define SIO_F81865_ID		0x0704	/* Chipset ID */
 
 #define REGION_LENGTH		8
 #define ADDR_REG_OFFSET		5
@@ -106,7 +108,7 @@
 MODULE_PARM_DESC(force_id, "Override the detected device ID");
 
 enum chips { f71808e, f71858fg, f71862fg, f71869, f71882fg, f71889fg,
-	     f71889ed, f8000 };
+	     f71889ed, f71889a, f8000, f81865f };
 
 static const char *f71882fg_names[] = {
 	"f71808e",
@@ -114,42 +116,76 @@
 	"f71862fg",
 	"f71869", /* Both f71869f and f71869e, reg. compatible and same id */
 	"f71882fg",
-	"f71889fg",
+	"f71889fg", /* f81801u too, same id */
 	"f71889ed",
+	"f71889a",
 	"f8000",
+	"f81865f",
 };
 
-static const char f71882fg_has_in[8][F71882FG_MAX_INS] = {
-	{ 1, 1, 1, 1, 1, 1, 0, 1, 1 }, /* f71808e */
-	{ 1, 1, 1, 0, 0, 0, 0, 0, 0 }, /* f71858fg */
-	{ 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* f71862fg */
-	{ 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* f71869 */
-	{ 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* f71882fg */
-	{ 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* f71889fg */
-	{ 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* f71889ed */
-	{ 1, 1, 1, 0, 0, 0, 0, 0, 0 }, /* f8000 */
+static const char f71882fg_has_in[][F71882FG_MAX_INS] = {
+	[f71808e]	= { 1, 1, 1, 1, 1, 1, 0, 1, 1 },
+	[f71858fg]	= { 1, 1, 1, 0, 0, 0, 0, 0, 0 },
+	[f71862fg]	= { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+	[f71869]	= { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+	[f71882fg]	= { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+	[f71889fg]	= { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+	[f71889ed]	= { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+	[f71889a]	= { 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+	[f8000]		= { 1, 1, 1, 0, 0, 0, 0, 0, 0 },
+	[f81865f]	= { 1, 1, 1, 1, 1, 1, 1, 0, 0 },
 };
 
-static const char f71882fg_has_in1_alarm[8] = {
-	0, /* f71808e */
-	0, /* f71858fg */
-	0, /* f71862fg */
-	0, /* f71869 */
-	1, /* f71882fg */
-	1, /* f71889fg */
-	1, /* f71889ed */
-	0, /* f8000 */
+static const char f71882fg_has_in1_alarm[] = {
+	[f71808e]	= 0,
+	[f71858fg]	= 0,
+	[f71862fg]	= 0,
+	[f71869]	= 0,
+	[f71882fg]	= 1,
+	[f71889fg]	= 1,
+	[f71889ed]	= 1,
+	[f71889a]	= 1,
+	[f8000]		= 0,
+	[f81865f]	= 1,
 };
 
-static const char f71882fg_has_beep[8] = {
-	0, /* f71808e */
-	0, /* f71858fg */
-	1, /* f71862fg */
-	1, /* f71869 */
-	1, /* f71882fg */
-	1, /* f71889fg */
-	1, /* f71889ed */
-	0, /* f8000 */
+static const char f71882fg_has_beep[] = {
+	[f71808e]	= 0,
+	[f71858fg]	= 0,
+	[f71862fg]	= 1,
+	[f71869]	= 1,
+	[f71882fg]	= 1,
+	[f71889fg]	= 1,
+	[f71889ed]	= 1,
+	[f71889a]	= 1,
+	[f8000]		= 0,
+	[f81865f]	= 1,
+};
+
+static const char f71882fg_nr_fans[] = {
+	[f71808e]	= 3,
+	[f71858fg]	= 3,
+	[f71862fg]	= 3,
+	[f71869]	= 3,
+	[f71882fg]	= 4,
+	[f71889fg]	= 3,
+	[f71889ed]	= 3,
+	[f71889a]	= 3,
+	[f8000]		= 3,
+	[f81865f]	= 2,
+};
+
+static const char f71882fg_nr_temps[] = {
+	[f71808e]	= 2,
+	[f71858fg]	= 3,
+	[f71862fg]	= 3,
+	[f71869]	= 3,
+	[f71882fg]	= 3,
+	[f71889fg]	= 3,
+	[f71889ed]	= 3,
+	[f71889a]	= 3,
+	[f8000]		= 3,
+	[f81865f]	= 2,
 };
 
 static struct platform_device *f71882fg_pdev;
@@ -1071,9 +1107,9 @@
 static struct f71882fg_data *f71882fg_update_device(struct device *dev)
 {
 	struct f71882fg_data *data = dev_get_drvdata(dev);
+	int nr_fans = f71882fg_nr_fans[data->type];
+	int nr_temps = f71882fg_nr_temps[data->type];
 	int nr, reg, point;
-	int nr_fans = (data->type == f71882fg) ? 4 : 3;
-	int nr_temps = (data->type == f71808e) ? 2 : 3;
 
 	mutex_lock(&data->update_lock);
 
@@ -2042,8 +2078,9 @@
 {
 	struct f71882fg_data *data;
 	struct f71882fg_sio_data *sio_data = pdev->dev.platform_data;
-	int err, i, nr_fans = (sio_data->type == f71882fg) ? 4 : 3;
-	int nr_temps = (sio_data->type == f71808e) ? 2 : 3;
+	int nr_fans = f71882fg_nr_fans[sio_data->type];
+	int nr_temps = f71882fg_nr_temps[sio_data->type];
+	int err, i;
 	u8 start_reg, reg;
 
 	data = kzalloc(sizeof(struct f71882fg_data), GFP_KERNEL);
@@ -2138,6 +2175,7 @@
 			/* Fall through to select correct fan/pwm reg bank! */
 		case f71889fg:
 		case f71889ed:
+		case f71889a:
 			reg = f71882fg_read8(data, F71882FG_REG_FAN_FAULT_T);
 			if (reg & F71882FG_FAN_NEG_TEMP_EN)
 				data->auto_point_temp_signed = 1;
@@ -2163,16 +2201,12 @@
 		case f71862fg:
 			err = (data->pwm_enable & 0x15) != 0x15;
 			break;
-		case f71808e:
-		case f71869:
-		case f71882fg:
-		case f71889fg:
-		case f71889ed:
-			err = 0;
-			break;
 		case f8000:
 			err = data->pwm_enable & 0x20;
 			break;
+		default:
+			err = 0;
+			break;
 		}
 		if (err) {
 			dev_err(&pdev->dev,
@@ -2199,6 +2233,7 @@
 		case f71869:
 		case f71889fg:
 		case f71889ed:
+		case f71889a:
 			for (i = 0; i < nr_fans; i++) {
 				data->pwm_auto_point_mapping[i] =
 					f71882fg_read8(data,
@@ -2276,8 +2311,9 @@
 static int f71882fg_remove(struct platform_device *pdev)
 {
 	struct f71882fg_data *data = platform_get_drvdata(pdev);
-	int i, nr_fans = (data->type == f71882fg) ? 4 : 3;
-	int nr_temps = (data->type == f71808e) ? 2 : 3;
+	int nr_fans = f71882fg_nr_fans[data->type];
+	int nr_temps = f71882fg_nr_temps[data->type];
+	int i;
 	u8 start_reg = f71882fg_read8(data, F71882FG_REG_START);
 
 	if (data->hwmon_dev)
@@ -2406,9 +2442,15 @@
 	case SIO_F71889E_ID:
 		sio_data->type = f71889ed;
 		break;
+	case SIO_F71889A_ID:
+		sio_data->type = f71889a;
+		break;
 	case SIO_F8000_ID:
 		sio_data->type = f8000;
 		break;
+	case SIO_F81865_ID:
+		sio_data->type = f81865f;
+		break;
 	default:
 		pr_info("Unsupported Fintek device: %04x\n",
 			(unsigned int)devid);

diff --git a/drivers/hwmon/pmbus_core.c b/drivers/hwmon/pmbus_core.c
index 6474512..edfb92e 100644
--- a/drivers/hwmon/pmbus_core.c
+++ b/drivers/hwmon/pmbus_core.c

@@ -752,7 +752,7 @@
 static void pmbus_add_sensor(struct pmbus_data *data,
 			     const char *name, const char *type, int seq,
 			     int page, int reg, enum pmbus_sensor_classes class,
-			     bool update)
+			     bool update, bool readonly)
 {
 	struct pmbus_sensor *sensor;
 
@@ -765,7 +765,7 @@
 	sensor->reg = reg;
 	sensor->class = class;
 	sensor->update = update;
-	if (update)
+	if (readonly)
 		PMBUS_ADD_GET_ATTR(data, sensor->name, sensor,
 				   data->num_sensors);
 	else
@@ -916,14 +916,14 @@
 
 		i0 = data->num_sensors;
 		pmbus_add_label(data, "in", in_index, "vin", 0);
-		pmbus_add_sensor(data, "in", "input", in_index,
-				 0, PMBUS_READ_VIN, PSC_VOLTAGE_IN, true);
+		pmbus_add_sensor(data, "in", "input", in_index, 0,
+				 PMBUS_READ_VIN, PSC_VOLTAGE_IN, true, true);
 		if (pmbus_check_word_register(client, 0,
 					      PMBUS_VIN_UV_WARN_LIMIT)) {
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "in", "min", in_index,
 					 0, PMBUS_VIN_UV_WARN_LIMIT,
-					 PSC_VOLTAGE_IN, false);
+					 PSC_VOLTAGE_IN, false, false);
 			if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) {
 				pmbus_add_boolean_reg(data, "in", "min_alarm",
 						      in_index,
@@ -937,7 +937,7 @@
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "in", "lcrit", in_index,
 					 0, PMBUS_VIN_UV_FAULT_LIMIT,
-					 PSC_VOLTAGE_IN, false);
+					 PSC_VOLTAGE_IN, false, false);
 			if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) {
 				pmbus_add_boolean_reg(data, "in", "lcrit_alarm",
 						      in_index,
@@ -951,7 +951,7 @@
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "in", "max", in_index,
 					 0, PMBUS_VIN_OV_WARN_LIMIT,
-					 PSC_VOLTAGE_IN, false);
+					 PSC_VOLTAGE_IN, false, false);
 			if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) {
 				pmbus_add_boolean_reg(data, "in", "max_alarm",
 						      in_index,
@@ -965,7 +965,7 @@
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "in", "crit", in_index,
 					 0, PMBUS_VIN_OV_FAULT_LIMIT,
-					 PSC_VOLTAGE_IN, false);
+					 PSC_VOLTAGE_IN, false, false);
 			if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) {
 				pmbus_add_boolean_reg(data, "in", "crit_alarm",
 						      in_index,
@@ -988,7 +988,7 @@
 	if (info->func[0] & PMBUS_HAVE_VCAP) {
 		pmbus_add_label(data, "in", in_index, "vcap", 0);
 		pmbus_add_sensor(data, "in", "input", in_index, 0,
-				 PMBUS_READ_VCAP, PSC_VOLTAGE_IN, true);
+				 PMBUS_READ_VCAP, PSC_VOLTAGE_IN, true, true);
 		in_index++;
 	}
 
@@ -1004,13 +1004,13 @@
 		i0 = data->num_sensors;
 		pmbus_add_label(data, "in", in_index, "vout", page + 1);
 		pmbus_add_sensor(data, "in", "input", in_index, page,
-				 PMBUS_READ_VOUT, PSC_VOLTAGE_OUT, true);
+				 PMBUS_READ_VOUT, PSC_VOLTAGE_OUT, true, true);
 		if (pmbus_check_word_register(client, page,
 					      PMBUS_VOUT_UV_WARN_LIMIT)) {
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "in", "min", in_index, page,
 					 PMBUS_VOUT_UV_WARN_LIMIT,
-					 PSC_VOLTAGE_OUT, false);
+					 PSC_VOLTAGE_OUT, false, false);
 			if (info->func[page] & PMBUS_HAVE_STATUS_VOUT) {
 				pmbus_add_boolean_reg(data, "in", "min_alarm",
 						      in_index,
@@ -1025,7 +1025,7 @@
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "in", "lcrit", in_index, page,
 					 PMBUS_VOUT_UV_FAULT_LIMIT,
-					 PSC_VOLTAGE_OUT, false);
+					 PSC_VOLTAGE_OUT, false, false);
 			if (info->func[page] & PMBUS_HAVE_STATUS_VOUT) {
 				pmbus_add_boolean_reg(data, "in", "lcrit_alarm",
 						      in_index,
@@ -1040,7 +1040,7 @@
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "in", "max", in_index, page,
 					 PMBUS_VOUT_OV_WARN_LIMIT,
-					 PSC_VOLTAGE_OUT, false);
+					 PSC_VOLTAGE_OUT, false, false);
 			if (info->func[page] & PMBUS_HAVE_STATUS_VOUT) {
 				pmbus_add_boolean_reg(data, "in", "max_alarm",
 						      in_index,
@@ -1055,7 +1055,7 @@
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "in", "crit", in_index, page,
 					 PMBUS_VOUT_OV_FAULT_LIMIT,
-					 PSC_VOLTAGE_OUT, false);
+					 PSC_VOLTAGE_OUT, false, false);
 			if (info->func[page] & PMBUS_HAVE_STATUS_VOUT) {
 				pmbus_add_boolean_reg(data, "in", "crit_alarm",
 						      in_index,
@@ -1088,14 +1088,14 @@
 	if (info->func[0] & PMBUS_HAVE_IIN) {
 		i0 = data->num_sensors;
 		pmbus_add_label(data, "curr", in_index, "iin", 0);
-		pmbus_add_sensor(data, "curr", "input", in_index,
-				 0, PMBUS_READ_IIN, PSC_CURRENT_IN, true);
+		pmbus_add_sensor(data, "curr", "input", in_index, 0,
+				 PMBUS_READ_IIN, PSC_CURRENT_IN, true, true);
 		if (pmbus_check_word_register(client, 0,
 					      PMBUS_IIN_OC_WARN_LIMIT)) {
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "curr", "max", in_index,
 					 0, PMBUS_IIN_OC_WARN_LIMIT,
-					 PSC_CURRENT_IN, false);
+					 PSC_CURRENT_IN, false, false);
 			if (info->func[0] & PMBUS_HAVE_STATUS_INPUT) {
 				pmbus_add_boolean_reg(data, "curr", "max_alarm",
 						      in_index,
@@ -1108,7 +1108,7 @@
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "curr", "crit", in_index,
 					 0, PMBUS_IIN_OC_FAULT_LIMIT,
-					 PSC_CURRENT_IN, false);
+					 PSC_CURRENT_IN, false, false);
 			if (info->func[0] & PMBUS_HAVE_STATUS_INPUT)
 				pmbus_add_boolean_reg(data, "curr",
 						      "crit_alarm",
@@ -1131,13 +1131,13 @@
 		i0 = data->num_sensors;
 		pmbus_add_label(data, "curr", in_index, "iout", page + 1);
 		pmbus_add_sensor(data, "curr", "input", in_index, page,
-				 PMBUS_READ_IOUT, PSC_CURRENT_OUT, true);
+				 PMBUS_READ_IOUT, PSC_CURRENT_OUT, true, true);
 		if (pmbus_check_word_register(client, page,
 					      PMBUS_IOUT_OC_WARN_LIMIT)) {
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "curr", "max", in_index, page,
 					 PMBUS_IOUT_OC_WARN_LIMIT,
-					 PSC_CURRENT_OUT, false);
+					 PSC_CURRENT_OUT, false, false);
 			if (info->func[page] & PMBUS_HAVE_STATUS_IOUT) {
 				pmbus_add_boolean_reg(data, "curr", "max_alarm",
 						      in_index,
@@ -1151,7 +1151,7 @@
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "curr", "lcrit", in_index, page,
 					 PMBUS_IOUT_UC_FAULT_LIMIT,
-					 PSC_CURRENT_OUT, false);
+					 PSC_CURRENT_OUT, false, false);
 			if (info->func[page] & PMBUS_HAVE_STATUS_IOUT) {
 				pmbus_add_boolean_reg(data, "curr",
 						      "lcrit_alarm",
@@ -1166,7 +1166,7 @@
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "curr", "crit", in_index, page,
 					 PMBUS_IOUT_OC_FAULT_LIMIT,
-					 PSC_CURRENT_OUT, false);
+					 PSC_CURRENT_OUT, false, false);
 			if (info->func[page] & PMBUS_HAVE_STATUS_IOUT) {
 				pmbus_add_boolean_reg(data, "curr",
 						      "crit_alarm",
@@ -1199,13 +1199,13 @@
 		i0 = data->num_sensors;
 		pmbus_add_label(data, "power", in_index, "pin", 0);
 		pmbus_add_sensor(data, "power", "input", in_index,
-				 0, PMBUS_READ_PIN, PSC_POWER, true);
+				 0, PMBUS_READ_PIN, PSC_POWER, true, true);
 		if (pmbus_check_word_register(client, 0,
 					      PMBUS_PIN_OP_WARN_LIMIT)) {
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "power", "max", in_index,
 					 0, PMBUS_PIN_OP_WARN_LIMIT, PSC_POWER,
-					 false);
+					 false, false);
 			if (info->func[0] & PMBUS_HAVE_STATUS_INPUT)
 				pmbus_add_boolean_reg(data, "power",
 						      "alarm",
@@ -1228,7 +1228,7 @@
 		i0 = data->num_sensors;
 		pmbus_add_label(data, "power", in_index, "pout", page + 1);
 		pmbus_add_sensor(data, "power", "input", in_index, page,
-				 PMBUS_READ_POUT, PSC_POWER, true);
+				 PMBUS_READ_POUT, PSC_POWER, true, true);
 		/*
 		 * Per hwmon sysfs API, power_cap is to be used to limit output
 		 * power.
@@ -1241,7 +1241,8 @@
 		if (pmbus_check_word_register(client, page, PMBUS_POUT_MAX)) {
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "power", "cap", in_index, page,
-					 PMBUS_POUT_MAX, PSC_POWER, false);
+					 PMBUS_POUT_MAX, PSC_POWER,
+					 false, false);
 			need_alarm = true;
 		}
 		if (pmbus_check_word_register(client, page,
@@ -1249,7 +1250,7 @@
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "power", "max", in_index, page,
 					 PMBUS_POUT_OP_WARN_LIMIT, PSC_POWER,
-					 false);
+					 false, false);
 			need_alarm = true;
 		}
 		if (need_alarm && (info->func[page] & PMBUS_HAVE_STATUS_IOUT))
@@ -1264,7 +1265,7 @@
 			i1 = data->num_sensors;
 			pmbus_add_sensor(data, "power", "crit", in_index, page,
 					 PMBUS_POUT_OP_FAULT_LIMIT, PSC_POWER,
-					 false);
+					 false, false);
 			if (info->func[page] & PMBUS_HAVE_STATUS_IOUT)
 				pmbus_add_boolean_reg(data, "power",
 						      "crit_alarm",
@@ -1302,7 +1303,7 @@
 			i0 = data->num_sensors;
 			pmbus_add_sensor(data, "temp", "input", in_index, page,
 					 pmbus_temp_registers[t],
-					 PSC_TEMPERATURE, true);
+					 PSC_TEMPERATURE, true, true);
 
 			/*
 			 * PMBus provides only one status register for TEMP1-3.
@@ -1323,7 +1324,7 @@
 				i1 = data->num_sensors;
 				pmbus_add_sensor(data, "temp", "min", in_index,
 						 page, PMBUS_UT_WARN_LIMIT,
-						 PSC_TEMPERATURE, true);
+						 PSC_TEMPERATURE, true, false);
 				if (info->func[page] & PMBUS_HAVE_STATUS_TEMP) {
 					pmbus_add_boolean_cmp(data, "temp",
 						"min_alarm", in_index, i1, i0,
@@ -1338,7 +1339,7 @@
 				pmbus_add_sensor(data, "temp", "lcrit",
 						 in_index, page,
 						 PMBUS_UT_FAULT_LIMIT,
-						 PSC_TEMPERATURE, true);
+						 PSC_TEMPERATURE, true, false);
 				if (info->func[page] & PMBUS_HAVE_STATUS_TEMP) {
 					pmbus_add_boolean_cmp(data, "temp",
 						"lcrit_alarm", in_index, i1, i0,
@@ -1352,7 +1353,7 @@
 				i1 = data->num_sensors;
 				pmbus_add_sensor(data, "temp", "max", in_index,
 						 page, PMBUS_OT_WARN_LIMIT,
-						 PSC_TEMPERATURE, true);
+						 PSC_TEMPERATURE, true, false);
 				if (info->func[page] & PMBUS_HAVE_STATUS_TEMP) {
 					pmbus_add_boolean_cmp(data, "temp",
 						"max_alarm", in_index, i0, i1,
@@ -1366,7 +1367,7 @@
 				i1 = data->num_sensors;
 				pmbus_add_sensor(data, "temp", "crit", in_index,
 						 page, PMBUS_OT_FAULT_LIMIT,
-						 PSC_TEMPERATURE, true);
+						 PSC_TEMPERATURE, true, false);
 				if (info->func[page] & PMBUS_HAVE_STATUS_TEMP) {
 					pmbus_add_boolean_cmp(data, "temp",
 						"crit_alarm", in_index, i0, i1,
@@ -1421,7 +1422,8 @@
 
 			i0 = data->num_sensors;
 			pmbus_add_sensor(data, "fan", "input", in_index, page,
-					 pmbus_fan_registers[f], PSC_FAN, true);
+					 pmbus_fan_registers[f], PSC_FAN, true,
+					 true);
 
 			/*
 			 * Each fan status register covers multiple fans,

diff --git a/drivers/hwspinlock/Kconfig b/drivers/hwspinlock/Kconfig
index eb4af28..1f29bab 100644
--- a/drivers/hwspinlock/Kconfig
+++ b/drivers/hwspinlock/Kconfig

@@ -4,6 +4,7 @@
 
 config HWSPINLOCK
 	tristate "Generic Hardware Spinlock framework"
+	depends on ARCH_OMAP4
 	help
 	  Say y here to support the generic hardware spinlock framework.
 	  You only need to enable this if you have hardware spinlock module

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index f407784..0e406d73 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c

@@ -440,6 +440,7 @@
 	struct ide_host *host = hwif->host;
 	struct request	*rq = NULL;
 	ide_startstop_t	startstop;
+	unsigned long queue_run_ms = 3; /* old plug delay */
 
 	spin_unlock_irq(q->queue_lock);
 
@@ -459,6 +460,9 @@
 		prev_port = hwif->host->cur_port;
 		if (drive->dev_flags & IDE_DFLAG_SLEEPING &&
 		    time_after(drive->sleep, jiffies)) {
+			unsigned long left = jiffies - drive->sleep;
+
+			queue_run_ms = jiffies_to_msecs(left + 1);
 			ide_unlock_port(hwif);
 			goto plug_device;
 		}
@@ -547,8 +551,10 @@
 plug_device_2:
 	spin_lock_irq(q->queue_lock);
 
-	if (rq)
+	if (rq) {
 		blk_requeue_request(q, rq);
+		blk_delay_queue(q, queue_run_ms);
+	}
 }
 
 void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
@@ -562,6 +568,10 @@
 		blk_requeue_request(q, rq);
 
 	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	/* Use 3ms as that was the old plug delay */
+	if (rq)
+		blk_delay_queue(q, 3);
 }
 
 static int drive_is_ready(ide_drive_t *drive)

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index c7a6213..fbe1973 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c

@@ -625,7 +625,7 @@
 
 	err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
 				    !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
-				    MLX4_PROTOCOL_IB);
+				    MLX4_PROT_IB_IPV6);
 	if (err)
 		return err;
 
@@ -636,7 +636,7 @@
 	return 0;
 
 err_add:
-	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROTOCOL_IB);
+	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6);
 	return err;
 }
 
@@ -666,7 +666,7 @@
 	struct mlx4_ib_gid_entry *ge;
 
 	err = mlx4_multicast_detach(mdev->dev,
-				    &mqp->mqp, gid->raw, MLX4_PROTOCOL_IB);
+				    &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6);
 	if (err)
 		return err;
 
@@ -721,7 +721,6 @@
 	if (err)
 		goto out;
 
-	dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
 	memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
 
 out:
@@ -954,7 +953,7 @@
 	mlx4_foreach_ib_transport_port(port, ibdev->dev) {
 		oldnd = iboe->netdevs[port - 1];
 		iboe->netdevs[port - 1] =
-			mlx4_get_protocol_dev(ibdev->dev, MLX4_PROTOCOL_EN, port);
+			mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
 		if (oldnd != iboe->netdevs[port - 1]) {
 			if (iboe->netdevs[port - 1])
 				netdev_added(ibdev, port);
@@ -1207,7 +1206,7 @@
 	.add		= mlx4_ib_add,
 	.remove		= mlx4_ib_remove,
 	.event		= mlx4_ib_event,
-	.protocol	= MLX4_PROTOCOL_IB
+	.protocol	= MLX4_PROT_IB_IPV6
 };
 
 static int __init mlx4_ib_init(void)

diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index ef32915..cfa3a2b 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c

@@ -1116,7 +1116,7 @@
 		return rc;
 	}
 
-	if (netif_is_bond_slave(netdev))
+	if (netif_is_bond_slave(nesvnic->netdev))
 		netdev = nesvnic->netdev->master;
 	else
 		netdev = nesvnic->netdev;

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 112ec55..434fd80 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig

@@ -641,7 +641,7 @@
 
 config TOUCHSCREEN_TSC2005
         tristate "TSC2005 based touchscreens"
-        depends on SPI_MASTER
+        depends on SPI_MASTER && GENERIC_HARDIRQS
         help
           Say Y here if you have a TSC2005 based touchscreen.
 

diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c
index 8742061..cbf0ff3 100644
--- a/drivers/input/touchscreen/tsc2005.c
+++ b/drivers/input/touchscreen/tsc2005.c

@@ -358,7 +358,7 @@
 	if (ts->esd_timeout && ts->set_reset) {
 		ts->last_valid_interrupt = jiffies;
 		schedule_delayed_work(&ts->esd_work,
-				round_jiffies(jiffies +
+				round_jiffies_relative(
 					msecs_to_jiffies(ts->esd_timeout)));
 	}
 
@@ -477,7 +477,14 @@
 	int error;
 	u16 r;
 
-	mutex_lock(&ts->mutex);
+	if (!mutex_trylock(&ts->mutex)) {
+		/*
+		 * If the mutex is taken, it means that disable or enable is in
+		 * progress. In that case just reschedule the work. If the work
+		 * is not needed, it will be canceled by disable.
+		 */
+		goto reschedule;
+	}
 
 	if (time_is_after_jiffies(ts->last_valid_interrupt +
 				  msecs_to_jiffies(ts->esd_timeout)))
@@ -510,11 +517,12 @@
 	tsc2005_start_scan(ts);
 
 out:
+	mutex_unlock(&ts->mutex);
+reschedule:
 	/* re-arm the watchdog */
 	schedule_delayed_work(&ts->esd_work,
-			      round_jiffies(jiffies +
+			      round_jiffies_relative(
 					msecs_to_jiffies(ts->esd_timeout)));
-	mutex_unlock(&ts->mutex);
 }
 
 static int tsc2005_open(struct input_dev *input)
@@ -663,7 +671,7 @@
 		goto err_remove_sysfs;
 	}
 
-	set_irq_wake(spi->irq, 1);
+	irq_set_irq_wake(spi->irq, 1);
 	return 0;
 
 err_remove_sysfs:

diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index c41eb61..4bebae7 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c

@@ -231,6 +231,26 @@
 }
 EXPORT_SYMBOL_GPL(led_trigger_event);
 
+void led_trigger_blink(struct led_trigger *trigger,
+		       unsigned long *delay_on,
+		       unsigned long *delay_off)
+{
+	struct list_head *entry;
+
+	if (!trigger)
+		return;
+
+	read_lock(&trigger->leddev_list_lock);
+	list_for_each(entry, &trigger->led_cdevs) {
+		struct led_classdev *led_cdev;
+
+		led_cdev = list_entry(entry, struct led_classdev, trig_list);
+		led_blink_set(led_cdev, delay_on, delay_off);
+	}
+	read_unlock(&trigger->leddev_list_lock);
+}
+EXPORT_SYMBOL_GPL(led_trigger_blink);
+
 void led_trigger_register_simple(const char *name, struct led_trigger **tp)
 {
 	struct led_trigger *trigger;

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 98d9ec8..8420129 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig

@@ -327,4 +327,10 @@
 	---help---
 	Generate udev events for DM events.
 
+config DM_FLAKEY
+       tristate "Flakey target (EXPERIMENTAL)"
+       depends on BLK_DEV_DM && EXPERIMENTAL
+       ---help---
+         A target that intermittently fails I/O for debugging purposes.
+
 endif # MD

diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index d013860..448838b 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile

@@ -29,6 +29,7 @@
 obj-$(CONFIG_BLK_DEV_DM)	+= dm-mod.o
 obj-$(CONFIG_DM_CRYPT)		+= dm-crypt.o
 obj-$(CONFIG_DM_DELAY)		+= dm-delay.o
+obj-$(CONFIG_DM_FLAKEY)		+= dm-flakey.o
 obj-$(CONFIG_DM_MULTIPATH)	+= dm-multipath.o dm-round-robin.o
 obj-$(CONFIG_DM_MULTIPATH_QL)	+= dm-queue-length.o
 obj-$(CONFIG_DM_MULTIPATH_ST)	+= dm-service-time.o

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 2c62c11..c8827ff 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c

@@ -1324,20 +1324,29 @@
 
 static int crypt_set_key(struct crypt_config *cc, char *key)
 {
+	int r = -EINVAL;
+	int key_string_len = strlen(key);
+
 	/* The key size may not be changed. */
-	if (cc->key_size != (strlen(key) >> 1))
-		return -EINVAL;
+	if (cc->key_size != (key_string_len >> 1))
+		goto out;
 
 	/* Hyphen (which gives a key_size of zero) means there is no key. */
 	if (!cc->key_size && strcmp(key, "-"))
-		return -EINVAL;
+		goto out;
 
 	if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0)
-		return -EINVAL;
+		goto out;
 
 	set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
 
-	return crypt_setkey_allcpus(cc);
+	r = crypt_setkey_allcpus(cc);
+
+out:
+	/* Hex key string not needed after here, so wipe it. */
+	memset(key, '0', key_string_len);
+
+	return r;
 }
 
 static int crypt_wipe_key(struct crypt_config *cc)

diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
new file mode 100644
index 0000000..ea79062
--- /dev/null
+++ b/drivers/md/dm-flakey.c

@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2003 Sistina Software (UK) Limited.
+ * Copyright (C) 2004, 2010 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/device-mapper.h>
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+
+#define DM_MSG_PREFIX "flakey"
+
+/*
+ * Flakey: Used for testing only, simulates intermittent,
+ * catastrophic device failure.
+ */
+struct flakey_c {
+	struct dm_dev *dev;
+	unsigned long start_time;
+	sector_t start;
+	unsigned up_interval;
+	unsigned down_interval;
+};
+
+/*
+ * Construct a flakey mapping: <dev_path> <offset> <up interval> <down interval>
+ */
+static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	struct flakey_c *fc;
+	unsigned long long tmp;
+
+	if (argc != 4) {
+		ti->error = "dm-flakey: Invalid argument count";
+		return -EINVAL;
+	}
+
+	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+	if (!fc) {
+		ti->error = "dm-flakey: Cannot allocate linear context";
+		return -ENOMEM;
+	}
+	fc->start_time = jiffies;
+
+	if (sscanf(argv[1], "%llu", &tmp) != 1) {
+		ti->error = "dm-flakey: Invalid device sector";
+		goto bad;
+	}
+	fc->start = tmp;
+
+	if (sscanf(argv[2], "%u", &fc->up_interval) != 1) {
+		ti->error = "dm-flakey: Invalid up interval";
+		goto bad;
+	}
+
+	if (sscanf(argv[3], "%u", &fc->down_interval) != 1) {
+		ti->error = "dm-flakey: Invalid down interval";
+		goto bad;
+	}
+
+	if (!(fc->up_interval + fc->down_interval)) {
+		ti->error = "dm-flakey: Total (up + down) interval is zero";
+		goto bad;
+	}
+
+	if (fc->up_interval + fc->down_interval < fc->up_interval) {
+		ti->error = "dm-flakey: Interval overflow";
+		goto bad;
+	}
+
+	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &fc->dev)) {
+		ti->error = "dm-flakey: Device lookup failed";
+		goto bad;
+	}
+
+	ti->num_flush_requests = 1;
+	ti->private = fc;
+	return 0;
+
+bad:
+	kfree(fc);
+	return -EINVAL;
+}
+
+static void flakey_dtr(struct dm_target *ti)
+{
+	struct flakey_c *fc = ti->private;
+
+	dm_put_device(ti, fc->dev);
+	kfree(fc);
+}
+
+static sector_t flakey_map_sector(struct dm_target *ti, sector_t bi_sector)
+{
+	struct flakey_c *fc = ti->private;
+
+	return fc->start + (bi_sector - ti->begin);
+}
+
+static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
+{
+	struct flakey_c *fc = ti->private;
+
+	bio->bi_bdev = fc->dev->bdev;
+	if (bio_sectors(bio))
+		bio->bi_sector = flakey_map_sector(ti, bio->bi_sector);
+}
+
+static int flakey_map(struct dm_target *ti, struct bio *bio,
+		      union map_info *map_context)
+{
+	struct flakey_c *fc = ti->private;
+	unsigned elapsed;
+
+	/* Are we alive ? */
+	elapsed = (jiffies - fc->start_time) / HZ;
+	if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval)
+		return -EIO;
+
+	flakey_map_bio(ti, bio);
+
+	return DM_MAPIO_REMAPPED;
+}
+
+static int flakey_status(struct dm_target *ti, status_type_t type,
+			 char *result, unsigned int maxlen)
+{
+	struct flakey_c *fc = ti->private;
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		result[0] = '\0';
+		break;
+
+	case STATUSTYPE_TABLE:
+		snprintf(result, maxlen, "%s %llu %u %u", fc->dev->name,
+			 (unsigned long long)fc->start, fc->up_interval,
+			 fc->down_interval);
+		break;
+	}
+	return 0;
+}
+
+static int flakey_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg)
+{
+	struct flakey_c *fc = ti->private;
+
+	return __blkdev_driver_ioctl(fc->dev->bdev, fc->dev->mode, cmd, arg);
+}
+
+static int flakey_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
+			struct bio_vec *biovec, int max_size)
+{
+	struct flakey_c *fc = ti->private;
+	struct request_queue *q = bdev_get_queue(fc->dev->bdev);
+
+	if (!q->merge_bvec_fn)
+		return max_size;
+
+	bvm->bi_bdev = fc->dev->bdev;
+	bvm->bi_sector = flakey_map_sector(ti, bvm->bi_sector);
+
+	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
+}
+
+static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
+{
+	struct flakey_c *fc = ti->private;
+
+	return fn(ti, fc->dev, fc->start, ti->len, data);
+}
+
+static struct target_type flakey_target = {
+	.name   = "flakey",
+	.version = {1, 1, 0},
+	.module = THIS_MODULE,
+	.ctr    = flakey_ctr,
+	.dtr    = flakey_dtr,
+	.map    = flakey_map,
+	.status = flakey_status,
+	.ioctl	= flakey_ioctl,
+	.merge	= flakey_merge,
+	.iterate_devices = flakey_iterate_devices,
+};
+
+static int __init dm_flakey_init(void)
+{
+	int r = dm_register_target(&flakey_target);
+
+	if (r < 0)
+		DMERR("register failed %d", r);
+
+	return r;
+}
+
+static void __exit dm_flakey_exit(void)
+{
+	dm_unregister_target(&flakey_target);
+}
+
+/* Module hooks */
+module_init(dm_flakey_init);
+module_exit(dm_flakey_exit);
+
+MODULE_DESCRIPTION(DM_NAME " flakey target");
+MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 6d12775..4cacdad 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c

@@ -1501,14 +1501,10 @@
 	return r;
 }
 
-static void free_params(struct dm_ioctl *param)
-{
-	vfree(param);
-}
-
 static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
 {
 	struct dm_ioctl tmp, *dmi;
+	int secure_data;
 
 	if (copy_from_user(&tmp, user, sizeof(tmp) - sizeof(tmp.data)))
 		return -EFAULT;
@@ -1516,17 +1512,30 @@
 	if (tmp.data_size < (sizeof(tmp) - sizeof(tmp.data)))
 		return -EINVAL;
 
-	dmi = vmalloc(tmp.data_size);
-	if (!dmi)
-		return -ENOMEM;
+	secure_data = tmp.flags & DM_SECURE_DATA_FLAG;
 
-	if (copy_from_user(dmi, user, tmp.data_size)) {
-		vfree(dmi);
-		return -EFAULT;
+	dmi = vmalloc(tmp.data_size);
+	if (!dmi) {
+		if (secure_data && clear_user(user, tmp.data_size))
+			return -EFAULT;
+		return -ENOMEM;
 	}
 
+	if (copy_from_user(dmi, user, tmp.data_size))
+		goto bad;
+
+	/* Wipe the user buffer so we do not return it to userspace */
+	if (secure_data && clear_user(user, tmp.data_size))
+		goto bad;
+
 	*param = dmi;
 	return 0;
+
+bad:
+	if (secure_data)
+		memset(dmi, 0, tmp.data_size);
+	vfree(dmi);
+	return -EFAULT;
 }
 
 static int validate_params(uint cmd, struct dm_ioctl *param)
@@ -1534,6 +1543,7 @@
 	/* Always clear this flag */
 	param->flags &= ~DM_BUFFER_FULL_FLAG;
 	param->flags &= ~DM_UEVENT_GENERATED_FLAG;
+	param->flags &= ~DM_SECURE_DATA_FLAG;
 
 	/* Ignores parameters */
 	if (cmd == DM_REMOVE_ALL_CMD ||
@@ -1561,10 +1571,11 @@
 static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
 {
 	int r = 0;
+	int wipe_buffer;
 	unsigned int cmd;
 	struct dm_ioctl *uninitialized_var(param);
 	ioctl_fn fn = NULL;
-	size_t param_size;
+	size_t input_param_size;
 
 	/* only root can play with this */
 	if (!capable(CAP_SYS_ADMIN))
@@ -1611,13 +1622,15 @@
 	if (r)
 		return r;
 
+	input_param_size = param->data_size;
+	wipe_buffer = param->flags & DM_SECURE_DATA_FLAG;
+
 	r = validate_params(cmd, param);
 	if (r)
 		goto out;
 
-	param_size = param->data_size;
 	param->data_size = sizeof(*param);
-	r = fn(param, param_size);
+	r = fn(param, input_param_size);
 
 	/*
 	 * Copy the results back to userland.
@@ -1625,8 +1638,11 @@
 	if (!r && copy_to_user(user, param, param->data_size))
 		r = -EFAULT;
 
- out:
-	free_params(param);
+out:
+	if (wipe_buffer)
+		memset(param, 0, input_param_size);
+
+	vfree(param);
 	return r;
 }
 

diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 57968eb..a1f3218 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c

@@ -543,7 +543,7 @@
 		return -EINVAL;
 	}
 
-	r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &dev);
+	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev);
 	if (r)
 		return r;
 

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 4b0b63c..a550a05 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c

@@ -844,8 +844,8 @@
 {
 	/* target parameters */
 	static struct param _params[] = {
-		{1, 1024, "invalid number of priority groups"},
-		{1, 1024, "invalid initial priority group number"},
+		{0, 1024, "invalid number of priority groups"},
+		{0, 1024, "invalid initial priority group number"},
 	};
 
 	int r;
@@ -879,6 +879,13 @@
 	if (r)
 		goto bad;
 
+	if ((!m->nr_priority_groups && next_pg_num) ||
+	    (m->nr_priority_groups && !next_pg_num)) {
+		ti->error = "invalid initial priority group";
+		r = -EINVAL;
+		goto bad;
+	}
+
 	/* parse the priority groups */
 	while (as.argc) {
 		struct priority_group *pg;
@@ -1065,7 +1072,7 @@
 static int action_dev(struct multipath *m, struct dm_dev *dev,
 		      action_fn action)
 {
-	int r = 0;
+	int r = -EINVAL;
 	struct pgpath *pgpath;
 	struct priority_group *pg;
 
@@ -1415,7 +1422,7 @@
 	else if (m->current_pg)
 		pg_num = m->current_pg->pg_num;
 	else
-			pg_num = 1;
+		pg_num = (m->nr_priority_groups ? 1 : 0);
 
 	DMEMIT("%u ", pg_num);
 
@@ -1669,7 +1676,7 @@
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
 	.name = "multipath",
-	.version = {1, 2, 0},
+	.version = {1, 3, 0},
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
 	.dtr = multipath_dtr,

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index fdde53c..a2d3309 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c

@@ -1080,7 +1080,7 @@
 	argv++;
 	argc--;
 
-	r = dm_get_device(ti, cow_path, FMODE_READ | FMODE_WRITE, &s->cow);
+	r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow);
 	if (r) {
 		ti->error = "Cannot get COW device";
 		goto bad_cow;

diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index dddfa14..3d80cf0 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c

@@ -396,9 +396,29 @@
 	blk_limits_io_opt(limits, chunk_size * sc->stripes);
 }
 
+static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
+			struct bio_vec *biovec, int max_size)
+{
+	struct stripe_c *sc = ti->private;
+	sector_t bvm_sector = bvm->bi_sector;
+	uint32_t stripe;
+	struct request_queue *q;
+
+	stripe_map_sector(sc, bvm_sector, &stripe, &bvm_sector);
+
+	q = bdev_get_queue(sc->stripe[stripe].dev->bdev);
+	if (!q->merge_bvec_fn)
+		return max_size;
+
+	bvm->bi_bdev = sc->stripe[stripe].dev->bdev;
+	bvm->bi_sector = sc->stripe[stripe].physical_start + bvm_sector;
+
+	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
+}
+
 static struct target_type stripe_target = {
 	.name   = "striped",
-	.version = {1, 3, 1},
+	.version = {1, 4, 0},
 	.module = THIS_MODULE,
 	.ctr    = stripe_ctr,
 	.dtr    = stripe_dtr,
@@ -407,6 +427,7 @@
 	.status = stripe_status,
 	.iterate_devices = stripe_iterate_devices,
 	.io_hints = stripe_io_hints,
+	.merge  = stripe_merge,
 };
 
 int __init dm_stripe_init(void)

diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c
index 9be6a83..ac0e42b 100644
--- a/drivers/media/rc/ite-cir.c
+++ b/drivers/media/rc/ite-cir.c

@@ -187,7 +187,7 @@
 	sample_period = dev->params.sample_period;
 	ldata = (unsigned long *)data;
 	size = length << 3;
-	next_one = generic_find_next_le_bit(ldata, size, 0);
+	next_one = find_next_bit_le(ldata, size, 0);
 	if (next_one > 0) {
 		ev.pulse = true;
 		ev.duration =
@@ -196,14 +196,14 @@
 	}
 
 	while (next_one < size) {
-		next_zero = generic_find_next_zero_le_bit(ldata, size, next_one + 1);
+		next_zero = find_next_zero_bit_le(ldata, size, next_one + 1);
 		ev.pulse = false;
 		ev.duration = ITE_BITS_TO_NS(next_zero - next_one, sample_period);
 		ir_raw_event_store_with_filter(dev->rdev, &ev);
 
 		if (next_zero < size) {
 			next_one =
-			    generic_find_next_le_bit(ldata,
+			    find_next_bit_le(ldata,
 						     size,
 						     next_zero + 1);
 			ev.pulse = true;

diff --git a/drivers/memstick/host/Kconfig b/drivers/memstick/host/Kconfig
index 4ce5c8d..cc0997a 100644
--- a/drivers/memstick/host/Kconfig
+++ b/drivers/memstick/host/Kconfig

@@ -30,3 +30,15 @@
 
           To compile this driver as a module, choose M here: the
 	  module will be called jmb38x_ms.
+
+config MEMSTICK_R592
+	tristate "Ricoh R5C592 MemoryStick interface support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && PCI
+
+	help
+	  Say Y here if you want to be able to access MemoryStick cards with
+	  the Ricoh R5C592 MemoryStick card reader (which is part of 5 in one
+		multifunction reader)
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called r592.

diff --git a/drivers/memstick/host/Makefile b/drivers/memstick/host/Makefile
index a1815e9..31ba8d3 100644
--- a/drivers/memstick/host/Makefile
+++ b/drivers/memstick/host/Makefile

@@ -4,3 +4,4 @@
 
 obj-$(CONFIG_MEMSTICK_TIFM_MS)		+= tifm_ms.o
 obj-$(CONFIG_MEMSTICK_JMICRON_38X)	+= jmb38x_ms.o
+obj-$(CONFIG_MEMSTICK_R592)		+= r592.o

diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c
new file mode 100644
index 0000000..767406c
--- /dev/null
+++ b/drivers/memstick/host/r592.c

@@ -0,0 +1,908 @@
+/*
+ * Copyright (C) 2010 - Maxim Levitsky
+ * driver for Ricoh memstick readers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/freezer.h>
+#include <linux/jiffies.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/highmem.h>
+#include <asm/byteorder.h>
+#include <linux/swab.h>
+#include "r592.h"
+
+static int enable_dma = 1;
+static int debug;
+
+static const char *tpc_names[] = {
+	"MS_TPC_READ_MG_STATUS",
+	"MS_TPC_READ_LONG_DATA",
+	"MS_TPC_READ_SHORT_DATA",
+	"MS_TPC_READ_REG",
+	"MS_TPC_READ_QUAD_DATA",
+	"INVALID",
+	"MS_TPC_GET_INT",
+	"MS_TPC_SET_RW_REG_ADRS",
+	"MS_TPC_EX_SET_CMD",
+	"MS_TPC_WRITE_QUAD_DATA",
+	"MS_TPC_WRITE_REG",
+	"MS_TPC_WRITE_SHORT_DATA",
+	"MS_TPC_WRITE_LONG_DATA",
+	"MS_TPC_SET_CMD",
+};
+
+/**
+ * memstick_debug_get_tpc_name - debug helper that returns string for
+ * a TPC number
+ */
+const char *memstick_debug_get_tpc_name(int tpc)
+{
+	return tpc_names[tpc-1];
+}
+EXPORT_SYMBOL(memstick_debug_get_tpc_name);
+
+
+/* Read a register*/
+static inline u32 r592_read_reg(struct r592_device *dev, int address)
+{
+	u32 value = readl(dev->mmio + address);
+	dbg_reg("reg #%02d == 0x%08x", address, value);
+	return value;
+}
+
+/* Write a register */
+static inline void r592_write_reg(struct r592_device *dev,
+							int address, u32 value)
+{
+	dbg_reg("reg #%02d <- 0x%08x", address, value);
+	writel(value, dev->mmio + address);
+}
+
+/* Reads a big endian DWORD register */
+static inline u32 r592_read_reg_raw_be(struct r592_device *dev, int address)
+{
+	u32 value = __raw_readl(dev->mmio + address);
+	dbg_reg("reg #%02d == 0x%08x", address, value);
+	return be32_to_cpu(value);
+}
+
+/* Writes a big endian DWORD register */
+static inline void r592_write_reg_raw_be(struct r592_device *dev,
+							int address, u32 value)
+{
+	dbg_reg("reg #%02d <- 0x%08x", address, value);
+	__raw_writel(cpu_to_be32(value), dev->mmio + address);
+}
+
+/* Set specific bits in a register (little endian) */
+static inline void r592_set_reg_mask(struct r592_device *dev,
+							int address, u32 mask)
+{
+	u32 reg = readl(dev->mmio + address);
+	dbg_reg("reg #%02d |= 0x%08x (old =0x%08x)", address, mask, reg);
+	writel(reg | mask , dev->mmio + address);
+}
+
+/* Clear specific bits in a register (little endian) */
+static inline void r592_clear_reg_mask(struct r592_device *dev,
+						int address, u32 mask)
+{
+	u32 reg = readl(dev->mmio + address);
+	dbg_reg("reg #%02d &= 0x%08x (old = 0x%08x, mask = 0x%08x)",
+						address, ~mask, reg, mask);
+	writel(reg & ~mask, dev->mmio + address);
+}
+
+
+/* Wait for status bits while checking for errors */
+static int r592_wait_status(struct r592_device *dev, u32 mask, u32 wanted_mask)
+{
+	unsigned long timeout = jiffies + msecs_to_jiffies(1000);
+	u32 reg = r592_read_reg(dev, R592_STATUS);
+
+	if ((reg & mask) == wanted_mask)
+		return 0;
+
+	while (time_before(jiffies, timeout)) {
+
+		reg = r592_read_reg(dev, R592_STATUS);
+
+		if ((reg & mask) == wanted_mask)
+			return 0;
+
+		if (reg & (R592_STATUS_SEND_ERR | R592_STATUS_RECV_ERR))
+			return -EIO;
+
+		cpu_relax();
+	}
+	return -ETIME;
+}
+
+
+/* Enable/disable device */
+static int r592_enable_device(struct r592_device *dev, bool enable)
+{
+	dbg("%sabling the device", enable ? "en" : "dis");
+
+	if (enable) {
+
+		/* Power up the card */
+		r592_write_reg(dev, R592_POWER, R592_POWER_0 | R592_POWER_1);
+
+		/* Perform a reset */
+		r592_set_reg_mask(dev, R592_IO, R592_IO_RESET);
+
+		msleep(100);
+	} else
+		/* Power down the card */
+		r592_write_reg(dev, R592_POWER, 0);
+
+	return 0;
+}
+
+/* Set serial/parallel mode */
+static int r592_set_mode(struct r592_device *dev, bool parallel_mode)
+{
+	if (!parallel_mode) {
+		dbg("switching to serial mode");
+
+		/* Set serial mode */
+		r592_write_reg(dev, R592_IO_MODE, R592_IO_MODE_SERIAL);
+
+		r592_clear_reg_mask(dev, R592_POWER, R592_POWER_20);
+
+	} else {
+		dbg("switching to parallel mode");
+
+		/* This setting should be set _before_ switch TPC */
+		r592_set_reg_mask(dev, R592_POWER, R592_POWER_20);
+
+		r592_clear_reg_mask(dev, R592_IO,
+			R592_IO_SERIAL1 | R592_IO_SERIAL2);
+
+		/* Set the parallel mode now */
+		r592_write_reg(dev, R592_IO_MODE, R592_IO_MODE_PARALLEL);
+	}
+
+	dev->parallel_mode = parallel_mode;
+	return 0;
+}
+
+/* Perform a controller reset without powering down the card */
+static void r592_host_reset(struct r592_device *dev)
+{
+	r592_set_reg_mask(dev, R592_IO, R592_IO_RESET);
+	msleep(100);
+	r592_set_mode(dev, dev->parallel_mode);
+}
+
+/* Disable all hardware interrupts */
+static void r592_clear_interrupts(struct r592_device *dev)
+{
+	/* Disable & ACK all interrupts */
+	r592_clear_reg_mask(dev, R592_REG_MSC, IRQ_ALL_ACK_MASK);
+	r592_clear_reg_mask(dev, R592_REG_MSC, IRQ_ALL_EN_MASK);
+}
+
+/* Tests if there is an CRC error */
+static int r592_test_io_error(struct r592_device *dev)
+{
+	if (!(r592_read_reg(dev, R592_STATUS) &
+		(R592_STATUS_SEND_ERR | R592_STATUS_RECV_ERR)))
+		return 0;
+
+	return -EIO;
+}
+
+/* Ensure that FIFO is ready for use */
+static int r592_test_fifo_empty(struct r592_device *dev)
+{
+	if (r592_read_reg(dev, R592_REG_MSC) & R592_REG_MSC_FIFO_EMPTY)
+		return 0;
+
+	dbg("FIFO not ready, trying to reset the device");
+	r592_host_reset(dev);
+
+	if (r592_read_reg(dev, R592_REG_MSC) & R592_REG_MSC_FIFO_EMPTY)
+		return 0;
+
+	message("FIFO still not ready, giving up");
+	return -EIO;
+}
+
+/* Activates the DMA transfer from to FIFO */
+static void r592_start_dma(struct r592_device *dev, bool is_write)
+{
+	unsigned long flags;
+	u32 reg;
+	spin_lock_irqsave(&dev->irq_lock, flags);
+
+	/* Ack interrupts (just in case) + enable them */
+	r592_clear_reg_mask(dev, R592_REG_MSC, DMA_IRQ_ACK_MASK);
+	r592_set_reg_mask(dev, R592_REG_MSC, DMA_IRQ_EN_MASK);
+
+	/* Set DMA address */
+	r592_write_reg(dev, R592_FIFO_DMA, sg_dma_address(&dev->req->sg));
+
+	/* Enable the DMA */
+	reg = r592_read_reg(dev, R592_FIFO_DMA_SETTINGS);
+	reg |= R592_FIFO_DMA_SETTINGS_EN;
+
+	if (!is_write)
+		reg |= R592_FIFO_DMA_SETTINGS_DIR;
+	else
+		reg &= ~R592_FIFO_DMA_SETTINGS_DIR;
+	r592_write_reg(dev, R592_FIFO_DMA_SETTINGS, reg);
+
+	spin_unlock_irqrestore(&dev->irq_lock, flags);
+}
+
+/* Cleanups DMA related settings */
+static void r592_stop_dma(struct r592_device *dev, int error)
+{
+	r592_clear_reg_mask(dev, R592_FIFO_DMA_SETTINGS,
+		R592_FIFO_DMA_SETTINGS_EN);
+
+	/* This is only a precation */
+	r592_write_reg(dev, R592_FIFO_DMA,
+			dev->dummy_dma_page_physical_address);
+
+	r592_clear_reg_mask(dev, R592_REG_MSC, DMA_IRQ_EN_MASK);
+	r592_clear_reg_mask(dev, R592_REG_MSC, DMA_IRQ_ACK_MASK);
+	dev->dma_error = error;
+}
+
+/* Test if hardware supports DMA */
+static void r592_check_dma(struct r592_device *dev)
+{
+	dev->dma_capable = enable_dma &&
+		(r592_read_reg(dev, R592_FIFO_DMA_SETTINGS) &
+			R592_FIFO_DMA_SETTINGS_CAP);
+}
+
+/* Transfers fifo contents in/out using DMA */
+static int r592_transfer_fifo_dma(struct r592_device *dev)
+{
+	int len, sg_count;
+	bool is_write;
+
+	if (!dev->dma_capable || !dev->req->long_data)
+		return -EINVAL;
+
+	len = dev->req->sg.length;
+	is_write = dev->req->data_dir == WRITE;
+
+	if (len != R592_LFIFO_SIZE)
+		return -EINVAL;
+
+	dbg_verbose("doing dma transfer");
+
+	dev->dma_error = 0;
+	INIT_COMPLETION(dev->dma_done);
+
+	/* TODO: hidden assumption about nenth beeing always 1 */
+	sg_count = dma_map_sg(&dev->pci_dev->dev, &dev->req->sg, 1, is_write ?
+		PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
+
+	if (sg_count != 1 ||
+			(sg_dma_len(&dev->req->sg) < dev->req->sg.length)) {
+		message("problem in dma_map_sg");
+		return -EIO;
+	}
+
+	r592_start_dma(dev, is_write);
+
+	/* Wait for DMA completion */
+	if (!wait_for_completion_timeout(
+			&dev->dma_done, msecs_to_jiffies(1000))) {
+		message("DMA timeout");
+		r592_stop_dma(dev, -ETIMEDOUT);
+	}
+
+	dma_unmap_sg(&dev->pci_dev->dev, &dev->req->sg, 1, is_write ?
+		PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
+
+
+	return dev->dma_error;
+}
+
+/*
+ * Writes the FIFO in 4 byte chunks.
+ * If length isn't 4 byte aligned, rest of the data if put to a fifo
+ * to be written later
+ * Use r592_flush_fifo_write to flush that fifo when writing for the
+ * last time
+ */
+static void r592_write_fifo_pio(struct r592_device *dev,
+					unsigned char *buffer, int len)
+{
+	/* flush spill from former write */
+	if (!kfifo_is_empty(&dev->pio_fifo)) {
+
+		u8 tmp[4] = {0};
+		int copy_len = kfifo_in(&dev->pio_fifo, buffer, len);
+
+		if (!kfifo_is_full(&dev->pio_fifo))
+			return;
+		len -= copy_len;
+		buffer += copy_len;
+
+		copy_len = kfifo_out(&dev->pio_fifo, tmp, 4);
+		WARN_ON(copy_len != 4);
+		r592_write_reg_raw_be(dev, R592_FIFO_PIO, *(u32 *)tmp);
+	}
+
+	WARN_ON(!kfifo_is_empty(&dev->pio_fifo));
+
+	/* write full dwords */
+	while (len >= 4) {
+		r592_write_reg_raw_be(dev, R592_FIFO_PIO, *(u32 *)buffer);
+		buffer += 4;
+		len -= 4;
+	}
+
+	/* put remaining bytes to the spill */
+	if (len)
+		kfifo_in(&dev->pio_fifo, buffer, len);
+}
+
+/* Flushes the temporary FIFO used to make aligned DWORD writes */
+static void r592_flush_fifo_write(struct r592_device *dev)
+{
+	u8 buffer[4] = { 0 };
+	int len;
+
+	if (kfifo_is_empty(&dev->pio_fifo))
+		return;
+
+	len = kfifo_out(&dev->pio_fifo, buffer, 4);
+	r592_write_reg_raw_be(dev, R592_FIFO_PIO, *(u32 *)buffer);
+}
+
+/*
+ * Read a fifo in 4 bytes chunks.
+ * If input doesn't fit the buffer, it places bytes of last dword in spill
+ * buffer, so that they don't get lost on last read, just throw these away.
+ */
+static void r592_read_fifo_pio(struct r592_device *dev,
+						unsigned char *buffer, int len)
+{
+	u8 tmp[4];
+
+	/* Read from last spill */
+	if (!kfifo_is_empty(&dev->pio_fifo)) {
+		int bytes_copied =
+			kfifo_out(&dev->pio_fifo, buffer, min(4, len));
+		buffer += bytes_copied;
+		len -= bytes_copied;
+
+		if (!kfifo_is_empty(&dev->pio_fifo))
+			return;
+	}
+
+	/* Reads dwords from FIFO */
+	while (len >= 4) {
+		*(u32 *)buffer = r592_read_reg_raw_be(dev, R592_FIFO_PIO);
+		buffer += 4;
+		len -= 4;
+	}
+
+	if (len) {
+		*(u32 *)tmp = r592_read_reg_raw_be(dev, R592_FIFO_PIO);
+		kfifo_in(&dev->pio_fifo, tmp, 4);
+		len -= kfifo_out(&dev->pio_fifo, buffer, len);
+	}
+
+	WARN_ON(len);
+	return;
+}
+
+/* Transfers actual data using PIO. */
+static int r592_transfer_fifo_pio(struct r592_device *dev)
+{
+	unsigned long flags;
+
+	bool is_write = dev->req->tpc >= MS_TPC_SET_RW_REG_ADRS;
+	struct sg_mapping_iter miter;
+
+	kfifo_reset(&dev->pio_fifo);
+
+	if (!dev->req->long_data) {
+		if (is_write) {
+			r592_write_fifo_pio(dev, dev->req->data,
+							dev->req->data_len);
+			r592_flush_fifo_write(dev);
+		} else
+			r592_read_fifo_pio(dev, dev->req->data,
+							dev->req->data_len);
+		return 0;
+	}
+
+	local_irq_save(flags);
+	sg_miter_start(&miter, &dev->req->sg, 1, SG_MITER_ATOMIC |
+		(is_write ? SG_MITER_FROM_SG : SG_MITER_TO_SG));
+
+	/* Do the transfer fifo<->memory*/
+	while (sg_miter_next(&miter))
+		if (is_write)
+			r592_write_fifo_pio(dev, miter.addr, miter.length);
+		else
+			r592_read_fifo_pio(dev, miter.addr, miter.length);
+
+
+	/* Write last few non aligned bytes*/
+	if (is_write)
+		r592_flush_fifo_write(dev);
+
+	sg_miter_stop(&miter);
+	local_irq_restore(flags);
+	return 0;
+}
+
+/* Executes one TPC (data is read/written from small or large fifo) */
+static void r592_execute_tpc(struct r592_device *dev)
+{
+	bool is_write = dev->req->tpc >= MS_TPC_SET_RW_REG_ADRS;
+	int len, error;
+	u32 status, reg;
+
+	if (!dev->req) {
+		message("BUG: tpc execution without request!");
+		return;
+	}
+
+	len = dev->req->long_data ?
+		dev->req->sg.length : dev->req->data_len;
+
+	/* Ensure that FIFO can hold the input data */
+	if (len > R592_LFIFO_SIZE) {
+		message("IO: hardware doesn't support TPCs longer that 512");
+		error = -ENOSYS;
+		goto out;
+	}
+
+	if (!(r592_read_reg(dev, R592_REG_MSC) & R592_REG_MSC_PRSNT)) {
+		dbg("IO: refusing to send TPC because card is absent");
+		error = -ENODEV;
+		goto out;
+	}
+
+	dbg("IO: executing %s LEN=%d",
+			memstick_debug_get_tpc_name(dev->req->tpc), len);
+
+	/* Set IO direction */
+	if (is_write)
+		r592_set_reg_mask(dev, R592_IO, R592_IO_DIRECTION);
+	else
+		r592_clear_reg_mask(dev, R592_IO, R592_IO_DIRECTION);
+
+
+	error = r592_test_fifo_empty(dev);
+	if (error)
+		goto out;
+
+	/* Transfer write data */
+	if (is_write) {
+		error = r592_transfer_fifo_dma(dev);
+		if (error == -EINVAL)
+			error = r592_transfer_fifo_pio(dev);
+	}
+
+	if (error)
+		goto out;
+
+	/* Trigger the TPC */
+	reg = (len << R592_TPC_EXEC_LEN_SHIFT) |
+		(dev->req->tpc << R592_TPC_EXEC_TPC_SHIFT) |
+			R592_TPC_EXEC_BIG_FIFO;
+
+	r592_write_reg(dev, R592_TPC_EXEC, reg);
+
+	/* Wait for TPC completion */
+	status = R592_STATUS_RDY;
+	if (dev->req->need_card_int)
+		status |= R592_STATUS_CED;
+
+	error = r592_wait_status(dev, status, status);
+	if (error) {
+		message("card didn't respond");
+		goto out;
+	}
+
+	/* Test IO errors */
+	error = r592_test_io_error(dev);
+	if (error) {
+		dbg("IO error");
+		goto out;
+	}
+
+	/* Read data from FIFO */
+	if (!is_write) {
+		error = r592_transfer_fifo_dma(dev);
+		if (error == -EINVAL)
+			error = r592_transfer_fifo_pio(dev);
+	}
+
+	/* read INT reg. This can be shortened with shifts, but that way
+		its more readable */
+	if (dev->parallel_mode && dev->req->need_card_int) {
+
+		dev->req->int_reg = 0;
+		status = r592_read_reg(dev, R592_STATUS);
+
+		if (status & R592_STATUS_P_CMDNACK)
+			dev->req->int_reg |= MEMSTICK_INT_CMDNAK;
+		if (status & R592_STATUS_P_BREQ)
+			dev->req->int_reg |= MEMSTICK_INT_BREQ;
+		if (status & R592_STATUS_P_INTERR)
+			dev->req->int_reg |= MEMSTICK_INT_ERR;
+		if (status & R592_STATUS_P_CED)
+			dev->req->int_reg |= MEMSTICK_INT_CED;
+	}
+
+	if (error)
+		dbg("FIFO read error");
+out:
+	dev->req->error = error;
+	r592_clear_reg_mask(dev, R592_REG_MSC, R592_REG_MSC_LED);
+	return;
+}
+
+/* Main request processing thread */
+static int r592_process_thread(void *data)
+{
+	int error;
+	struct r592_device *dev = (struct r592_device *)data;
+	unsigned long flags;
+
+	while (!kthread_should_stop()) {
+		spin_lock_irqsave(&dev->io_thread_lock, flags);
+		set_current_state(TASK_INTERRUPTIBLE);
+		error = memstick_next_req(dev->host, &dev->req);
+		spin_unlock_irqrestore(&dev->io_thread_lock, flags);
+
+		if (error) {
+			if (error == -ENXIO || error == -EAGAIN) {
+				dbg_verbose("IO: done IO, sleeping");
+			} else {
+				dbg("IO: unknown error from "
+					"memstick_next_req %d", error);
+			}
+
+			if (kthread_should_stop())
+				set_current_state(TASK_RUNNING);
+
+			schedule();
+		} else {
+			set_current_state(TASK_RUNNING);
+			r592_execute_tpc(dev);
+		}
+	}
+	return 0;
+}
+
+/* Reprogram chip to detect change in card state */
+/* eg, if card is detected, arm it to detect removal, and vice versa */
+static void r592_update_card_detect(struct r592_device *dev)
+{
+	u32 reg = r592_read_reg(dev, R592_REG_MSC);
+	bool card_detected = reg & R592_REG_MSC_PRSNT;
+
+	dbg("update card detect. card state: %s", card_detected ?
+		"present" : "absent");
+
+	reg &= ~((R592_REG_MSC_IRQ_REMOVE | R592_REG_MSC_IRQ_INSERT) << 16);
+
+	if (card_detected)
+		reg |= (R592_REG_MSC_IRQ_REMOVE << 16);
+	else
+		reg |= (R592_REG_MSC_IRQ_INSERT << 16);
+
+	r592_write_reg(dev, R592_REG_MSC, reg);
+}
+
+/* Timer routine that fires 1 second after last card detection event, */
+static void r592_detect_timer(long unsigned int data)
+{
+	struct r592_device *dev = (struct r592_device *)data;
+	r592_update_card_detect(dev);
+	memstick_detect_change(dev->host);
+}
+
+/* Interrupt handler */
+static irqreturn_t r592_irq(int irq, void *data)
+{
+	struct r592_device *dev = (struct r592_device *)data;
+	irqreturn_t ret = IRQ_NONE;
+	u32 reg;
+	u16 irq_enable, irq_status;
+	unsigned long flags;
+	int error;
+
+	spin_lock_irqsave(&dev->irq_lock, flags);
+
+	reg = r592_read_reg(dev, R592_REG_MSC);
+	irq_enable = reg >> 16;
+	irq_status = reg & 0xFFFF;
+
+	/* Ack the interrupts */
+	reg &= ~irq_status;
+	r592_write_reg(dev, R592_REG_MSC, reg);
+
+	/* Get the IRQ status minus bits that aren't enabled */
+	irq_status &= (irq_enable);
+
+	/* Due to limitation of memstick core, we don't look at bits that
+		indicate that card was removed/inserted and/or present */
+	if (irq_status & (R592_REG_MSC_IRQ_INSERT | R592_REG_MSC_IRQ_REMOVE)) {
+
+		bool card_was_added = irq_status & R592_REG_MSC_IRQ_INSERT;
+		ret = IRQ_HANDLED;
+
+		message("IRQ: card %s", card_was_added ? "added" : "removed");
+
+		mod_timer(&dev->detect_timer,
+			jiffies + msecs_to_jiffies(card_was_added ? 500 : 50));
+	}
+
+	if (irq_status &
+		(R592_REG_MSC_FIFO_DMA_DONE | R592_REG_MSC_FIFO_DMA_ERR)) {
+		ret = IRQ_HANDLED;
+
+		if (irq_status & R592_REG_MSC_FIFO_DMA_ERR) {
+			message("IRQ: DMA error");
+			error = -EIO;
+		} else {
+			dbg_verbose("IRQ: dma done");
+			error = 0;
+		}
+
+		r592_stop_dma(dev, error);
+		complete(&dev->dma_done);
+	}
+
+	spin_unlock_irqrestore(&dev->irq_lock, flags);
+	return ret;
+}
+
+/* External inteface: set settings */
+static int r592_set_param(struct memstick_host *host,
+			enum memstick_param param, int value)
+{
+	struct r592_device *dev = memstick_priv(host);
+
+	switch (param) {
+	case MEMSTICK_POWER:
+		switch (value) {
+		case MEMSTICK_POWER_ON:
+			return r592_enable_device(dev, true);
+		case MEMSTICK_POWER_OFF:
+			return r592_enable_device(dev, false);
+		default:
+			return -EINVAL;
+		}
+	case MEMSTICK_INTERFACE:
+		switch (value) {
+		case MEMSTICK_SERIAL:
+			return r592_set_mode(dev, 0);
+		case MEMSTICK_PAR4:
+			return r592_set_mode(dev, 1);
+		default:
+			return -EINVAL;
+		}
+	default:
+		return -EINVAL;
+	}
+}
+
+/* External interface: submit requests */
+static void r592_submit_req(struct memstick_host *host)
+{
+	struct r592_device *dev = memstick_priv(host);
+	unsigned long flags;
+
+	if (dev->req)
+		return;
+
+	spin_lock_irqsave(&dev->io_thread_lock, flags);
+	if (wake_up_process(dev->io_thread))
+		dbg_verbose("IO thread woken to process requests");
+	spin_unlock_irqrestore(&dev->io_thread_lock, flags);
+}
+
+static const struct pci_device_id r592_pci_id_tbl[] = {
+
+	{ PCI_VDEVICE(RICOH, 0x0592), },
+	{ },
+};
+
+/* Main entry */
+static int r592_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	int error = -ENOMEM;
+	struct memstick_host *host;
+	struct r592_device *dev;
+
+	/* Allocate memory */
+	host = memstick_alloc_host(sizeof(struct r592_device), &pdev->dev);
+	if (!host)
+		goto error1;
+
+	dev = memstick_priv(host);
+	dev->host = host;
+	dev->pci_dev = pdev;
+	pci_set_drvdata(pdev, dev);
+
+	/* pci initialization */
+	error = pci_enable_device(pdev);
+	if (error)
+		goto error2;
+
+	pci_set_master(pdev);
+	error = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (error)
+		goto error3;
+
+	error = pci_request_regions(pdev, DRV_NAME);
+	if (error)
+		goto error3;
+
+	dev->mmio = pci_ioremap_bar(pdev, 0);
+	if (!dev->mmio)
+		goto error4;
+
+	dev->irq = pdev->irq;
+	spin_lock_init(&dev->irq_lock);
+	spin_lock_init(&dev->io_thread_lock);
+	init_completion(&dev->dma_done);
+	INIT_KFIFO(dev->pio_fifo);
+	setup_timer(&dev->detect_timer,
+		r592_detect_timer, (long unsigned int)dev);
+
+	/* Host initialization */
+	host->caps = MEMSTICK_CAP_PAR4;
+	host->request = r592_submit_req;
+	host->set_param = r592_set_param;
+	r592_check_dma(dev);
+
+	dev->io_thread = kthread_run(r592_process_thread, dev, "r592_io");
+	if (IS_ERR(dev->io_thread)) {
+		error = PTR_ERR(dev->io_thread);
+		goto error5;
+	}
+
+	/* This is just a precation, so don't fail */
+	dev->dummy_dma_page = pci_alloc_consistent(pdev, PAGE_SIZE,
+		&dev->dummy_dma_page_physical_address);
+	r592_stop_dma(dev , 0);
+
+	if (request_irq(dev->irq, &r592_irq, IRQF_SHARED,
+			  DRV_NAME, dev))
+		goto error6;
+
+	r592_update_card_detect(dev);
+	if (memstick_add_host(host))
+		goto error7;
+
+	message("driver succesfully loaded");
+	return 0;
+error7:
+	free_irq(dev->irq, dev);
+error6:
+	if (dev->dummy_dma_page)
+		pci_free_consistent(pdev, PAGE_SIZE, dev->dummy_dma_page,
+			dev->dummy_dma_page_physical_address);
+
+	kthread_stop(dev->io_thread);
+error5:
+	iounmap(dev->mmio);
+error4:
+	pci_release_regions(pdev);
+error3:
+	pci_disable_device(pdev);
+error2:
+	memstick_free_host(host);
+error1:
+	return error;
+}
+
+static void r592_remove(struct pci_dev *pdev)
+{
+	int error = 0;
+	struct r592_device *dev = pci_get_drvdata(pdev);
+
+	/* Stop the processing thread.
+	That ensures that we won't take any more requests */
+	kthread_stop(dev->io_thread);
+
+	r592_enable_device(dev, false);
+
+	while (!error && dev->req) {
+		dev->req->error = -ETIME;
+		error = memstick_next_req(dev->host, &dev->req);
+	}
+	memstick_remove_host(dev->host);
+
+	free_irq(dev->irq, dev);
+	iounmap(dev->mmio);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	memstick_free_host(dev->host);
+
+	if (dev->dummy_dma_page)
+		pci_free_consistent(pdev, PAGE_SIZE, dev->dummy_dma_page,
+			dev->dummy_dma_page_physical_address);
+}
+
+#ifdef CONFIG_PM
+static int r592_suspend(struct device *core_dev)
+{
+	struct pci_dev *pdev = to_pci_dev(core_dev);
+	struct r592_device *dev = pci_get_drvdata(pdev);
+
+	r592_clear_interrupts(dev);
+	memstick_suspend_host(dev->host);
+	del_timer_sync(&dev->detect_timer);
+	return 0;
+}
+
+static int r592_resume(struct device *core_dev)
+{
+	struct pci_dev *pdev = to_pci_dev(core_dev);
+	struct r592_device *dev = pci_get_drvdata(pdev);
+
+	r592_clear_interrupts(dev);
+	r592_enable_device(dev, false);
+	memstick_resume_host(dev->host);
+	r592_update_card_detect(dev);
+	return 0;
+}
+
+SIMPLE_DEV_PM_OPS(r592_pm_ops, r592_suspend, r592_resume);
+#endif
+
+MODULE_DEVICE_TABLE(pci, r592_pci_id_tbl);
+
+static struct pci_driver r852_pci_driver = {
+	.name		= DRV_NAME,
+	.id_table	= r592_pci_id_tbl,
+	.probe		= r592_probe,
+	.remove		= r592_remove,
+#ifdef CONFIG_PM
+	.driver.pm	= &r592_pm_ops,
+#endif
+};
+
+static __init int r592_module_init(void)
+{
+	return pci_register_driver(&r852_pci_driver);
+}
+
+static void __exit r592_module_exit(void)
+{
+	pci_unregister_driver(&r852_pci_driver);
+}
+
+module_init(r592_module_init);
+module_exit(r592_module_exit);
+
+module_param(enable_dma, bool, S_IRUGO);
+MODULE_PARM_DESC(enable_dma, "Enable usage of the DMA (default)");
+module_param(debug, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug, "Debug level (0-3)");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Maxim Levitsky <maximlevitsky@gmail.com>");
+MODULE_DESCRIPTION("Ricoh R5C592 Memstick/Memstick PRO card reader driver");

diff --git a/drivers/memstick/host/r592.h b/drivers/memstick/host/r592.h
new file mode 100644
index 0000000..eee264e
--- /dev/null
+++ b/drivers/memstick/host/r592.h

@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2010 - Maxim Levitsky
+ * driver for Ricoh memstick readers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef R592_H
+
+#include <linux/memstick.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/kfifo.h>
+#include <linux/ctype.h>
+
+/* write to this reg (number,len) triggers TPC execution */
+#define R592_TPC_EXEC			0x00
+#define R592_TPC_EXEC_LEN_SHIFT		16		/* Bits 16..25 are TPC len */
+#define R592_TPC_EXEC_BIG_FIFO		(1 << 26)	/* If bit 26 is set, large fifo is used (reg 48) */
+#define R592_TPC_EXEC_TPC_SHIFT		28		/* Bits 28..31 are the TPC number */
+
+
+/* Window for small TPC fifo (big endian)*/
+/* reads and writes always are done in  8 byte chunks */
+/* Not used in driver, because large fifo does better job */
+#define R592_SFIFO			0x08
+
+
+/* Status register (ms int, small fifo, IO)*/
+#define R592_STATUS			0x10
+							/* Parallel INT bits */
+#define R592_STATUS_P_CMDNACK		(1 << 16)	/* INT reg: NACK (parallel mode) */
+#define R592_STATUS_P_BREQ		(1 << 17)	/* INT reg: card ready (parallel mode)*/
+#define R592_STATUS_P_INTERR		(1 << 18)	/* INT reg: int error (parallel mode)*/
+#define R592_STATUS_P_CED		(1 << 19)	/* INT reg: command done (parallel mode) */
+
+							/* Fifo status */
+#define R592_STATUS_SFIFO_FULL		(1 << 20)	/* Small Fifo almost full (last chunk is written) */
+#define R592_STATUS_SFIFO_EMPTY		(1 << 21)	/* Small Fifo empty */
+
+							/* Error detection via CRC */
+#define R592_STATUS_SEND_ERR		(1 << 24)	/* Send failed */
+#define R592_STATUS_RECV_ERR		(1 << 25)	/* Recieve failed */
+
+							/* Card state */
+#define R592_STATUS_RDY			(1 << 28)	/* RDY signal recieved */
+#define R592_STATUS_CED			(1 << 29)	/* INT: Command done (serial mode)*/
+#define R592_STATUS_SFIFO_INPUT		(1 << 30)	/* Small fifo recieved data*/
+
+#define R592_SFIFO_SIZE			32		/* total size of small fifo is 32 bytes */
+#define R592_SFIFO_PACKET		8		/* packet size of small fifo */
+
+/* IO control */
+#define R592_IO				0x18
+#define	R592_IO_16			(1 << 16)	/* Set by default, can be cleared */
+#define	R592_IO_18			(1 << 18)	/* Set by default, can be cleared */
+#define	R592_IO_SERIAL1			(1 << 20)	/* Set by default, can be cleared, (cleared on parallel) */
+#define	R592_IO_22			(1 << 22)	/* Set by default, can be cleared */
+#define R592_IO_DIRECTION		(1 << 24)	/* TPC direction (1 write 0 read) */
+#define	R592_IO_26			(1 << 26)	/* Set by default, can be cleared */
+#define	R592_IO_SERIAL2			(1 << 30)	/* Set by default, can be cleared (cleared on parallel), serial doesn't work if unset */
+#define R592_IO_RESET			(1 << 31)	/* Reset, sets defaults*/
+
+
+/* Turns hardware on/off */
+#define R592_POWER			0x20		/* bits 0-7 writeable */
+#define R592_POWER_0			(1 << 0)	/* set on start, cleared on stop - must be set*/
+#define R592_POWER_1			(1 << 1)	/* set on start, cleared on stop - must be set*/
+#define R592_POWER_3			(1 << 3)	/* must be clear */
+#define R592_POWER_20			(1 << 5)	/* set before switch to parallel */
+
+/* IO mode*/
+#define R592_IO_MODE			0x24
+#define R592_IO_MODE_SERIAL		1
+#define R592_IO_MODE_PARALLEL		3
+
+
+/* IRQ,card detection,large fifo (first word irq status, second enable) */
+/* IRQs are ACKed by clearing the bits */
+#define R592_REG_MSC			0x28
+#define R592_REG_MSC_PRSNT		(1 << 1)	/* card present (only status)*/
+#define R592_REG_MSC_IRQ_INSERT		(1 << 8)	/* detect insert / card insered */
+#define R592_REG_MSC_IRQ_REMOVE		(1 << 9)	/* detect removal / card removed */
+#define R592_REG_MSC_FIFO_EMPTY		(1 << 10)	/* fifo is empty */
+#define R592_REG_MSC_FIFO_DMA_DONE	(1 << 11)	/* dma enable / dma done */
+
+#define R592_REG_MSC_FIFO_USER_ORN	(1 << 12)	/* set if software reads empty fifo (if R592_REG_MSC_FIFO_EMPTY is set) */
+#define R592_REG_MSC_FIFO_MISMATH	(1 << 13)	/* set if amount of data in fifo doesn't match amount in TPC */
+#define R592_REG_MSC_FIFO_DMA_ERR	(1 << 14)	/* IO failure */
+#define R592_REG_MSC_LED		(1 << 15)	/* clear to turn led off (only status)*/
+
+#define DMA_IRQ_ACK_MASK \
+	(R592_REG_MSC_FIFO_DMA_DONE | R592_REG_MSC_FIFO_DMA_ERR)
+
+#define DMA_IRQ_EN_MASK (DMA_IRQ_ACK_MASK << 16)
+
+#define IRQ_ALL_ACK_MASK 0x00007F00
+#define IRQ_ALL_EN_MASK (IRQ_ALL_ACK_MASK << 16)
+
+/* DMA address for large FIFO read/writes*/
+#define R592_FIFO_DMA			0x2C
+
+/* PIO access to large FIFO (512 bytes) (big endian)*/
+#define R592_FIFO_PIO			0x30
+#define R592_LFIFO_SIZE			512		/* large fifo size */
+
+
+/* large FIFO DMA settings */
+#define R592_FIFO_DMA_SETTINGS		0x34
+#define R592_FIFO_DMA_SETTINGS_EN	(1 << 0)	/* DMA enabled */
+#define R592_FIFO_DMA_SETTINGS_DIR	(1 << 1)	/* Dma direction (1 read, 0 write) */
+#define R592_FIFO_DMA_SETTINGS_CAP	(1 << 24)	/* Dma is aviable */
+
+/* Maybe just an delay */
+/* Bits 17..19 are just number */
+/* bit 16 is set, then bit 20 is waited */
+/* time to wait is about 50 spins * 2 ^ (bits 17..19) */
+/* seems to be possible just to ignore */
+/* Probably debug register */
+#define R592_REG38			0x38
+#define R592_REG38_CHANGE		(1 << 16)	/* Start bit */
+#define R592_REG38_DONE			(1 << 20)	/* HW set this after the delay */
+#define R592_REG38_SHIFT		17
+
+/* Debug register, written (0xABCDEF00) when error happens - not used*/
+#define R592_REG_3C			0x3C
+
+struct r592_device {
+	struct pci_dev *pci_dev;
+	struct memstick_host	*host;		/* host backpointer */
+	struct memstick_request *req;		/* current request */
+
+	/* Registers, IRQ */
+	void __iomem *mmio;
+	int irq;
+	spinlock_t irq_lock;
+	spinlock_t io_thread_lock;
+	struct timer_list detect_timer;
+
+	struct task_struct *io_thread;
+	bool parallel_mode;
+
+	DECLARE_KFIFO(pio_fifo, u8, sizeof(u32));
+
+	/* DMA area */
+	int dma_capable;
+	int dma_error;
+	struct completion dma_done;
+	void *dummy_dma_page;
+	dma_addr_t dummy_dma_page_physical_address;
+
+};
+
+#define DRV_NAME "r592"
+
+
+#define message(format, ...) \
+	printk(KERN_INFO DRV_NAME ": " format "\n", ## __VA_ARGS__)
+
+#define __dbg(level, format, ...) \
+	do { \
+		if (debug >= level) \
+			printk(KERN_DEBUG DRV_NAME \
+				": " format "\n", ## __VA_ARGS__); \
+	} while (0)
+
+
+#define dbg(format, ...)		__dbg(1, format, ## __VA_ARGS__)
+#define dbg_verbose(format, ...)	__dbg(2, format, ## __VA_ARGS__)
+#define dbg_reg(format, ...)		__dbg(3, format, ## __VA_ARGS__)
+
+#endif

diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index 9c511c1..011cb6c 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c

@@ -416,7 +416,6 @@
 				: chip->companion;
 	unsigned char status_buf[INT_STATUS_NUM];
 	unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
-	struct irq_desc *desc;
 	int i, data, mask, ret = -EINVAL;
 	int __irq;
 
@@ -468,19 +467,17 @@
 	if (!chip->core_irq)
 		goto out;
 
-	desc = irq_to_desc(chip->core_irq);
-
 	/* register IRQ by genirq */
 	for (i = 0; i < ARRAY_SIZE(pm860x_irqs); i++) {
 		__irq = i + chip->irq_base;
-		set_irq_chip_data(__irq, chip);
-		set_irq_chip_and_handler(__irq, &pm860x_irq_chip,
+		irq_set_chip_data(__irq, chip);
+		irq_set_chip_and_handler(__irq, &pm860x_irq_chip,
 					 handle_edge_irq);
-		set_irq_nested_thread(__irq, 1);
+		irq_set_nested_thread(__irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(__irq, IRQF_VALID);
 #else
-		set_irq_noprobe(__irq);
+		irq_set_noprobe(__irq);
 #endif
 	}
 

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index a9a1af4..e986f91 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig

@@ -133,6 +133,7 @@
 	tristate "TPS61050/61052 Boost Converters"
 	depends on I2C
 	select REGULATOR
+	select MFD_CORE
 	select REGULATOR_FIXED_VOLTAGE
 	help
 	  This option enables a driver for the TP61050/TPS61052

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 47f5709..ef489f2 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile

@@ -63,7 +63,7 @@
 obj-$(CONFIG_PMIC_DA903X)	+= da903x.o
 max8925-objs			:= max8925-core.o max8925-i2c.o
 obj-$(CONFIG_MFD_MAX8925)	+= max8925.o
-obj-$(CONFIG_MFD_MAX8997)	+= max8997.o
+obj-$(CONFIG_MFD_MAX8997)	+= max8997.o max8997-irq.o
 obj-$(CONFIG_MFD_MAX8998)	+= max8998.o max8998-irq.o
 
 pcf50633-objs			:= pcf50633-core.o pcf50633-irq.o

diff --git a/drivers/mfd/ab3550-core.c b/drivers/mfd/ab3550-core.c
index c12d042..ff86acf 100644
--- a/drivers/mfd/ab3550-core.c
+++ b/drivers/mfd/ab3550-core.c

@@ -668,7 +668,7 @@
 	struct ab3550_platform_data *plf_data;
 	bool val;
 
-	ab = get_irq_chip_data(irq);
+	ab = irq_get_chip_data(irq);
 	plf_data = ab->i2c_client[0]->dev.platform_data;
 	irq -= plf_data->irq.base;
 	val = ((ab->startup_events[irq / 8] & BIT(irq % 8)) != 0);
@@ -1296,14 +1296,14 @@
 		unsigned int irq;
 
 		irq = ab3550_plf_data->irq.base + i;
-		set_irq_chip_data(irq, ab);
-		set_irq_chip_and_handler(irq, &ab3550_irq_chip,
-			handle_simple_irq);
-		set_irq_nested_thread(irq, 1);
+		irq_set_chip_data(irq, ab);
+		irq_set_chip_and_handler(irq, &ab3550_irq_chip,
+					 handle_simple_irq);
+		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, IRQF_VALID);
 #else
-		set_irq_noprobe(irq);
+		irq_set_noprobe(irq);
 #endif
 	}
 

diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 6e185b2..62e33e2 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c

@@ -334,14 +334,14 @@
 	int irq;
 
 	for (irq = base; irq < base + AB8500_NR_IRQS; irq++) {
-		set_irq_chip_data(irq, ab8500);
-		set_irq_chip_and_handler(irq, &ab8500_irq_chip,
+		irq_set_chip_data(irq, ab8500);
+		irq_set_chip_and_handler(irq, &ab8500_irq_chip,
 					 handle_simple_irq);
-		set_irq_nested_thread(irq, 1);
+		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, IRQF_VALID);
 #else
-		set_irq_noprobe(irq);
+		irq_set_noprobe(irq);
 #endif
 	}
 
@@ -357,8 +357,8 @@
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, 0);
 #endif
-		set_irq_chip_and_handler(irq, NULL, NULL);
-		set_irq_chip_data(irq, NULL);
+		irq_set_chip_and_handler(irq, NULL, NULL);
+		irq_set_chip_data(irq, NULL);
 	}
 }
 

diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index 0241f08..d4a851c 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c

@@ -139,13 +139,12 @@
 
 static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
 {
+	struct asic3 *asic = irq_desc_get_handler_data(desc);
+	struct irq_data *data = irq_desc_get_irq_data(desc);
 	int iter, i;
 	unsigned long flags;
-	struct asic3 *asic;
 
-	desc->irq_data.chip->irq_ack(&desc->irq_data);
-
-	asic = get_irq_data(irq);
+	data->chip->irq_ack(irq_data);
 
 	for (iter = 0 ; iter < MAX_ASIC_ISR_LOOPS; iter++) {
 		u32 status;
@@ -188,8 +187,7 @@
 					irqnr = asic->irq_base +
 						(ASIC3_GPIOS_PER_BANK * bank)
 						+ i;
-					desc = irq_to_desc(irqnr);
-					desc->handle_irq(irqnr, desc);
+					generic_handle_irq(irqnr);
 					if (asic->irq_bothedge[bank] & bit)
 						asic3_irq_flip_edge(asic, base,
 								    bit);
@@ -200,11 +198,8 @@
 		/* Handle remaining IRQs in the status register */
 		for (i = ASIC3_NUM_GPIOS; i < ASIC3_NR_IRQS; i++) {
 			/* They start at bit 4 and go up */
-			if (status & (1 << (i - ASIC3_NUM_GPIOS + 4))) {
-				desc = irq_to_desc(asic->irq_base + i);
-				desc->handle_irq(asic->irq_base + i,
-						 desc);
-			}
+			if (status & (1 << (i - ASIC3_NUM_GPIOS + 4)))
+				generic_handle_irq(asic->irq_base + i);
 		}
 	}
 
@@ -393,21 +388,21 @@
 
 	for (irq = irq_base; irq < irq_base + ASIC3_NR_IRQS; irq++) {
 		if (irq < asic->irq_base + ASIC3_NUM_GPIOS)
-			set_irq_chip(irq, &asic3_gpio_irq_chip);
+			irq_set_chip(irq, &asic3_gpio_irq_chip);
 		else
-			set_irq_chip(irq, &asic3_irq_chip);
+			irq_set_chip(irq, &asic3_irq_chip);
 
-		set_irq_chip_data(irq, asic);
-		set_irq_handler(irq, handle_level_irq);
+		irq_set_chip_data(irq, asic);
+		irq_set_handler(irq, handle_level_irq);
 		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
 	}
 
 	asic3_write_register(asic, ASIC3_OFFSET(INTR, INT_MASK),
 			     ASIC3_INTMASK_GINTMASK);
 
-	set_irq_chained_handler(asic->irq_nr, asic3_irq_demux);
-	set_irq_type(asic->irq_nr, IRQ_TYPE_EDGE_RISING);
-	set_irq_data(asic->irq_nr, asic);
+	irq_set_chained_handler(asic->irq_nr, asic3_irq_demux);
+	irq_set_irq_type(asic->irq_nr, IRQ_TYPE_EDGE_RISING);
+	irq_set_handler_data(asic->irq_nr, asic);
 
 	return 0;
 }
@@ -421,11 +416,10 @@
 
 	for (irq = irq_base; irq < irq_base + ASIC3_NR_IRQS; irq++) {
 		set_irq_flags(irq, 0);
-		set_irq_handler(irq, NULL);
-		set_irq_chip(irq, NULL);
-		set_irq_chip_data(irq, NULL);
+		irq_set_chip_and_handler(irq, NULL, NULL);
+		irq_set_chip_data(irq, NULL);
 	}
-	set_irq_chained_handler(asic->irq_nr, NULL);
+	irq_set_chained_handler(asic->irq_nr, NULL);
 }
 
 /* GPIOs */

diff --git a/drivers/mfd/cs5535-mfd.c b/drivers/mfd/cs5535-mfd.c
index 886a068..155fa04 100644
--- a/drivers/mfd/cs5535-mfd.c
+++ b/drivers/mfd/cs5535-mfd.c

@@ -27,6 +27,7 @@
 #include <linux/mfd/core.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <asm/olpc.h>
 
 #define DRV_NAME "cs5535-mfd"
 
@@ -111,6 +112,20 @@
 	},
 };
 
+#ifdef CONFIG_OLPC
+static void __devinit cs5535_clone_olpc_cells(void)
+{
+	const char *acpi_clones[] = { "olpc-xo1-pm-acpi", "olpc-xo1-sci-acpi" };
+
+	if (!machine_is_olpc())
+		return;
+
+	mfd_clone_cell("cs5535-acpi", acpi_clones, ARRAY_SIZE(acpi_clones));
+}
+#else
+static void cs5535_clone_olpc_cells(void) { }
+#endif
+
 static int __devinit cs5535_mfd_probe(struct pci_dev *pdev,
 		const struct pci_device_id *id)
 {
@@ -139,6 +154,7 @@
 		dev_err(&pdev->dev, "MFD add devices failed: %d\n", err);
 		goto err_disable;
 	}
+	cs5535_clone_olpc_cells();
 
 	dev_info(&pdev->dev, "%zu devices registered.\n",
 			ARRAY_SIZE(cs5535_mfd_cells));

diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index 9e2d8dd..f2f4029 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c

@@ -162,6 +162,7 @@
 
 static struct irq_chip pcap_irq_chip = {
 	.name		= "pcap",
+	.irq_disable	= pcap_mask_irq,
 	.irq_mask	= pcap_mask_irq,
 	.irq_unmask	= pcap_unmask_irq,
 };
@@ -196,17 +197,8 @@
 		local_irq_disable();
 		service = isr & ~msr;
 		for (irq = pcap->irq_base; service; service >>= 1, irq++) {
-			if (service & 1) {
-				struct irq_desc *desc = irq_to_desc(irq);
-
-				if (WARN(!desc, "Invalid PCAP IRQ %d\n", irq))
-					break;
-
-				if (desc->status & IRQ_DISABLED)
-					note_interrupt(irq, desc, IRQ_NONE);
-				else
-					desc->handle_irq(irq, desc);
-			}
+			if (service & 1)
+				generic_handle_irq(irq);
 		}
 		local_irq_enable();
 		ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr);
@@ -215,7 +207,7 @@
 
 static void pcap_irq_handler(unsigned int irq, struct irq_desc *desc)
 {
-	struct pcap_chip *pcap = get_irq_data(irq);
+	struct pcap_chip *pcap = irq_get_handler_data(irq);
 
 	desc->irq_data.chip->irq_ack(&desc->irq_data);
 	queue_work(pcap->workqueue, &pcap->isr_work);
@@ -419,7 +411,7 @@
 
 	/* cleanup irqchip */
 	for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++)
-		set_irq_chip_and_handler(i, NULL, NULL);
+		irq_set_chip_and_handler(i, NULL, NULL);
 
 	destroy_workqueue(pcap->workqueue);
 
@@ -476,12 +468,12 @@
 
 	/* setup irq chip */
 	for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++) {
-		set_irq_chip_and_handler(i, &pcap_irq_chip, handle_simple_irq);
-		set_irq_chip_data(i, pcap);
+		irq_set_chip_and_handler(i, &pcap_irq_chip, handle_simple_irq);
+		irq_set_chip_data(i, pcap);
 #ifdef CONFIG_ARM
 		set_irq_flags(i, IRQF_VALID);
 #else
-		set_irq_noprobe(i);
+		irq_set_noprobe(i);
 #endif
 	}
 
@@ -490,10 +482,10 @@
 	ezx_pcap_write(pcap, PCAP_REG_ISR, PCAP_CLEAR_INTERRUPT_REGISTER);
 	pcap->msr = PCAP_MASK_ALL_INTERRUPT;
 
-	set_irq_type(spi->irq, IRQ_TYPE_EDGE_RISING);
-	set_irq_data(spi->irq, pcap);
-	set_irq_chained_handler(spi->irq, pcap_irq_handler);
-	set_irq_wake(spi->irq, 1);
+	irq_set_irq_type(spi->irq, IRQ_TYPE_EDGE_RISING);
+	irq_set_handler_data(spi->irq, pcap);
+	irq_set_chained_handler(spi->irq, pcap_irq_handler);
+	irq_set_irq_wake(spi->irq, 1);
 
 	/* ADC */
 	adc_irq = pcap_to_irq(pcap, (pdata->config & PCAP_SECOND_PORT) ?
@@ -522,7 +514,7 @@
 	free_irq(adc_irq, pcap);
 free_irqchip:
 	for (i = pcap->irq_base; i < (pcap->irq_base + PCAP_NIRQS); i++)
-		set_irq_chip_and_handler(i, NULL, NULL);
+		irq_set_chip_and_handler(i, NULL, NULL);
 /* destroy_workqueue: */
 	destroy_workqueue(pcap->workqueue);
 free_pcap:

diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index d00b6d1..bbaec0c 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c

@@ -100,7 +100,7 @@
 
 static void egpio_handler(unsigned int irq, struct irq_desc *desc)
 {
-	struct egpio_info *ei = get_irq_data(irq);
+	struct egpio_info *ei = irq_desc_get_handler_data(desc);
 	int irqpin;
 
 	/* Read current pins. */
@@ -113,9 +113,7 @@
 	for_each_set_bit(irqpin, &readval, ei->nirqs) {
 		/* Run irq handler */
 		pr_debug("got IRQ %d\n", irqpin);
-		irq = ei->irq_start + irqpin;
-		desc = irq_to_desc(irq);
-		desc->handle_irq(irq, desc);
+		generic_handle_irq(ei->irq_start + irqpin);
 	}
 }
 
@@ -346,14 +344,14 @@
 			ei->ack_write = 0;
 		irq_end = ei->irq_start + ei->nirqs;
 		for (irq = ei->irq_start; irq < irq_end; irq++) {
-			set_irq_chip(irq, &egpio_muxed_chip);
-			set_irq_chip_data(irq, ei);
-			set_irq_handler(irq, handle_simple_irq);
+			irq_set_chip_and_handler(irq, &egpio_muxed_chip,
+						 handle_simple_irq);
+			irq_set_chip_data(irq, ei);
 			set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
 		}
-		set_irq_type(ei->chained_irq, IRQ_TYPE_EDGE_RISING);
-		set_irq_data(ei->chained_irq, ei);
-		set_irq_chained_handler(ei->chained_irq, egpio_handler);
+		irq_set_irq_type(ei->chained_irq, IRQ_TYPE_EDGE_RISING);
+		irq_set_handler_data(ei->chained_irq, ei);
+		irq_set_chained_handler(ei->chained_irq, egpio_handler);
 		ack_irqs(ei);
 
 		device_init_wakeup(&pdev->dev, 1);
@@ -375,11 +373,10 @@
 	if (ei->chained_irq) {
 		irq_end = ei->irq_start + ei->nirqs;
 		for (irq = ei->irq_start; irq < irq_end; irq++) {
-			set_irq_chip(irq, NULL);
-			set_irq_handler(irq, NULL);
+			irq_set_chip_and_handler(irq, NULL, NULL);
 			set_irq_flags(irq, 0);
 		}
-		set_irq_chained_handler(ei->chained_irq, NULL);
+		irq_set_chained_handler(ei->chained_irq, NULL);
 		device_init_wakeup(&pdev->dev, 0);
 	}
 	iounmap(ei->base_addr);

diff --git a/drivers/mfd/htc-i2cpld.c b/drivers/mfd/htc-i2cpld.c
index 296ad15..d55065c 100644
--- a/drivers/mfd/htc-i2cpld.c
+++ b/drivers/mfd/htc-i2cpld.c

@@ -58,6 +58,7 @@
 	uint                    irq_start;
 	int                     nirqs;
 
+	unsigned int		flow_type;
 	/*
 	 * Work structure to allow for setting values outside of any
 	 * possible interrupt context
@@ -97,12 +98,7 @@
 
 static int htcpld_set_type(struct irq_data *data, unsigned int flags)
 {
-	struct irq_desc *d = irq_to_desc(data->irq);
-
-	if (!d) {
-		pr_err("HTCPLD invalid IRQ: %d\n", data->irq);
-		return -EINVAL;
-	}
+	struct htcpld_chip *chip = irq_data_get_irq_chip_data(data);
 
 	if (flags & ~IRQ_TYPE_SENSE_MASK)
 		return -EINVAL;
@@ -111,9 +107,7 @@
 	if (flags & (IRQ_TYPE_LEVEL_LOW|IRQ_TYPE_LEVEL_HIGH))
 		return -EINVAL;
 
-	d->status &= ~IRQ_TYPE_SENSE_MASK;
-	d->status |= flags;
-
+	chip->flow_type = flags;
 	return 0;
 }
 
@@ -135,7 +129,6 @@
 	unsigned int i;
 	unsigned long flags;
 	int irqpin;
-	struct irq_desc *desc;
 
 	if (!htcpld) {
 		pr_debug("htcpld is null in ISR\n");
@@ -195,23 +188,19 @@
 		 * associated interrupts.
 		 */
 		for (irqpin = 0; irqpin < chip->nirqs; irqpin++) {
-			unsigned oldb, newb;
-			int flags;
+			unsigned oldb, newb, type = chip->flow_type;
 
 			irq = chip->irq_start + irqpin;
-			desc = irq_to_desc(irq);
-			flags = desc->status;
 
 			/* Run the IRQ handler, but only if the bit value
 			 * changed, and the proper flags are set */
 			oldb = (old_val >> irqpin) & 1;
 			newb = (uval >> irqpin) & 1;
 
-			if ((!oldb && newb && (flags & IRQ_TYPE_EDGE_RISING)) ||
-			    (oldb && !newb &&
-			     (flags & IRQ_TYPE_EDGE_FALLING))) {
+			if ((!oldb && newb && (type & IRQ_TYPE_EDGE_RISING)) ||
+			    (oldb && !newb && (type & IRQ_TYPE_EDGE_FALLING))) {
 				pr_debug("fire IRQ %d\n", irqpin);
-				desc->handle_irq(irq, desc);
+				generic_handle_irq(irq);
 			}
 		}
 	}
@@ -359,13 +348,13 @@
 	/* Setup irq handlers */
 	irq_end = chip->irq_start + chip->nirqs;
 	for (irq = chip->irq_start; irq < irq_end; irq++) {
-		set_irq_chip(irq, &htcpld_muxed_chip);
-		set_irq_chip_data(irq, chip);
-		set_irq_handler(irq, handle_simple_irq);
+		irq_set_chip_and_handler(irq, &htcpld_muxed_chip,
+					 handle_simple_irq);
+		irq_set_chip_data(irq, chip);
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
 #else
-		set_irq_probe(irq);
+		irq_set_probe(irq);
 #endif
 	}
 

diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c
index aa518b9..a0bd0cf 100644
--- a/drivers/mfd/jz4740-adc.c
+++ b/drivers/mfd/jz4740-adc.c

@@ -112,7 +112,7 @@
 
 static void jz4740_adc_irq_demux(unsigned int irq, struct irq_desc *desc)
 {
-	struct jz4740_adc *adc = get_irq_desc_data(desc);
+	struct jz4740_adc *adc = irq_desc_get_handler_data(desc);
 	uint8_t status;
 	unsigned int i;
 
@@ -310,13 +310,13 @@
 	platform_set_drvdata(pdev, adc);
 
 	for (irq = adc->irq_base; irq < adc->irq_base + 5; ++irq) {
-		set_irq_chip_data(irq, adc);
-		set_irq_chip_and_handler(irq, &jz4740_adc_irq_chip,
-		    handle_level_irq);
+		irq_set_chip_data(irq, adc);
+		irq_set_chip_and_handler(irq, &jz4740_adc_irq_chip,
+					 handle_level_irq);
 	}
 
-	set_irq_data(adc->irq, adc);
-	set_irq_chained_handler(adc->irq, jz4740_adc_irq_demux);
+	irq_set_handler_data(adc->irq, adc);
+	irq_set_chained_handler(adc->irq, jz4740_adc_irq_demux);
 
 	writeb(0x00, adc->base + JZ_REG_ADC_ENABLE);
 	writeb(0xff, adc->base + JZ_REG_ADC_CTRL);
@@ -347,8 +347,8 @@
 
 	mfd_remove_devices(&pdev->dev);
 
-	set_irq_data(adc->irq, NULL);
-	set_irq_chained_handler(adc->irq, NULL);
+	irq_set_handler_data(adc->irq, NULL);
+	irq_set_chained_handler(adc->irq, NULL);
 
 	iounmap(adc->base);
 	release_mem_region(adc->mem->start, resource_size(adc->mem));

diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index 0e998dc..58cc5fd 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c

@@ -517,7 +517,6 @@
 			    struct max8925_platform_data *pdata)
 {
 	unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
-	struct irq_desc *desc;
 	int i, ret;
 	int __irq;
 
@@ -544,19 +543,18 @@
 	mutex_init(&chip->irq_lock);
 	chip->core_irq = irq;
 	chip->irq_base = pdata->irq_base;
-	desc = irq_to_desc(chip->core_irq);
 
 	/* register with genirq */
 	for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) {
 		__irq = i + chip->irq_base;
-		set_irq_chip_data(__irq, chip);
-		set_irq_chip_and_handler(__irq, &max8925_irq_chip,
+		irq_set_chip_data(__irq, chip);
+		irq_set_chip_and_handler(__irq, &max8925_irq_chip,
 					 handle_edge_irq);
-		set_irq_nested_thread(__irq, 1);
+		irq_set_nested_thread(__irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(__irq, IRQF_VALID);
 #else
-		set_irq_noprobe(__irq);
+		irq_set_noprobe(__irq);
 #endif
 	}
 	if (!irq) {

diff --git a/drivers/mfd/max8997-irq.c b/drivers/mfd/max8997-irq.c
new file mode 100644
index 0000000..638bf7e
--- /dev/null
+++ b/drivers/mfd/max8997-irq.c

@@ -0,0 +1,377 @@
+/*
+ * max8997-irq.c - Interrupt controller support for MAX8997
+ *
+ * Copyright (C) 2011 Samsung Electronics Co.Ltd
+ * MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * This driver is based on max8998-irq.c
+ */
+
+#include <linux/err.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/max8997.h>
+#include <linux/mfd/max8997-private.h>
+
+static const u8 max8997_mask_reg[] = {
+	[PMIC_INT1] = MAX8997_REG_INT1MSK,
+	[PMIC_INT2] = MAX8997_REG_INT2MSK,
+	[PMIC_INT3] = MAX8997_REG_INT3MSK,
+	[PMIC_INT4] = MAX8997_REG_INT4MSK,
+	[FUEL_GAUGE] = MAX8997_REG_INVALID,
+	[MUIC_INT1] = MAX8997_MUIC_REG_INTMASK1,
+	[MUIC_INT2] = MAX8997_MUIC_REG_INTMASK2,
+	[MUIC_INT3] = MAX8997_MUIC_REG_INTMASK3,
+	[GPIO_LOW] = MAX8997_REG_INVALID,
+	[GPIO_HI] = MAX8997_REG_INVALID,
+	[FLASH_STATUS] = MAX8997_REG_INVALID,
+};
+
+static struct i2c_client *get_i2c(struct max8997_dev *max8997,
+				enum max8997_irq_source src)
+{
+	switch (src) {
+	case PMIC_INT1 ... PMIC_INT4:
+		return max8997->i2c;
+	case FUEL_GAUGE:
+		return NULL;
+	case MUIC_INT1 ... MUIC_INT3:
+		return max8997->muic;
+	case GPIO_LOW ... GPIO_HI:
+		return max8997->i2c;
+	case FLASH_STATUS:
+		return max8997->i2c;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+struct max8997_irq_data {
+	int mask;
+	enum max8997_irq_source group;
+};
+
+#define DECLARE_IRQ(idx, _group, _mask)		\
+	[(idx)] = { .group = (_group), .mask = (_mask) }
+static const struct max8997_irq_data max8997_irqs[] = {
+	DECLARE_IRQ(MAX8997_PMICIRQ_PWRONR,	PMIC_INT1, 1 << 0),
+	DECLARE_IRQ(MAX8997_PMICIRQ_PWRONF,	PMIC_INT1, 1 << 1),
+	DECLARE_IRQ(MAX8997_PMICIRQ_PWRON1SEC,	PMIC_INT1, 1 << 3),
+	DECLARE_IRQ(MAX8997_PMICIRQ_JIGONR,	PMIC_INT1, 1 << 4),
+	DECLARE_IRQ(MAX8997_PMICIRQ_JIGONF,	PMIC_INT1, 1 << 5),
+	DECLARE_IRQ(MAX8997_PMICIRQ_LOWBAT2,	PMIC_INT1, 1 << 6),
+	DECLARE_IRQ(MAX8997_PMICIRQ_LOWBAT1,	PMIC_INT1, 1 << 7),
+
+	DECLARE_IRQ(MAX8997_PMICIRQ_JIGR,	PMIC_INT2, 1 << 0),
+	DECLARE_IRQ(MAX8997_PMICIRQ_JIGF,	PMIC_INT2, 1 << 1),
+	DECLARE_IRQ(MAX8997_PMICIRQ_MR,		PMIC_INT2, 1 << 2),
+	DECLARE_IRQ(MAX8997_PMICIRQ_DVS1OK,	PMIC_INT2, 1 << 3),
+	DECLARE_IRQ(MAX8997_PMICIRQ_DVS2OK,	PMIC_INT2, 1 << 4),
+	DECLARE_IRQ(MAX8997_PMICIRQ_DVS3OK,	PMIC_INT2, 1 << 5),
+	DECLARE_IRQ(MAX8997_PMICIRQ_DVS4OK,	PMIC_INT2, 1 << 6),
+
+	DECLARE_IRQ(MAX8997_PMICIRQ_CHGINS,	PMIC_INT3, 1 << 0),
+	DECLARE_IRQ(MAX8997_PMICIRQ_CHGRM,	PMIC_INT3, 1 << 1),
+	DECLARE_IRQ(MAX8997_PMICIRQ_DCINOVP,	PMIC_INT3, 1 << 2),
+	DECLARE_IRQ(MAX8997_PMICIRQ_TOPOFFR,	PMIC_INT3, 1 << 3),
+	DECLARE_IRQ(MAX8997_PMICIRQ_CHGRSTF,	PMIC_INT3, 1 << 5),
+	DECLARE_IRQ(MAX8997_PMICIRQ_MBCHGTMEXPD,	PMIC_INT3, 1 << 7),
+
+	DECLARE_IRQ(MAX8997_PMICIRQ_RTC60S,	PMIC_INT4, 1 << 0),
+	DECLARE_IRQ(MAX8997_PMICIRQ_RTCA1,	PMIC_INT4, 1 << 1),
+	DECLARE_IRQ(MAX8997_PMICIRQ_RTCA2,	PMIC_INT4, 1 << 2),
+	DECLARE_IRQ(MAX8997_PMICIRQ_SMPL_INT,	PMIC_INT4, 1 << 3),
+	DECLARE_IRQ(MAX8997_PMICIRQ_RTC1S,	PMIC_INT4, 1 << 4),
+	DECLARE_IRQ(MAX8997_PMICIRQ_WTSR,	PMIC_INT4, 1 << 5),
+
+	DECLARE_IRQ(MAX8997_MUICIRQ_ADCError,	MUIC_INT1, 1 << 2),
+	DECLARE_IRQ(MAX8997_MUICIRQ_ADCLow,	MUIC_INT1, 1 << 1),
+	DECLARE_IRQ(MAX8997_MUICIRQ_ADC,	MUIC_INT1, 1 << 0),
+
+	DECLARE_IRQ(MAX8997_MUICIRQ_VBVolt,	MUIC_INT2, 1 << 4),
+	DECLARE_IRQ(MAX8997_MUICIRQ_DBChg,	MUIC_INT2, 1 << 3),
+	DECLARE_IRQ(MAX8997_MUICIRQ_DCDTmr,	MUIC_INT2, 1 << 2),
+	DECLARE_IRQ(MAX8997_MUICIRQ_ChgDetRun,	MUIC_INT2, 1 << 1),
+	DECLARE_IRQ(MAX8997_MUICIRQ_ChgTyp,	MUIC_INT2, 1 << 0),
+
+	DECLARE_IRQ(MAX8997_MUICIRQ_OVP,	MUIC_INT3, 1 << 2),
+};
+
+static void max8997_irq_lock(struct irq_data *data)
+{
+	struct max8997_dev *max8997 = irq_get_chip_data(data->irq);
+
+	mutex_lock(&max8997->irqlock);
+}
+
+static void max8997_irq_sync_unlock(struct irq_data *data)
+{
+	struct max8997_dev *max8997 = irq_get_chip_data(data->irq);
+	int i;
+
+	for (i = 0; i < MAX8997_IRQ_GROUP_NR; i++) {
+		u8 mask_reg = max8997_mask_reg[i];
+		struct i2c_client *i2c = get_i2c(max8997, i);
+
+		if (mask_reg == MAX8997_REG_INVALID ||
+				IS_ERR_OR_NULL(i2c))
+			continue;
+		max8997->irq_masks_cache[i] = max8997->irq_masks_cur[i];
+
+		max8997_write_reg(i2c, max8997_mask_reg[i],
+				max8997->irq_masks_cur[i]);
+	}
+
+	mutex_unlock(&max8997->irqlock);
+}
+
+static const inline struct max8997_irq_data *
+irq_to_max8997_irq(struct max8997_dev *max8997, int irq)
+{
+	return &max8997_irqs[irq - max8997->irq_base];
+}
+
+static void max8997_irq_mask(struct irq_data *data)
+{
+	struct max8997_dev *max8997 = irq_get_chip_data(data->irq);
+	const struct max8997_irq_data *irq_data = irq_to_max8997_irq(max8997,
+								data->irq);
+
+	max8997->irq_masks_cur[irq_data->group] |= irq_data->mask;
+}
+
+static void max8997_irq_unmask(struct irq_data *data)
+{
+	struct max8997_dev *max8997 = irq_get_chip_data(data->irq);
+	const struct max8997_irq_data *irq_data = irq_to_max8997_irq(max8997,
+								data->irq);
+
+	max8997->irq_masks_cur[irq_data->group] &= ~irq_data->mask;
+}
+
+static struct irq_chip max8997_irq_chip = {
+	.name			= "max8997",
+	.irq_bus_lock		= max8997_irq_lock,
+	.irq_bus_sync_unlock	= max8997_irq_sync_unlock,
+	.irq_mask		= max8997_irq_mask,
+	.irq_unmask		= max8997_irq_unmask,
+};
+
+#define MAX8997_IRQSRC_PMIC		(1 << 1)
+#define MAX8997_IRQSRC_FUELGAUGE	(1 << 2)
+#define MAX8997_IRQSRC_MUIC		(1 << 3)
+#define MAX8997_IRQSRC_GPIO		(1 << 4)
+#define MAX8997_IRQSRC_FLASH		(1 << 5)
+static irqreturn_t max8997_irq_thread(int irq, void *data)
+{
+	struct max8997_dev *max8997 = data;
+	u8 irq_reg[MAX8997_IRQ_GROUP_NR] = {};
+	u8 irq_src;
+	int ret;
+	int i;
+
+	ret = max8997_read_reg(max8997->i2c, MAX8997_REG_INTSRC, &irq_src);
+	if (ret < 0) {
+		dev_err(max8997->dev, "Failed to read interrupt source: %d\n",
+				ret);
+		return IRQ_NONE;
+	}
+
+	if (irq_src & MAX8997_IRQSRC_PMIC) {
+		/* PMIC INT1 ~ INT4 */
+		max8997_bulk_read(max8997->i2c, MAX8997_REG_INT1, 4,
+				&irq_reg[PMIC_INT1]);
+	}
+	if (irq_src & MAX8997_IRQSRC_FUELGAUGE) {
+		/*
+		 * TODO: FUEL GAUGE
+		 *
+		 * This is to be supported by Max17042 driver. When
+		 * an interrupt incurs here, it should be relayed to a
+		 * Max17042 device that is connected (probably by
+		 * platform-data). However, we do not have interrupt
+		 * handling in Max17042 driver currently. The Max17042 IRQ
+		 * driver should be ready to be used as a stand-alone device and
+		 * a Max8997-dependent device. Because it is not ready in
+		 * Max17042-side and it is not too critical in operating
+		 * Max8997, we do not implement this in initial releases.
+		 */
+		irq_reg[FUEL_GAUGE] = 0;
+	}
+	if (irq_src & MAX8997_IRQSRC_MUIC) {
+		/* MUIC INT1 ~ INT3 */
+		max8997_bulk_read(max8997->muic, MAX8997_MUIC_REG_INT1, 3,
+				&irq_reg[MUIC_INT1]);
+	}
+	if (irq_src & MAX8997_IRQSRC_GPIO) {
+		/* GPIO Interrupt */
+		u8 gpio_info[MAX8997_NUM_GPIO];
+
+		irq_reg[GPIO_LOW] = 0;
+		irq_reg[GPIO_HI] = 0;
+
+		max8997_bulk_read(max8997->i2c, MAX8997_REG_GPIOCNTL1,
+				MAX8997_NUM_GPIO, gpio_info);
+		for (i = 0; i < MAX8997_NUM_GPIO; i++) {
+			bool interrupt = false;
+
+			switch (gpio_info[i] & MAX8997_GPIO_INT_MASK) {
+			case MAX8997_GPIO_INT_BOTH:
+				if (max8997->gpio_status[i] != gpio_info[i])
+					interrupt = true;
+				break;
+			case MAX8997_GPIO_INT_RISE:
+				if ((max8997->gpio_status[i] != gpio_info[i]) &&
+				    (gpio_info[i] & MAX8997_GPIO_DATA_MASK))
+					interrupt = true;
+				break;
+			case MAX8997_GPIO_INT_FALL:
+				if ((max8997->gpio_status[i] != gpio_info[i]) &&
+				    !(gpio_info[i] & MAX8997_GPIO_DATA_MASK))
+					interrupt = true;
+				break;
+			default:
+				break;
+			}
+
+			if (interrupt) {
+				if (i < 8)
+					irq_reg[GPIO_LOW] |= (1 << i);
+				else
+					irq_reg[GPIO_HI] |= (1 << (i - 8));
+			}
+
+		}
+	}
+	if (irq_src & MAX8997_IRQSRC_FLASH) {
+		/* Flash Status Interrupt */
+		ret = max8997_read_reg(max8997->i2c, MAX8997_REG_FLASHSTATUS,
+				&irq_reg[FLASH_STATUS]);
+	}
+
+	/* Apply masking */
+	for (i = 0; i < MAX8997_IRQ_GROUP_NR; i++)
+		irq_reg[i] &= ~max8997->irq_masks_cur[i];
+
+	/* Report */
+	for (i = 0; i < MAX8997_IRQ_NR; i++) {
+		if (irq_reg[max8997_irqs[i].group] & max8997_irqs[i].mask)
+			handle_nested_irq(max8997->irq_base + i);
+	}
+
+	return IRQ_HANDLED;
+}
+
+int max8997_irq_resume(struct max8997_dev *max8997)
+{
+	if (max8997->irq && max8997->irq_base)
+		max8997_irq_thread(max8997->irq_base, max8997);
+	return 0;
+}
+
+int max8997_irq_init(struct max8997_dev *max8997)
+{
+	int i;
+	int cur_irq;
+	int ret;
+	u8 val;
+
+	if (!max8997->irq) {
+		dev_warn(max8997->dev, "No interrupt specified.\n");
+		max8997->irq_base = 0;
+		return 0;
+	}
+
+	if (!max8997->irq_base) {
+		dev_err(max8997->dev, "No interrupt base specified.\n");
+		return 0;
+	}
+
+	mutex_init(&max8997->irqlock);
+
+	/* Mask individual interrupt sources */
+	for (i = 0; i < MAX8997_IRQ_GROUP_NR; i++) {
+		struct i2c_client *i2c;
+
+		max8997->irq_masks_cur[i] = 0xff;
+		max8997->irq_masks_cache[i] = 0xff;
+		i2c = get_i2c(max8997, i);
+
+		if (IS_ERR_OR_NULL(i2c))
+			continue;
+		if (max8997_mask_reg[i] == MAX8997_REG_INVALID)
+			continue;
+
+		max8997_write_reg(i2c, max8997_mask_reg[i], 0xff);
+	}
+
+	for (i = 0; i < MAX8997_NUM_GPIO; i++) {
+		max8997->gpio_status[i] = (max8997_read_reg(max8997->i2c,
+						MAX8997_REG_GPIOCNTL1 + i,
+						&val)
+					& MAX8997_GPIO_DATA_MASK) ?
+					true : false;
+	}
+
+	/* Register with genirq */
+	for (i = 0; i < MAX8997_IRQ_NR; i++) {
+		cur_irq = i + max8997->irq_base;
+		irq_set_chip_data(cur_irq, max8997);
+		irq_set_chip_and_handler(cur_irq, &max8997_irq_chip,
+				handle_edge_irq);
+		irq_set_nested_thread(cur_irq, 1);
+#ifdef CONFIG_ARM
+		set_irq_flags(cur_irq, IRQF_VALID);
+#else
+		irq_set_noprobe(cur_irq);
+#endif
+	}
+
+	ret = request_threaded_irq(max8997->irq, NULL, max8997_irq_thread,
+			IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+			"max8997-irq", max8997);
+
+	if (ret) {
+		dev_err(max8997->dev, "Failed to request IRQ %d: %d\n",
+				max8997->irq, ret);
+		return ret;
+	}
+
+	if (!max8997->ono)
+		return 0;
+
+	ret = request_threaded_irq(max8997->ono, NULL, max8997_irq_thread,
+			IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING |
+			IRQF_ONESHOT, "max8997-ono", max8997);
+
+	if (ret)
+		dev_err(max8997->dev, "Failed to request ono-IRQ %d: %d\n",
+				max8997->ono, ret);
+
+	return 0;
+}
+
+void max8997_irq_exit(struct max8997_dev *max8997)
+{
+	if (max8997->ono)
+		free_irq(max8997->ono, max8997);
+
+	if (max8997->irq)
+		free_irq(max8997->irq, max8997);
+}

diff --git a/drivers/mfd/max8998-irq.c b/drivers/mfd/max8998-irq.c
index 3903e1f..5919710 100644
--- a/drivers/mfd/max8998-irq.c
+++ b/drivers/mfd/max8998-irq.c

@@ -224,14 +224,14 @@
 	/* register with genirq */
 	for (i = 0; i < MAX8998_IRQ_NR; i++) {
 		cur_irq = i + max8998->irq_base;
-		set_irq_chip_data(cur_irq, max8998);
-		set_irq_chip_and_handler(cur_irq, &max8998_irq_chip,
+		irq_set_chip_data(cur_irq, max8998);
+		irq_set_chip_and_handler(cur_irq, &max8998_irq_chip,
 					 handle_edge_irq);
-		set_irq_nested_thread(cur_irq, 1);
+		irq_set_nested_thread(cur_irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(cur_irq, IRQF_VALID);
 #else
-		set_irq_noprobe(cur_irq);
+		irq_set_noprobe(cur_irq);
 #endif
 	}
 

diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c
index c002142..9ec7570 100644
--- a/drivers/mfd/max8998.c
+++ b/drivers/mfd/max8998.c

@@ -209,7 +209,7 @@
 	struct max8998_dev *max8998 = i2c_get_clientdata(i2c);
 
 	if (max8998->wakeup)
-		set_irq_wake(max8998->irq, 1);
+		irq_set_irq_wake(max8998->irq, 1);
 	return 0;
 }
 
@@ -219,7 +219,7 @@
 	struct max8998_dev *max8998 = i2c_get_clientdata(i2c);
 
 	if (max8998->wakeup)
-		set_irq_wake(max8998->irq, 0);
+		irq_set_irq_wake(max8998->irq, 0);
 	/*
 	 * In LP3974, if IRQ registers are not "read & clear"
 	 * when it's set during sleep, the interrupt becomes

diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index 79eda02..d01574d 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c

@@ -184,16 +184,12 @@
 }
 EXPORT_SYMBOL(mfd_remove_devices);
 
-static int add_shared_platform_device(const char *cell, const char *name)
+int mfd_clone_cell(const char *cell, const char **clones, size_t n_clones)
 {
 	struct mfd_cell cell_entry;
 	struct device *dev;
 	struct platform_device *pdev;
-	int err;
-
-	/* check if we've already registered a device (don't fail if we have) */
-	if (bus_find_device_by_name(&platform_bus_type, NULL, name))
-		return 0;
+	int i;
 
 	/* fetch the parent cell's device (should already be registered!) */
 	dev = bus_find_device_by_name(&platform_bus_type, NULL, cell);
@@ -206,44 +202,17 @@
 
 	WARN_ON(!cell_entry.enable);
 
-	cell_entry.name = name;
-	err = mfd_add_device(pdev->dev.parent, -1, &cell_entry, NULL, 0);
-	if (err)
-		dev_err(dev, "MFD add devices failed: %d\n", err);
-	return err;
+	for (i = 0; i < n_clones; i++) {
+		cell_entry.name = clones[i];
+		/* don't give up if a single call fails; just report error */
+		if (mfd_add_device(pdev->dev.parent, -1, &cell_entry, NULL, 0))
+			dev_err(dev, "failed to create platform device '%s'\n",
+					clones[i]);
+	}
+
+	return 0;
 }
-
-int mfd_shared_platform_driver_register(struct platform_driver *drv,
-		const char *cellname)
-{
-	int err;
-
-	err = add_shared_platform_device(cellname, drv->driver.name);
-	if (err)
-		printk(KERN_ERR "failed to add platform device %s\n",
-				drv->driver.name);
-
-	err = platform_driver_register(drv);
-	if (err)
-		printk(KERN_ERR "failed to add platform driver %s\n",
-				drv->driver.name);
-
-	return err;
-}
-EXPORT_SYMBOL(mfd_shared_platform_driver_register);
-
-void mfd_shared_platform_driver_unregister(struct platform_driver *drv)
-{
-	struct device *dev;
-
-	dev = bus_find_device_by_name(&platform_bus_type, NULL,
-			drv->driver.name);
-	if (dev)
-		platform_device_unregister(to_platform_device(dev));
-
-	platform_driver_unregister(drv);
-}
-EXPORT_SYMBOL(mfd_shared_platform_driver_unregister);
+EXPORT_SYMBOL(mfd_clone_cell);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Ian Molton, Dmitry Baryshkov");

diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c
index c1306ed..c7687f6 100644
--- a/drivers/mfd/pcf50633-core.c
+++ b/drivers/mfd/pcf50633-core.c

@@ -356,7 +356,7 @@
 	return 0;
 }
 
-static struct i2c_device_id pcf50633_id_table[] = {
+static const struct i2c_device_id pcf50633_id_table[] = {
 	{"pcf50633", 0x73},
 	{/* end of list */}
 };

diff --git a/drivers/mfd/rdc321x-southbridge.c b/drivers/mfd/rdc321x-southbridge.c
index 193c940..10dbe63 100644
--- a/drivers/mfd/rdc321x-southbridge.c
+++ b/drivers/mfd/rdc321x-southbridge.c

@@ -97,6 +97,7 @@
 	{ PCI_DEVICE(PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030) },
 	{}
 };
+MODULE_DEVICE_TABLE(pci, rdc321x_sb_table);
 
 static struct pci_driver rdc321x_sb_driver = {
 	.name		= "RDC321x Southbridge",

diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 3e5732b..7ab7746 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c

@@ -762,14 +762,14 @@
 	int irq;
 
 	for (irq = base; irq < base + num_irqs; irq++) {
-		set_irq_chip_data(irq, stmpe);
-		set_irq_chip_and_handler(irq, &stmpe_irq_chip,
+		irq_set_chip_data(irq, stmpe);
+		irq_set_chip_and_handler(irq, &stmpe_irq_chip,
 					 handle_edge_irq);
-		set_irq_nested_thread(irq, 1);
+		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, IRQF_VALID);
 #else
-		set_irq_noprobe(irq);
+		irq_set_noprobe(irq);
 #endif
 	}
 
@@ -786,8 +786,8 @@
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, 0);
 #endif
-		set_irq_chip_and_handler(irq, NULL, NULL);
-		set_irq_chip_data(irq, NULL);
+		irq_set_chip_and_handler(irq, NULL, NULL);
+		irq_set_chip_data(irq, NULL);
 	}
 }
 

diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index af57fc7..42830e6 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c

@@ -186,7 +186,7 @@
 /* Handle the T7L66XB interrupt mux */
 static void t7l66xb_irq(unsigned int irq, struct irq_desc *desc)
 {
-	struct t7l66xb *t7l66xb = get_irq_data(irq);
+	struct t7l66xb *t7l66xb = irq_get_handler_data(irq);
 	unsigned int isr;
 	unsigned int i, irq_base;
 
@@ -243,17 +243,16 @@
 	irq_base = t7l66xb->irq_base;
 
 	for (irq = irq_base; irq < irq_base + T7L66XB_NR_IRQS; irq++) {
-		set_irq_chip(irq, &t7l66xb_chip);
-		set_irq_chip_data(irq, t7l66xb);
-		set_irq_handler(irq, handle_level_irq);
+		irq_set_chip_and_handler(irq, &t7l66xb_chip, handle_level_irq);
+		irq_set_chip_data(irq, t7l66xb);
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
 #endif
 	}
 
-	set_irq_type(t7l66xb->irq, IRQ_TYPE_EDGE_FALLING);
-	set_irq_data(t7l66xb->irq, t7l66xb);
-	set_irq_chained_handler(t7l66xb->irq, t7l66xb_irq);
+	irq_set_irq_type(t7l66xb->irq, IRQ_TYPE_EDGE_FALLING);
+	irq_set_handler_data(t7l66xb->irq, t7l66xb);
+	irq_set_chained_handler(t7l66xb->irq, t7l66xb_irq);
 }
 
 static void t7l66xb_detach_irq(struct platform_device *dev)
@@ -263,15 +262,15 @@
 
 	irq_base = t7l66xb->irq_base;
 
-	set_irq_chained_handler(t7l66xb->irq, NULL);
-	set_irq_data(t7l66xb->irq, NULL);
+	irq_set_chained_handler(t7l66xb->irq, NULL);
+	irq_set_handler_data(t7l66xb->irq, NULL);
 
 	for (irq = irq_base; irq < irq_base + T7L66XB_NR_IRQS; irq++) {
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, 0);
 #endif
-		set_irq_chip(irq, NULL);
-		set_irq_chip_data(irq, NULL);
+		irq_set_chip(irq, NULL);
+		irq_set_chip_data(irq, NULL);
 	}
 }
 

diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c
index 729dbee..c27e515 100644
--- a/drivers/mfd/tc3589x.c
+++ b/drivers/mfd/tc3589x.c

@@ -192,14 +192,14 @@
 	int irq;
 
 	for (irq = base; irq < base + TC3589x_NR_INTERNAL_IRQS; irq++) {
-		set_irq_chip_data(irq, tc3589x);
-		set_irq_chip_and_handler(irq, &dummy_irq_chip,
+		irq_set_chip_data(irq, tc3589x);
+		irq_set_chip_and_handler(irq, &dummy_irq_chip,
 					 handle_edge_irq);
-		set_irq_nested_thread(irq, 1);
+		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, IRQF_VALID);
 #else
-		set_irq_noprobe(irq);
+		irq_set_noprobe(irq);
 #endif
 	}
 
@@ -215,8 +215,8 @@
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, 0);
 #endif
-		set_irq_chip_and_handler(irq, NULL, NULL);
-		set_irq_chip_data(irq, NULL);
+		irq_set_chip_and_handler(irq, NULL, NULL);
+		irq_set_chip_data(irq, NULL);
 	}
 }
 

diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 3d62ded..fc53ce2 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c

@@ -513,7 +513,7 @@
 static void
 tc6393xb_irq(unsigned int irq, struct irq_desc *desc)
 {
-	struct tc6393xb *tc6393xb = get_irq_data(irq);
+	struct tc6393xb *tc6393xb = irq_get_handler_data(irq);
 	unsigned int isr;
 	unsigned int i, irq_base;
 
@@ -572,15 +572,14 @@
 	irq_base = tc6393xb->irq_base;
 
 	for (irq = irq_base; irq < irq_base + TC6393XB_NR_IRQS; irq++) {
-		set_irq_chip(irq, &tc6393xb_chip);
-		set_irq_chip_data(irq, tc6393xb);
-		set_irq_handler(irq, handle_edge_irq);
+		irq_set_chip_and_handler(irq, &tc6393xb_chip, handle_edge_irq);
+		irq_set_chip_data(irq, tc6393xb);
 		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
 	}
 
-	set_irq_type(tc6393xb->irq, IRQ_TYPE_EDGE_FALLING);
-	set_irq_data(tc6393xb->irq, tc6393xb);
-	set_irq_chained_handler(tc6393xb->irq, tc6393xb_irq);
+	irq_set_irq_type(tc6393xb->irq, IRQ_TYPE_EDGE_FALLING);
+	irq_set_handler_data(tc6393xb->irq, tc6393xb);
+	irq_set_chained_handler(tc6393xb->irq, tc6393xb_irq);
 }
 
 static void tc6393xb_detach_irq(struct platform_device *dev)
@@ -588,15 +587,15 @@
 	struct tc6393xb *tc6393xb = platform_get_drvdata(dev);
 	unsigned int irq, irq_base;
 
-	set_irq_chained_handler(tc6393xb->irq, NULL);
-	set_irq_data(tc6393xb->irq, NULL);
+	irq_set_chained_handler(tc6393xb->irq, NULL);
+	irq_set_handler_data(tc6393xb->irq, NULL);
 
 	irq_base = tc6393xb->irq_base;
 
 	for (irq = irq_base; irq < irq_base + TC6393XB_NR_IRQS; irq++) {
 		set_irq_flags(irq, 0);
-		set_irq_chip(irq, NULL);
-		set_irq_chip_data(irq, NULL);
+		irq_set_chip(irq, NULL);
+		irq_set_chip_data(irq, NULL);
 	}
 }
 

diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index 0aa9186..b600808 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c

@@ -422,10 +422,10 @@
 
 	for (i = 0; i < ARRAY_SIZE(tps6586x_irqs); i++) {
 		int __irq = i + tps6586x->irq_base;
-		set_irq_chip_data(__irq, tps6586x);
-		set_irq_chip_and_handler(__irq, &tps6586x->irq_chip,
+		irq_set_chip_data(__irq, tps6586x);
+		irq_set_chip_and_handler(__irq, &tps6586x->irq_chip,
 					 handle_simple_irq);
-		set_irq_nested_thread(__irq, 1);
+		irq_set_nested_thread(__irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(__irq, IRQF_VALID);
 #endif

diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index 63a30e8..8a7ee31 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c

@@ -320,24 +320,8 @@
 		for (module_irq = twl4030_irq_base;
 				pih_isr;
 				pih_isr >>= 1, module_irq++) {
-			if (pih_isr & 0x1) {
-				struct irq_desc *d = irq_to_desc(module_irq);
-
-				if (!d) {
-					pr_err("twl4030: Invalid SIH IRQ: %d\n",
-					       module_irq);
-					return -EINVAL;
-				}
-
-				/* These can't be masked ... always warn
-				 * if we get any surprises.
-				 */
-				if (d->status & IRQ_DISABLED)
-					note_interrupt(module_irq, d,
-							IRQ_NONE);
-				else
-					d->handle_irq(module_irq, d);
-			}
+			if (pih_isr & 0x1)
+				generic_handle_irq(module_irq);
 		}
 		local_irq_enable();
 
@@ -470,7 +454,7 @@
 	set_irq_flags(irq, IRQF_VALID);
 #else
 	/* same effect on other architectures */
-	set_irq_noprobe(irq);
+	irq_set_noprobe(irq);
 #endif
 }
 
@@ -560,24 +544,18 @@
 	/* Modify only the bits we know must change */
 	while (edge_change) {
 		int		i = fls(edge_change) - 1;
-		struct irq_desc	*d = irq_to_desc(i + agent->irq_base);
+		struct irq_data	*idata = irq_get_irq_data(i + agent->irq_base);
 		int		byte = 1 + (i >> 2);
 		int		off = (i & 0x3) * 2;
-
-		if (!d) {
-			pr_err("twl4030: Invalid IRQ: %d\n",
-			       i + agent->irq_base);
-			return;
-		}
+		unsigned int	type;
 
 		bytes[byte] &= ~(0x03 << off);
 
-		raw_spin_lock_irq(&d->lock);
-		if (d->status & IRQ_TYPE_EDGE_RISING)
+		type = irqd_get_trigger_type(idata);
+		if (type & IRQ_TYPE_EDGE_RISING)
 			bytes[byte] |= BIT(off + 1);
-		if (d->status & IRQ_TYPE_EDGE_FALLING)
+		if (type & IRQ_TYPE_EDGE_FALLING)
 			bytes[byte] |= BIT(off + 0);
-		raw_spin_unlock_irq(&d->lock);
 
 		edge_change &= ~BIT(i);
 	}
@@ -626,21 +604,13 @@
 static int twl4030_sih_set_type(struct irq_data *data, unsigned trigger)
 {
 	struct sih_agent *sih = irq_data_get_irq_chip_data(data);
-	struct irq_desc *desc = irq_to_desc(data->irq);
 	unsigned long flags;
 
-	if (!desc) {
-		pr_err("twl4030: Invalid IRQ: %d\n", data->irq);
-		return -EINVAL;
-	}
-
 	if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
 		return -EINVAL;
 
 	spin_lock_irqsave(&sih_agent_lock, flags);
-	if ((desc->status & IRQ_TYPE_SENSE_MASK) != trigger) {
-		desc->status &= ~IRQ_TYPE_SENSE_MASK;
-		desc->status |= trigger;
+	if (irqd_get_trigger_type(data) != trigger) {
 		sih->edge_change |= BIT(data->irq - sih->irq_base);
 		queue_work(wq, &sih->edge_work);
 	}
@@ -680,7 +650,7 @@
  */
 static void handle_twl4030_sih(unsigned irq, struct irq_desc *desc)
 {
-	struct sih_agent *agent = get_irq_data(irq);
+	struct sih_agent *agent = irq_get_handler_data(irq);
 	const struct sih *sih = agent->sih;
 	int isr;
 
@@ -754,9 +724,9 @@
 	for (i = 0; i < sih->bits; i++) {
 		irq = irq_base + i;
 
-		set_irq_chip_and_handler(irq, &twl4030_sih_irq_chip,
-				handle_edge_irq);
-		set_irq_chip_data(irq, agent);
+		irq_set_chip_and_handler(irq, &twl4030_sih_irq_chip,
+					 handle_edge_irq);
+		irq_set_chip_data(irq, agent);
 		activate_irq(irq);
 	}
 
@@ -765,8 +735,8 @@
 
 	/* replace generic PIH handler (handle_simple_irq) */
 	irq = sih_mod + twl4030_irq_base;
-	set_irq_data(irq, agent);
-	set_irq_chained_handler(irq, handle_twl4030_sih);
+	irq_set_handler_data(irq, agent);
+	irq_set_chained_handler(irq, handle_twl4030_sih);
 
 	pr_info("twl4030: %s (irq %d) chaining IRQs %d..%d\n", sih->name,
 			irq, irq_base, twl4030_irq_next - 1);
@@ -815,8 +785,8 @@
 	twl4030_sih_irq_chip.irq_ack = dummy_irq_chip.irq_ack;
 
 	for (i = irq_base; i < irq_end; i++) {
-		set_irq_chip_and_handler(i, &twl4030_irq_chip,
-				handle_simple_irq);
+		irq_set_chip_and_handler(i, &twl4030_irq_chip,
+					 handle_simple_irq);
 		activate_irq(i);
 	}
 	twl4030_irq_next = i;
@@ -856,7 +826,7 @@
 	/* clean up twl4030_sih_setup */
 fail:
 	for (i = irq_base; i < irq_end; i++)
-		set_irq_chip_and_handler(i, NULL, NULL);
+		irq_set_chip_and_handler(i, NULL, NULL);
 	destroy_workqueue(wq);
 	wq = NULL;
 	return status;

diff --git a/drivers/mfd/twl6030-irq.c b/drivers/mfd/twl6030-irq.c
index 4082ed7..fa93705 100644
--- a/drivers/mfd/twl6030-irq.c
+++ b/drivers/mfd/twl6030-irq.c

@@ -140,22 +140,7 @@
 			if (sts.int_sts & 0x1) {
 				int module_irq = twl6030_irq_base +
 					twl6030_interrupt_mapping[i];
-				struct irq_desc *d = irq_to_desc(module_irq);
-
-				if (!d) {
-					pr_err("twl6030: Invalid SIH IRQ: %d\n",
-					       module_irq);
-					return -EINVAL;
-				}
-
-				/* These can't be masked ... always warn
-				 * if we get any surprises.
-				 */
-				if (d->status & IRQ_DISABLED)
-					note_interrupt(module_irq, d,
-							IRQ_NONE);
-				else
-					d->handle_irq(module_irq, d);
+				generic_handle_irq(module_irq);
 
 			}
 		local_irq_enable();
@@ -198,7 +183,7 @@
 	set_irq_flags(irq, IRQF_VALID);
 #else
 	/* same effect on other architectures */
-	set_irq_noprobe(irq);
+	irq_set_noprobe(irq);
 #endif
 }
 
@@ -335,8 +320,8 @@
 	twl6030_irq_chip.irq_set_type = NULL;
 
 	for (i = irq_base; i < irq_end; i++) {
-		set_irq_chip_and_handler(i, &twl6030_irq_chip,
-				handle_simple_irq);
+		irq_set_chip_and_handler(i, &twl6030_irq_chip,
+					 handle_simple_irq);
 		activate_irq(i);
 	}
 
@@ -365,7 +350,7 @@
 
 fail_kthread:
 	for (i = irq_base; i < irq_end; i++)
-		set_irq_chip_and_handler(i, NULL, NULL);
+		irq_set_chip_and_handler(i, NULL, NULL);
 	return status;
 }
 

diff --git a/drivers/mfd/wl1273-core.c b/drivers/mfd/wl1273-core.c
index f76f6c7..04914f2 100644
--- a/drivers/mfd/wl1273-core.c
+++ b/drivers/mfd/wl1273-core.c

@@ -25,7 +25,7 @@
 
 #define DRIVER_DESC "WL1273 FM Radio Core"
 
-static struct i2c_device_id wl1273_driver_id_table[] = {
+static const struct i2c_device_id wl1273_driver_id_table[] = {
 	{ WL1273_FM_DRIVER_NAME, 0 },
 	{ }
 };

diff --git a/drivers/mfd/wm831x-irq.c b/drivers/mfd/wm831x-irq.c
index a5cd17e..23e66af 100644
--- a/drivers/mfd/wm831x-irq.c
+++ b/drivers/mfd/wm831x-irq.c

@@ -553,17 +553,17 @@
 	for (cur_irq = wm831x->irq_base;
 	     cur_irq < ARRAY_SIZE(wm831x_irqs) + wm831x->irq_base;
 	     cur_irq++) {
-		set_irq_chip_data(cur_irq, wm831x);
-		set_irq_chip_and_handler(cur_irq, &wm831x_irq_chip,
+		irq_set_chip_data(cur_irq, wm831x);
+		irq_set_chip_and_handler(cur_irq, &wm831x_irq_chip,
 					 handle_edge_irq);
-		set_irq_nested_thread(cur_irq, 1);
+		irq_set_nested_thread(cur_irq, 1);
 
 		/* ARM needs us to explicitly flag the IRQ as valid
 		 * and will set them noprobe when we do so. */
 #ifdef CONFIG_ARM
 		set_irq_flags(cur_irq, IRQF_VALID);
 #else
-		set_irq_noprobe(cur_irq);
+		irq_set_noprobe(cur_irq);
 #endif
 	}
 

diff --git a/drivers/mfd/wm8350-irq.c b/drivers/mfd/wm8350-irq.c
index 5839966..ed4b22a 100644
--- a/drivers/mfd/wm8350-irq.c
+++ b/drivers/mfd/wm8350-irq.c

@@ -518,17 +518,17 @@
 	for (cur_irq = wm8350->irq_base;
 	     cur_irq < ARRAY_SIZE(wm8350_irqs) + wm8350->irq_base;
 	     cur_irq++) {
-		set_irq_chip_data(cur_irq, wm8350);
-		set_irq_chip_and_handler(cur_irq, &wm8350_irq_chip,
+		irq_set_chip_data(cur_irq, wm8350);
+		irq_set_chip_and_handler(cur_irq, &wm8350_irq_chip,
 					 handle_edge_irq);
-		set_irq_nested_thread(cur_irq, 1);
+		irq_set_nested_thread(cur_irq, 1);
 
 		/* ARM needs us to explicitly flag the IRQ as valid
 		 * and will set them noprobe when we do so. */
 #ifdef CONFIG_ARM
 		set_irq_flags(cur_irq, IRQF_VALID);
 #else
-		set_irq_noprobe(cur_irq);
+		irq_set_noprobe(cur_irq);
 #endif
 	}
 

diff --git a/drivers/mfd/wm8994-irq.c b/drivers/mfd/wm8994-irq.c
index 1e3bf4a..71c6e8f 100644
--- a/drivers/mfd/wm8994-irq.c
+++ b/drivers/mfd/wm8994-irq.c

@@ -278,17 +278,17 @@
 	for (cur_irq = wm8994->irq_base;
 	     cur_irq < ARRAY_SIZE(wm8994_irqs) + wm8994->irq_base;
 	     cur_irq++) {
-		set_irq_chip_data(cur_irq, wm8994);
-		set_irq_chip_and_handler(cur_irq, &wm8994_irq_chip,
+		irq_set_chip_data(cur_irq, wm8994);
+		irq_set_chip_and_handler(cur_irq, &wm8994_irq_chip,
 					 handle_edge_irq);
-		set_irq_nested_thread(cur_irq, 1);
+		irq_set_nested_thread(cur_irq, 1);
 
 		/* ARM needs us to explicitly flag the IRQ as valid
 		 * and will set them noprobe when we do so. */
 #ifdef CONFIG_ARM
 		set_irq_flags(cur_irq, IRQF_VALID);
 #else
-		set_irq_noprobe(cur_irq);
+		irq_set_noprobe(cur_irq);
 #endif
 	}
 

diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index 59c118c..27dc463 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c

@@ -988,7 +988,7 @@
 
 static int kgdbts_option_setup(char *opt)
 {
-	if (strlen(opt) > MAX_CONFIG_LEN) {
+	if (strlen(opt) >= MAX_CONFIG_LEN) {
 		printk(KERN_ERR "kgdbts: config string too long\n");
 		return -ENOSPC;
 	}

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 7741470..b4567c3 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig

@@ -33,14 +33,6 @@
 	  should normally be compiled as kernel modules. The modules perform
 	  various checks and verifications when loaded.
 
-config MTD_CONCAT
-	tristate "MTD concatenating support"
-	help
-	  Support for concatenating several MTD devices into a single
-	  (virtual) one. This allows you to have -for example- a JFFS(2)
-	  file system spanning multiple physical flash chips. If unsure,
-	  say 'Y'.
-
 config MTD_PARTITIONS
 	bool "MTD partitioning support"
 	help
@@ -333,6 +325,16 @@
 	  To use, add console=ttyMTDx to the kernel command line,
 	  where x is the MTD device number to use.
 
+config MTD_SWAP
+	tristate "Swap on MTD device support"
+	depends on MTD && SWAP
+	select MTD_BLKDEVS
+	help
+	  Provides volatile block device driver on top of mtd partition
+          suitable for swapping.  The mapping of written blocks is not saved.
+	  The driver provides wear leveling by storing erase counter into the
+	  OOB.
+
 source "drivers/mtd/chips/Kconfig"
 
 source "drivers/mtd/maps/Kconfig"

diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index d4e7f25..d578095 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile

@@ -4,11 +4,10 @@
 
 # Core functionality.
 obj-$(CONFIG_MTD)		+= mtd.o
-mtd-y				:= mtdcore.o mtdsuper.o
+mtd-y				:= mtdcore.o mtdsuper.o mtdconcat.o
 mtd-$(CONFIG_MTD_PARTITIONS)	+= mtdpart.o
 mtd-$(CONFIG_MTD_OF_PARTS)	+= ofpart.o
 
-obj-$(CONFIG_MTD_CONCAT)	+= mtdconcat.o
 obj-$(CONFIG_MTD_REDBOOT_PARTS) += redboot.o
 obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdlinepart.o
 obj-$(CONFIG_MTD_AFS_PARTS)	+= afs.o
@@ -26,6 +25,7 @@
 obj-$(CONFIG_SSFDC)		+= ssfdc.o
 obj-$(CONFIG_SM_FTL)		+= sm_ftl.o
 obj-$(CONFIG_MTD_OOPS)		+= mtdoops.o
+obj-$(CONFIG_MTD_SWAP)		+= mtdswap.o
 
 nftl-objs		:= nftlcore.o nftlmount.o
 inftl-objs		:= inftlcore.o inftlmount.o

diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 4aaa88f..092aef1 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c

@@ -455,7 +455,7 @@
 	mtd->flags   = MTD_CAP_NORFLASH;
 	mtd->name    = map->name;
 	mtd->writesize = 1;
-	mtd->writebufsize = 1 << cfi->cfiq->MaxBufWriteSize;
+	mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
 
 	mtd->reboot_notifier.notifier_call = cfi_intelext_reboot;
 

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index f072fcf..f9a5331 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c

@@ -349,6 +349,7 @@
 	{ CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri },
 #ifdef AMD_BOOTLOC_BUG
 	{ CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock },
+	{ CFI_MFR_AMIC, CFI_ID_ANY, fixup_amd_bootblock },
 	{ CFI_MFR_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock },
 #endif
 	{ CFI_MFR_AMD, 0x0050, fixup_use_secsi },
@@ -440,7 +441,7 @@
 	mtd->flags   = MTD_CAP_NORFLASH;
 	mtd->name    = map->name;
 	mtd->writesize = 1;
-	mtd->writebufsize = 1 << cfi->cfiq->MaxBufWriteSize;
+	mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
 
 	DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): write buffer size %d\n",
 		__func__, mtd->writebufsize);

diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index c04b765..ed56ad3 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c

@@ -238,7 +238,7 @@
 	mtd->resume = cfi_staa_resume;
 	mtd->flags = MTD_CAP_NORFLASH & ~MTD_BIT_WRITEABLE;
 	mtd->writesize = 8; /* FIXME: Should be 0 for STMicro flashes w/out ECC */
-	mtd->writebufsize = 1 << cfi->cfiq->MaxBufWriteSize;
+	mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
 	map->fldrv = &cfi_staa_chipdrv;
 	__module_get(THIS_MODULE);
 	mtd->name = map->name;

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index e4eba6c..3fb981d 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c

@@ -655,7 +655,8 @@
 	{ "at26df161a", INFO(0x1f4601, 0, 64 * 1024, 32, SECT_4K) },
 	{ "at26df321",  INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K) },
 
-	/* EON -- en25pxx */
+	/* EON -- en25xxx */
+	{ "en25f32", INFO(0x1c3116, 0, 64 * 1024,  64, SECT_4K) },
 	{ "en25p32", INFO(0x1c2016, 0, 64 * 1024,  64, 0) },
 	{ "en25p64", INFO(0x1c2017, 0, 64 * 1024, 128, 0) },
 
@@ -728,6 +729,8 @@
 	{ "m25pe80", INFO(0x208014,  0, 64 * 1024, 16,       0) },
 	{ "m25pe16", INFO(0x208015,  0, 64 * 1024, 32, SECT_4K) },
 
+	{ "m25px64", INFO(0x207117,  0, 64 * 1024, 128, 0) },
+
 	/* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
 	{ "w25x10", INFO(0xef3011, 0, 64 * 1024,  2,  SECT_4K) },
 	{ "w25x20", INFO(0xef3012, 0, 64 * 1024,  4,  SECT_4K) },

diff --git a/drivers/mtd/devices/mtdram.c b/drivers/mtd/devices/mtdram.c
index 26a6e80..1483e18 100644
--- a/drivers/mtd/devices/mtdram.c
+++ b/drivers/mtd/devices/mtdram.c

@@ -121,6 +121,7 @@
 	mtd->flags = MTD_CAP_RAM;
 	mtd->size = size;
 	mtd->writesize = 1;
+	mtd->writebufsize = 64; /* Mimic CFI NOR flashes */
 	mtd->erasesize = MTDRAM_ERASE_SIZE;
 	mtd->priv = mapped_address;
 

diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c
index 5239328..8d28fa0 100644
--- a/drivers/mtd/devices/phram.c
+++ b/drivers/mtd/devices/phram.c

@@ -117,6 +117,7 @@
 	list_for_each_entry_safe(this, safe, &phram_list, list) {
 		del_mtd_device(&this->mtd);
 		iounmap(this->mtd.priv);
+		kfree(this->mtd.name);
 		kfree(this);
 	}
 }
@@ -275,6 +276,8 @@
 	ret = register_device(name, start, len);
 	if (!ret)
 		pr_info("%s device: %#x at %#x\n", name, len, start);
+	else
+		kfree(name);
 
 	return ret;
 }

diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 5d37d31..44b1f46 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig

@@ -114,7 +114,7 @@
 
 config MTD_SC520CDP
 	tristate "CFI Flash device mapped on AMD SC520 CDP"
-	depends on X86 && MTD_CFI && MTD_CONCAT
+	depends on X86 && MTD_CFI
 	help
 	  The SC520 CDP board has two banks of CFI-compliant chips and one
 	  Dual-in-line JEDEC chip. This 'mapping' driver supports that
@@ -262,7 +262,7 @@
 
 config MTD_DILNETPC
 	tristate "CFI Flash device mapped on DIL/Net PC"
-	depends on X86 && MTD_CONCAT && MTD_PARTITIONS && MTD_CFI_INTELEXT && BROKEN
+	depends on X86 && MTD_PARTITIONS && MTD_CFI_INTELEXT && BROKEN
 	help
 	  MTD map driver for SSV DIL/Net PC Boards "DNP" and "ADNP".
 	  For details, see <http://www.ssv-embedded.de/ssv/pc104/p169.htm>
@@ -552,4 +552,13 @@
 
 	  When built as a module, it will be called pismo.ko
 
+config MTD_LATCH_ADDR
+        tristate "Latch-assisted Flash Chip Support"
+        depends on MTD_COMPLEX_MAPPINGS
+        help
+          Map driver which allows flashes to be partially physically addressed
+          and have the upper address lines set by a board specific code.
+
+          If compiled as a module, it will be called latch-addr-flash.
+
 endmenu

diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index c7869c7..08533bd 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile

@@ -59,3 +59,4 @@
 obj-$(CONFIG_MTD_VMU)		+= vmu-flash.o
 obj-$(CONFIG_MTD_GPIO_ADDR)	+= gpio-addr-flash.o
 obj-$(CONFIG_MTD_BCM963XX)	+= bcm963xx-flash.o
+obj-$(CONFIG_MTD_LATCH_ADDR)	+= latch-addr-flash.o

diff --git a/drivers/mtd/maps/ceiva.c b/drivers/mtd/maps/ceiva.c
index c09f4f5..e5f645b 100644
--- a/drivers/mtd/maps/ceiva.c
+++ b/drivers/mtd/maps/ceiva.c

@@ -194,16 +194,10 @@
 			 * We detected multiple devices.  Concatenate
 			 * them together.
 			 */
-#ifdef CONFIG_MTD_CONCAT
 			*rmtd = mtd_concat_create(subdev, found,
 						  "clps flash");
 			if (*rmtd == NULL)
 				ret = -ENXIO;
-#else
-			printk(KERN_ERR "clps flash: multiple devices "
-			       "found but MTD concat support disabled.\n");
-			ret = -ENXIO;
-#endif
 		}
 	}
 

diff --git a/drivers/mtd/maps/integrator-flash.c b/drivers/mtd/maps/integrator-flash.c
index 2aac41b..e22ff5a 100644
--- a/drivers/mtd/maps/integrator-flash.c
+++ b/drivers/mtd/maps/integrator-flash.c

@@ -202,7 +202,6 @@
 	if (info->nr_subdev == 1)
 		info->mtd = info->subdev[0].mtd;
 	else if (info->nr_subdev > 1) {
-#ifdef CONFIG_MTD_CONCAT
 		struct mtd_info *cdev[info->nr_subdev];
 
 		/*
@@ -215,11 +214,6 @@
 					      dev_name(&dev->dev));
 		if (info->mtd == NULL)
 			err = -ENXIO;
-#else
-		printk(KERN_ERR "armflash: multiple devices found but "
-		       "MTD concat support disabled.\n");
-		err = -ENXIO;
-#endif
 	}
 
 	if (err < 0)
@@ -244,10 +238,8 @@
  cleanup:
 	if (info->mtd) {
 		del_mtd_partitions(info->mtd);
-#ifdef CONFIG_MTD_CONCAT
 		if (info->mtd != info->subdev[0].mtd)
 			mtd_concat_destroy(info->mtd);
-#endif
 	}
 	kfree(info->parts);
  subdev_err:
@@ -272,10 +264,8 @@
 	if (info) {
 		if (info->mtd) {
 			del_mtd_partitions(info->mtd);
-#ifdef CONFIG_MTD_CONCAT
 			if (info->mtd != info->subdev[0].mtd)
 				mtd_concat_destroy(info->mtd);
-#endif
 		}
 		kfree(info->parts);
 

diff --git a/drivers/mtd/maps/latch-addr-flash.c b/drivers/mtd/maps/latch-addr-flash.c
new file mode 100644
index 0000000..ee25480
--- /dev/null
+++ b/drivers/mtd/maps/latch-addr-flash.c

@@ -0,0 +1,272 @@
+/*
+ * Interface for NOR flash driver whose high address lines are latched
+ *
+ * Copyright © 2000 Nicolas Pitre <nico@cam.org>
+ * Copyright © 2005-2008 Analog Devices Inc.
+ * Copyright © 2008 MontaVista Software, Inc. <source@mvista.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/partitions.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/latch-addr-flash.h>
+#include <linux/slab.h>
+
+#define DRIVER_NAME "latch-addr-flash"
+
+struct latch_addr_flash_info {
+	struct mtd_info		*mtd;
+	struct map_info		map;
+	struct resource		*res;
+
+	void			(*set_window)(unsigned long offset, void *data);
+	void			*data;
+
+	/* cache; could be found out of res */
+	unsigned long		win_mask;
+
+	int			nr_parts;
+	struct mtd_partition	*parts;
+
+	spinlock_t		lock;
+};
+
+static map_word lf_read(struct map_info *map, unsigned long ofs)
+{
+	struct latch_addr_flash_info *info;
+	map_word datum;
+
+	info = (struct latch_addr_flash_info *)map->map_priv_1;
+
+	spin_lock(&info->lock);
+
+	info->set_window(ofs, info->data);
+	datum = inline_map_read(map, info->win_mask & ofs);
+
+	spin_unlock(&info->lock);
+
+	return datum;
+}
+
+static void lf_write(struct map_info *map, map_word datum, unsigned long ofs)
+{
+	struct latch_addr_flash_info *info;
+
+	info = (struct latch_addr_flash_info *)map->map_priv_1;
+
+	spin_lock(&info->lock);
+
+	info->set_window(ofs, info->data);
+	inline_map_write(map, datum, info->win_mask & ofs);
+
+	spin_unlock(&info->lock);
+}
+
+static void lf_copy_from(struct map_info *map, void *to,
+		unsigned long from, ssize_t len)
+{
+	struct latch_addr_flash_info *info =
+		(struct latch_addr_flash_info *) map->map_priv_1;
+	unsigned n;
+
+	while (len > 0) {
+		n = info->win_mask + 1 - (from & info->win_mask);
+		if (n > len)
+			n = len;
+
+		spin_lock(&info->lock);
+
+		info->set_window(from, info->data);
+		memcpy_fromio(to, map->virt + (from & info->win_mask), n);
+
+		spin_unlock(&info->lock);
+
+		to += n;
+		from += n;
+		len -= n;
+	}
+}
+
+static char *rom_probe_types[] = { "cfi_probe", NULL };
+
+static char *part_probe_types[] = { "cmdlinepart", NULL };
+
+static int latch_addr_flash_remove(struct platform_device *dev)
+{
+	struct latch_addr_flash_info *info;
+	struct latch_addr_flash_data *latch_addr_data;
+
+	info = platform_get_drvdata(dev);
+	if (info == NULL)
+		return 0;
+	platform_set_drvdata(dev, NULL);
+
+	latch_addr_data = dev->dev.platform_data;
+
+	if (info->mtd != NULL) {
+		if (mtd_has_partitions()) {
+			if (info->nr_parts) {
+				del_mtd_partitions(info->mtd);
+				kfree(info->parts);
+			} else if (latch_addr_data->nr_parts) {
+				del_mtd_partitions(info->mtd);
+			} else {
+				del_mtd_device(info->mtd);
+			}
+		} else {
+			del_mtd_device(info->mtd);
+		}
+		map_destroy(info->mtd);
+	}
+
+	if (info->map.virt != NULL)
+		iounmap(info->map.virt);
+
+	if (info->res != NULL)
+		release_mem_region(info->res->start, resource_size(info->res));
+
+	kfree(info);
+
+	if (latch_addr_data->done)
+		latch_addr_data->done(latch_addr_data->data);
+
+	return 0;
+}
+
+static int __devinit latch_addr_flash_probe(struct platform_device *dev)
+{
+	struct latch_addr_flash_data *latch_addr_data;
+	struct latch_addr_flash_info *info;
+	resource_size_t win_base = dev->resource->start;
+	resource_size_t win_size = resource_size(dev->resource);
+	char **probe_type;
+	int chipsel;
+	int err;
+
+	latch_addr_data = dev->dev.platform_data;
+	if (latch_addr_data == NULL)
+		return -ENODEV;
+
+	pr_notice("latch-addr platform flash device: %#llx byte "
+		  "window at %#.8llx\n",
+		  (unsigned long long)win_size, (unsigned long long)win_base);
+
+	chipsel = dev->id;
+
+	if (latch_addr_data->init) {
+		err = latch_addr_data->init(latch_addr_data->data, chipsel);
+		if (err != 0)
+			return err;
+	}
+
+	info = kzalloc(sizeof(struct latch_addr_flash_info), GFP_KERNEL);
+	if (info == NULL) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	platform_set_drvdata(dev, info);
+
+	info->res = request_mem_region(win_base, win_size, DRIVER_NAME);
+	if (info->res == NULL) {
+		dev_err(&dev->dev, "Could not reserve memory region\n");
+		err = -EBUSY;
+		goto free_info;
+	}
+
+	info->map.name		= DRIVER_NAME;
+	info->map.size		= latch_addr_data->size;
+	info->map.bankwidth	= latch_addr_data->width;
+
+	info->map.phys		= NO_XIP;
+	info->map.virt		= ioremap(win_base, win_size);
+	if (!info->map.virt) {
+		err = -ENOMEM;
+		goto free_res;
+	}
+
+	info->map.map_priv_1	= (unsigned long)info;
+
+	info->map.read		= lf_read;
+	info->map.copy_from	= lf_copy_from;
+	info->map.write		= lf_write;
+	info->set_window	= latch_addr_data->set_window;
+	info->data		= latch_addr_data->data;
+	info->win_mask		= win_size - 1;
+
+	spin_lock_init(&info->lock);
+
+	for (probe_type = rom_probe_types; !info->mtd && *probe_type;
+		probe_type++)
+		info->mtd = do_map_probe(*probe_type, &info->map);
+
+	if (info->mtd == NULL) {
+		dev_err(&dev->dev, "map_probe failed\n");
+		err = -ENODEV;
+		goto iounmap;
+	}
+	info->mtd->owner = THIS_MODULE;
+
+	if (mtd_has_partitions()) {
+
+		err = parse_mtd_partitions(info->mtd,
+					   (const char **)part_probe_types,
+					   &info->parts, 0);
+		if (err > 0) {
+			add_mtd_partitions(info->mtd, info->parts, err);
+			return 0;
+		}
+		if (latch_addr_data->nr_parts) {
+			pr_notice("Using latch-addr-flash partition information\n");
+			add_mtd_partitions(info->mtd, latch_addr_data->parts,
+					latch_addr_data->nr_parts);
+			return 0;
+		}
+	}
+	add_mtd_device(info->mtd);
+	return 0;
+
+iounmap:
+	iounmap(info->map.virt);
+free_res:
+	release_mem_region(info->res->start, resource_size(info->res));
+free_info:
+	kfree(info);
+done:
+	if (latch_addr_data->done)
+		latch_addr_data->done(latch_addr_data->data);
+	return err;
+}
+
+static struct platform_driver latch_addr_flash_driver = {
+	.probe		= latch_addr_flash_probe,
+	.remove		= __devexit_p(latch_addr_flash_remove),
+	.driver		= {
+		.name	= DRIVER_NAME,
+	},
+};
+
+static int __init latch_addr_flash_init(void)
+{
+	return platform_driver_register(&latch_addr_flash_driver);
+}
+module_init(latch_addr_flash_init);
+
+static void __exit latch_addr_flash_exit(void)
+{
+	platform_driver_unregister(&latch_addr_flash_driver);
+}
+module_exit(latch_addr_flash_exit);
+
+MODULE_AUTHOR("David Griego <dgriego@mvista.com>");
+MODULE_DESCRIPTION("MTD map driver for flashes addressed physically with upper "
+		"address lines being set board specifically");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index 4c18b98..7522df4 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c

@@ -59,10 +59,8 @@
 #else
 		del_mtd_device(info->cmtd);
 #endif
-#ifdef CONFIG_MTD_CONCAT
 		if (info->cmtd != info->mtd[0])
 			mtd_concat_destroy(info->cmtd);
-#endif
 	}
 
 	for (i = 0; i < MAX_RESOURCES; i++) {
@@ -159,15 +157,9 @@
 		/*
 		 * We detected multiple devices. Concatenate them together.
 		 */
-#ifdef CONFIG_MTD_CONCAT
 		info->cmtd = mtd_concat_create(info->mtd, devices_found, dev_name(&dev->dev));
 		if (info->cmtd == NULL)
 			err = -ENXIO;
-#else
-		printk(KERN_ERR "physmap-flash: multiple devices "
-		       "found but MTD concat support disabled.\n");
-		err = -ENXIO;
-#endif
 	}
 	if (err)
 		goto err_out;

diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 3db0cb0..bd483f0 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c

@@ -104,12 +104,10 @@
 		return 0;
 	dev_set_drvdata(&dev->dev, NULL);
 
-#ifdef CONFIG_MTD_CONCAT
 	if (info->cmtd != info->list[0].mtd) {
 		del_mtd_device(info->cmtd);
 		mtd_concat_destroy(info->cmtd);
 	}
-#endif
 
 	if (info->cmtd) {
 		if (OF_FLASH_PARTS(info)) {
@@ -337,16 +335,10 @@
 		/*
 		 * We detected multiple devices. Concatenate them together.
 		 */
-#ifdef CONFIG_MTD_CONCAT
 		info->cmtd = mtd_concat_create(mtd_list, info->list_size,
 					       dev_name(&dev->dev));
 		if (info->cmtd == NULL)
 			err = -ENXIO;
-#else
-		printk(KERN_ERR "physmap_of: multiple devices "
-		       "found but MTD concat support disabled.\n");
-		err = -ENXIO;
-#endif
 	}
 	if (err)
 		goto err_out;

diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index f3af87e..da875908 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c

@@ -232,10 +232,8 @@
 		else
 			del_mtd_partitions(info->mtd);
 #endif
-#ifdef CONFIG_MTD_CONCAT
 		if (info->mtd != info->subdev[0].mtd)
 			mtd_concat_destroy(info->mtd);
-#endif
 	}
 
 	kfree(info->parts);
@@ -321,7 +319,6 @@
 		info->mtd = info->subdev[0].mtd;
 		ret = 0;
 	} else if (info->num_subdev > 1) {
-#ifdef CONFIG_MTD_CONCAT
 		struct mtd_info *cdev[nr];
 		/*
 		 * We detected multiple devices.  Concatenate them together.
@@ -333,11 +330,6 @@
 					      plat->name);
 		if (info->mtd == NULL)
 			ret = -ENXIO;
-#else
-		printk(KERN_ERR "SA1100 flash: multiple devices "
-		       "found but MTD concat support disabled.\n");
-		ret = -ENXIO;
-#endif
 	}
 
 	if (ret == 0)

diff --git a/drivers/mtd/maps/ts5500_flash.c b/drivers/mtd/maps/ts5500_flash.c
index e2147bf..e02dfa9 100644
--- a/drivers/mtd/maps/ts5500_flash.c
+++ b/drivers/mtd/maps/ts5500_flash.c

@@ -94,7 +94,6 @@
 	return 0;
 
 err1:
-	map_destroy(mymtd);
 	iounmap(ts5500_map.virt);
 err2:
 	return rc;

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index e0a2373..a534e1f 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c

@@ -40,7 +40,7 @@
 static LIST_HEAD(blktrans_majors);
 static DEFINE_MUTEX(blktrans_ref_mutex);
 
-void blktrans_dev_release(struct kref *kref)
+static void blktrans_dev_release(struct kref *kref)
 {
 	struct mtd_blktrans_dev *dev =
 		container_of(kref, struct mtd_blktrans_dev, ref);
@@ -67,7 +67,7 @@
 	return dev;
 }
 
-void blktrans_dev_put(struct mtd_blktrans_dev *dev)
+static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
 {
 	mutex_lock(&blktrans_ref_mutex);
 	kref_put(&dev->ref, blktrans_dev_release);
@@ -119,18 +119,43 @@
 	}
 }
 
+int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev)
+{
+	if (kthread_should_stop())
+		return 1;
+
+	return dev->bg_stop;
+}
+EXPORT_SYMBOL_GPL(mtd_blktrans_cease_background);
+
 static int mtd_blktrans_thread(void *arg)
 {
 	struct mtd_blktrans_dev *dev = arg;
+	struct mtd_blktrans_ops *tr = dev->tr;
 	struct request_queue *rq = dev->rq;
 	struct request *req = NULL;
+	int background_done = 0;
 
 	spin_lock_irq(rq->queue_lock);
 
 	while (!kthread_should_stop()) {
 		int res;
 
+		dev->bg_stop = false;
 		if (!req && !(req = blk_fetch_request(rq))) {
+			if (tr->background && !background_done) {
+				spin_unlock_irq(rq->queue_lock);
+				mutex_lock(&dev->lock);
+				tr->background(dev);
+				mutex_unlock(&dev->lock);
+				spin_lock_irq(rq->queue_lock);
+				/*
+				 * Do background processing just once per idle
+				 * period.
+				 */
+				background_done = !dev->bg_stop;
+				continue;
+			}
 			set_current_state(TASK_INTERRUPTIBLE);
 
 			if (kthread_should_stop())
@@ -152,6 +177,8 @@
 
 		if (!__blk_end_request_cur(req, res))
 			req = NULL;
+
+		background_done = 0;
 	}
 
 	if (req)
@@ -172,8 +199,10 @@
 	if (!dev)
 		while ((req = blk_fetch_request(rq)) != NULL)
 			__blk_end_request_all(req, -ENODEV);
-	else
+	else {
+		dev->bg_stop = true;
 		wake_up_process(dev->thread);
+	}
 }
 
 static int blktrans_open(struct block_device *bdev, fmode_t mode)
@@ -379,9 +408,10 @@
 	new->rq->queuedata = new;
 	blk_queue_logical_block_size(new->rq, tr->blksize);
 
-	if (tr->discard)
-		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
-					new->rq);
+	if (tr->discard) {
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq);
+		new->rq->limits.max_discard_sectors = UINT_MAX;
+	}
 
 	gd->queue = new->rq;
 

diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 5f5777b..5060e60 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c

@@ -750,6 +750,7 @@
 	struct mtd_concat *concat;
 	uint32_t max_erasesize, curr_erasesize;
 	int num_erase_region;
+	int max_writebufsize = 0;
 
 	printk(KERN_NOTICE "Concatenating MTD devices:\n");
 	for (i = 0; i < num_devs; i++)
@@ -776,7 +777,12 @@
 	concat->mtd.size = subdev[0]->size;
 	concat->mtd.erasesize = subdev[0]->erasesize;
 	concat->mtd.writesize = subdev[0]->writesize;
-	concat->mtd.writebufsize = subdev[0]->writebufsize;
+
+	for (i = 0; i < num_devs; i++)
+		if (max_writebufsize < subdev[i]->writebufsize)
+			max_writebufsize = subdev[i]->writebufsize;
+	concat->mtd.writebufsize = max_writebufsize;
+
 	concat->mtd.subpage_sft = subdev[0]->subpage_sft;
 	concat->mtd.oobsize = subdev[0]->oobsize;
 	concat->mtd.oobavail = subdev[0]->oobavail;

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 527cebf..da69bc8 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c

@@ -43,7 +43,7 @@
  * backing device capabilities for non-mappable devices (such as NAND flash)
  * - permits private mappings, copies are taken of the data
  */
-struct backing_dev_info mtd_bdi_unmappable = {
+static struct backing_dev_info mtd_bdi_unmappable = {
 	.capabilities	= BDI_CAP_MAP_COPY,
 };
 
@@ -52,7 +52,7 @@
  * - permits private mappings, copies are taken of the data
  * - permits non-writable shared mappings
  */
-struct backing_dev_info mtd_bdi_ro_mappable = {
+static struct backing_dev_info mtd_bdi_ro_mappable = {
 	.capabilities	= (BDI_CAP_MAP_COPY | BDI_CAP_MAP_DIRECT |
 			   BDI_CAP_EXEC_MAP | BDI_CAP_READ_MAP),
 };
@@ -62,7 +62,7 @@
  * - permits private mappings, copies are taken of the data
  * - permits non-writable shared mappings
  */
-struct backing_dev_info mtd_bdi_rw_mappable = {
+static struct backing_dev_info mtd_bdi_rw_mappable = {
 	.capabilities	= (BDI_CAP_MAP_COPY | BDI_CAP_MAP_DIRECT |
 			   BDI_CAP_EXEC_MAP | BDI_CAP_READ_MAP |
 			   BDI_CAP_WRITE_MAP),

diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
new file mode 100644
index 0000000..237913c
--- /dev/null
+++ b/drivers/mtd/mtdswap.c

@@ -0,0 +1,1587 @@
+/*
+ * Swap block device support for MTDs
+ * Turns an MTD device into a swap device with block wear leveling
+ *
+ * Copyright © 2007,2011 Nokia Corporation. All rights reserved.
+ *
+ * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com>
+ *
+ * Based on Richard Purdie's earlier implementation in 2007. Background
+ * support and lock-less operation written by Adrian Hunter.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/blktrans.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/genhd.h>
+#include <linux/swap.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/device.h>
+#include <linux/math64.h>
+
+#define MTDSWAP_PREFIX "mtdswap"
+
+/*
+ * The number of free eraseblocks when GC should stop
+ */
+#define CLEAN_BLOCK_THRESHOLD	20
+
+/*
+ * Number of free eraseblocks below which GC can also collect low frag
+ * blocks.
+ */
+#define LOW_FRAG_GC_TRESHOLD	5
+
+/*
+ * Wear level cost amortization. We want to do wear leveling on the background
+ * without disturbing gc too much. This is made by defining max GC frequency.
+ * Frequency value 6 means 1/6 of the GC passes will pick an erase block based
+ * on the biggest wear difference rather than the biggest dirtiness.
+ *
+ * The lower freq2 should be chosen so that it makes sure the maximum erase
+ * difference will decrease even if a malicious application is deliberately
+ * trying to make erase differences large.
+ */
+#define MAX_ERASE_DIFF		4000
+#define COLLECT_NONDIRTY_BASE	MAX_ERASE_DIFF
+#define COLLECT_NONDIRTY_FREQ1	6
+#define COLLECT_NONDIRTY_FREQ2	4
+
+#define PAGE_UNDEF		UINT_MAX
+#define BLOCK_UNDEF		UINT_MAX
+#define BLOCK_ERROR		(UINT_MAX - 1)
+#define BLOCK_MAX		(UINT_MAX - 2)
+
+#define EBLOCK_BAD		(1 << 0)
+#define EBLOCK_NOMAGIC		(1 << 1)
+#define EBLOCK_BITFLIP		(1 << 2)
+#define EBLOCK_FAILED		(1 << 3)
+#define EBLOCK_READERR		(1 << 4)
+#define EBLOCK_IDX_SHIFT	5
+
+struct swap_eb {
+	struct rb_node rb;
+	struct rb_root *root;
+
+	unsigned int flags;
+	unsigned int active_count;
+	unsigned int erase_count;
+	unsigned int pad;		/* speeds up pointer decremtnt */
+};
+
+#define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
+				rb)->erase_count)
+#define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
+				rb)->erase_count)
+
+struct mtdswap_tree {
+	struct rb_root root;
+	unsigned int count;
+};
+
+enum {
+	MTDSWAP_CLEAN,
+	MTDSWAP_USED,
+	MTDSWAP_LOWFRAG,
+	MTDSWAP_HIFRAG,
+	MTDSWAP_DIRTY,
+	MTDSWAP_BITFLIP,
+	MTDSWAP_FAILING,
+	MTDSWAP_TREE_CNT,
+};
+
+struct mtdswap_dev {
+	struct mtd_blktrans_dev *mbd_dev;
+	struct mtd_info *mtd;
+	struct device *dev;
+
+	unsigned int *page_data;
+	unsigned int *revmap;
+
+	unsigned int eblks;
+	unsigned int spare_eblks;
+	unsigned int pages_per_eblk;
+	unsigned int max_erase_count;
+	struct swap_eb *eb_data;
+
+	struct mtdswap_tree trees[MTDSWAP_TREE_CNT];
+
+	unsigned long long sect_read_count;
+	unsigned long long sect_write_count;
+	unsigned long long mtd_write_count;
+	unsigned long long mtd_read_count;
+	unsigned long long discard_count;
+	unsigned long long discard_page_count;
+
+	unsigned int curr_write_pos;
+	struct swap_eb *curr_write;
+
+	char *page_buf;
+	char *oob_buf;
+
+	struct dentry *debugfs_root;
+};
+
+struct mtdswap_oobdata {
+	__le16 magic;
+	__le32 count;
+} __attribute__((packed));
+
+#define MTDSWAP_MAGIC_CLEAN	0x2095
+#define MTDSWAP_MAGIC_DIRTY	(MTDSWAP_MAGIC_CLEAN + 1)
+#define MTDSWAP_TYPE_CLEAN	0
+#define MTDSWAP_TYPE_DIRTY	1
+#define MTDSWAP_OOBSIZE		sizeof(struct mtdswap_oobdata)
+
+#define MTDSWAP_ERASE_RETRIES	3 /* Before marking erase block bad */
+#define MTDSWAP_IO_RETRIES	3
+
+enum {
+	MTDSWAP_SCANNED_CLEAN,
+	MTDSWAP_SCANNED_DIRTY,
+	MTDSWAP_SCANNED_BITFLIP,
+	MTDSWAP_SCANNED_BAD,
+};
+
+/*
+ * In the worst case mtdswap_writesect() has allocated the last clean
+ * page from the current block and is then pre-empted by the GC
+ * thread. The thread can consume a full erase block when moving a
+ * block.
+ */
+#define MIN_SPARE_EBLOCKS	2
+#define MIN_ERASE_BLOCKS	(MIN_SPARE_EBLOCKS + 1)
+
+#define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root)
+#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL)
+#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name))
+#define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count)
+
+#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv)
+
+static char partitions[128] = "";
+module_param_string(partitions, partitions, sizeof(partitions), 0444);
+MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap "
+		"partitions=\"1,3,5\"");
+
+static unsigned int spare_eblocks = 10;
+module_param(spare_eblocks, uint, 0444);
+MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for "
+		"garbage collection (default 10%)");
+
+static bool header; /* false */
+module_param(header, bool, 0444);
+MODULE_PARM_DESC(header,
+		"Include builtin swap header (default 0, without header)");
+
+static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background);
+
+static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb)
+{
+	return (loff_t)(eb - d->eb_data) * d->mtd->erasesize;
+}
+
+static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb)
+{
+	unsigned int oldidx;
+	struct mtdswap_tree *tp;
+
+	if (eb->root) {
+		tp = container_of(eb->root, struct mtdswap_tree, root);
+		oldidx = tp - &d->trees[0];
+
+		d->trees[oldidx].count--;
+		rb_erase(&eb->rb, eb->root);
+	}
+}
+
+static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb)
+{
+	struct rb_node **p, *parent = NULL;
+	struct swap_eb *cur;
+
+	p = &root->rb_node;
+	while (*p) {
+		parent = *p;
+		cur = rb_entry(parent, struct swap_eb, rb);
+		if (eb->erase_count > cur->erase_count)
+			p = &(*p)->rb_right;
+		else
+			p = &(*p)->rb_left;
+	}
+
+	rb_link_node(&eb->rb, parent, p);
+	rb_insert_color(&eb->rb, root);
+}
+
+static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx)
+{
+	struct rb_root *root;
+
+	if (eb->root == &d->trees[idx].root)
+		return;
+
+	mtdswap_eb_detach(d, eb);
+	root = &d->trees[idx].root;
+	__mtdswap_rb_add(root, eb);
+	eb->root = root;
+	d->trees[idx].count++;
+}
+
+static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx)
+{
+	struct rb_node *p;
+	unsigned int i;
+
+	p = rb_first(root);
+	i = 0;
+	while (i < idx && p) {
+		p = rb_next(p);
+		i++;
+	}
+
+	return p;
+}
+
+static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
+{
+	int ret;
+	loff_t offset;
+
+	d->spare_eblks--;
+	eb->flags |= EBLOCK_BAD;
+	mtdswap_eb_detach(d, eb);
+	eb->root = NULL;
+
+	/* badblocks not supported */
+	if (!d->mtd->block_markbad)
+		return 1;
+
+	offset = mtdswap_eb_offset(d, eb);
+	dev_warn(d->dev, "Marking bad block at %08llx\n", offset);
+	ret = d->mtd->block_markbad(d->mtd, offset);
+
+	if (ret) {
+		dev_warn(d->dev, "Mark block bad failed for block at %08llx "
+			"error %d\n", offset, ret);
+		return ret;
+	}
+
+	return 1;
+
+}
+
+static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb)
+{
+	unsigned int marked = eb->flags & EBLOCK_FAILED;
+	struct swap_eb *curr_write = d->curr_write;
+
+	eb->flags |= EBLOCK_FAILED;
+	if (curr_write == eb) {
+		d->curr_write = NULL;
+
+		if (!marked && d->curr_write_pos != 0) {
+			mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
+			return 0;
+		}
+	}
+
+	return mtdswap_handle_badblock(d, eb);
+}
+
+static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from,
+			struct mtd_oob_ops *ops)
+{
+	int ret = d->mtd->read_oob(d->mtd, from, ops);
+
+	if (ret == -EUCLEAN)
+		return ret;
+
+	if (ret) {
+		dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n",
+			ret, from);
+		return ret;
+	}
+
+	if (ops->oobretlen < ops->ooblen) {
+		dev_warn(d->dev, "Read OOB return short read (%zd bytes not "
+			"%zd) for block at %08llx\n",
+			ops->oobretlen, ops->ooblen, from);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
+{
+	struct mtdswap_oobdata *data, *data2;
+	int ret;
+	loff_t offset;
+	struct mtd_oob_ops ops;
+
+	offset = mtdswap_eb_offset(d, eb);
+
+	/* Check first if the block is bad. */
+	if (d->mtd->block_isbad && d->mtd->block_isbad(d->mtd, offset))
+		return MTDSWAP_SCANNED_BAD;
+
+	ops.ooblen = 2 * d->mtd->ecclayout->oobavail;
+	ops.oobbuf = d->oob_buf;
+	ops.ooboffs = 0;
+	ops.datbuf = NULL;
+	ops.mode = MTD_OOB_AUTO;
+
+	ret = mtdswap_read_oob(d, offset, &ops);
+
+	if (ret && ret != -EUCLEAN)
+		return ret;
+
+	data = (struct mtdswap_oobdata *)d->oob_buf;
+	data2 = (struct mtdswap_oobdata *)
+		(d->oob_buf + d->mtd->ecclayout->oobavail);
+
+	if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) {
+		eb->erase_count = le32_to_cpu(data->count);
+		if (ret == -EUCLEAN)
+			ret = MTDSWAP_SCANNED_BITFLIP;
+		else {
+			if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY)
+				ret = MTDSWAP_SCANNED_DIRTY;
+			else
+				ret = MTDSWAP_SCANNED_CLEAN;
+		}
+	} else {
+		eb->flags |= EBLOCK_NOMAGIC;
+		ret = MTDSWAP_SCANNED_DIRTY;
+	}
+
+	return ret;
+}
+
+static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb,
+				u16 marker)
+{
+	struct mtdswap_oobdata n;
+	int ret;
+	loff_t offset;
+	struct mtd_oob_ops ops;
+
+	ops.ooboffs = 0;
+	ops.oobbuf = (uint8_t *)&n;
+	ops.mode = MTD_OOB_AUTO;
+	ops.datbuf = NULL;
+
+	if (marker == MTDSWAP_TYPE_CLEAN) {
+		n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN);
+		n.count = cpu_to_le32(eb->erase_count);
+		ops.ooblen = MTDSWAP_OOBSIZE;
+		offset = mtdswap_eb_offset(d, eb);
+	} else {
+		n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY);
+		ops.ooblen = sizeof(n.magic);
+		offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize;
+	}
+
+	ret = d->mtd->write_oob(d->mtd, offset , &ops);
+
+	if (ret) {
+		dev_warn(d->dev, "Write OOB failed for block at %08llx "
+			"error %d\n", offset, ret);
+		if (ret == -EIO || ret == -EBADMSG)
+			mtdswap_handle_write_error(d, eb);
+		return ret;
+	}
+
+	if (ops.oobretlen != ops.ooblen) {
+		dev_warn(d->dev, "Short OOB write for block at %08llx: "
+			"%zd not %zd\n",
+			offset, ops.oobretlen, ops.ooblen);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Are there any erase blocks without MAGIC_CLEAN header, presumably
+ * because power was cut off after erase but before header write? We
+ * need to guestimate the erase count.
+ */
+static void mtdswap_check_counts(struct mtdswap_dev *d)
+{
+	struct rb_root hist_root = RB_ROOT;
+	struct rb_node *medrb;
+	struct swap_eb *eb;
+	unsigned int i, cnt, median;
+
+	cnt = 0;
+	for (i = 0; i < d->eblks; i++) {
+		eb = d->eb_data + i;
+
+		if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
+			continue;
+
+		__mtdswap_rb_add(&hist_root, eb);
+		cnt++;
+	}
+
+	if (cnt == 0)
+		return;
+
+	medrb = mtdswap_rb_index(&hist_root, cnt / 2);
+	median = rb_entry(medrb, struct swap_eb, rb)->erase_count;
+
+	d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root);
+
+	for (i = 0; i < d->eblks; i++) {
+		eb = d->eb_data + i;
+
+		if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR))
+			eb->erase_count = median;
+
+		if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR))
+			continue;
+
+		rb_erase(&eb->rb, &hist_root);
+	}
+}
+
+static void mtdswap_scan_eblks(struct mtdswap_dev *d)
+{
+	int status;
+	unsigned int i, idx;
+	struct swap_eb *eb;
+
+	for (i = 0; i < d->eblks; i++) {
+		eb = d->eb_data + i;
+
+		status = mtdswap_read_markers(d, eb);
+		if (status < 0)
+			eb->flags |= EBLOCK_READERR;
+		else if (status == MTDSWAP_SCANNED_BAD) {
+			eb->flags |= EBLOCK_BAD;
+			continue;
+		}
+
+		switch (status) {
+		case MTDSWAP_SCANNED_CLEAN:
+			idx = MTDSWAP_CLEAN;
+			break;
+		case MTDSWAP_SCANNED_DIRTY:
+		case MTDSWAP_SCANNED_BITFLIP:
+			idx = MTDSWAP_DIRTY;
+			break;
+		default:
+			idx = MTDSWAP_FAILING;
+		}
+
+		eb->flags |= (idx << EBLOCK_IDX_SHIFT);
+	}
+
+	mtdswap_check_counts(d);
+
+	for (i = 0; i < d->eblks; i++) {
+		eb = d->eb_data + i;
+
+		if (eb->flags & EBLOCK_BAD)
+			continue;
+
+		idx = eb->flags >> EBLOCK_IDX_SHIFT;
+		mtdswap_rb_add(d, eb, idx);
+	}
+}
+
+/*
+ * Place eblk into a tree corresponding to its number of active blocks
+ * it contains.
+ */
+static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb)
+{
+	unsigned int weight = eb->active_count;
+	unsigned int maxweight = d->pages_per_eblk;
+
+	if (eb == d->curr_write)
+		return;
+
+	if (eb->flags & EBLOCK_BITFLIP)
+		mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
+	else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED))
+		mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
+	if (weight == maxweight)
+		mtdswap_rb_add(d, eb, MTDSWAP_USED);
+	else if (weight == 0)
+		mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
+	else if (weight > (maxweight/2))
+		mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG);
+	else
+		mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG);
+}
+
+
+static void mtdswap_erase_callback(struct erase_info *done)
+{
+	wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv;
+	wake_up(wait_q);
+}
+
+static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb)
+{
+	struct mtd_info *mtd = d->mtd;
+	struct erase_info erase;
+	wait_queue_head_t wq;
+	unsigned int retries = 0;
+	int ret;
+
+	eb->erase_count++;
+	if (eb->erase_count > d->max_erase_count)
+		d->max_erase_count = eb->erase_count;
+
+retry:
+	init_waitqueue_head(&wq);
+	memset(&erase, 0, sizeof(struct erase_info));
+
+	erase.mtd	= mtd;
+	erase.callback	= mtdswap_erase_callback;
+	erase.addr	= mtdswap_eb_offset(d, eb);
+	erase.len	= mtd->erasesize;
+	erase.priv	= (u_long)&wq;
+
+	ret = mtd->erase(mtd, &erase);
+	if (ret) {
+		if (retries++ < MTDSWAP_ERASE_RETRIES) {
+			dev_warn(d->dev,
+				"erase of erase block %#llx on %s failed",
+				erase.addr, mtd->name);
+			yield();
+			goto retry;
+		}
+
+		dev_err(d->dev, "Cannot erase erase block %#llx on %s\n",
+			erase.addr, mtd->name);
+
+		mtdswap_handle_badblock(d, eb);
+		return -EIO;
+	}
+
+	ret = wait_event_interruptible(wq, erase.state == MTD_ERASE_DONE ||
+					   erase.state == MTD_ERASE_FAILED);
+	if (ret) {
+		dev_err(d->dev, "Interrupted erase block %#llx erassure on %s",
+			erase.addr, mtd->name);
+		return -EINTR;
+	}
+
+	if (erase.state == MTD_ERASE_FAILED) {
+		if (retries++ < MTDSWAP_ERASE_RETRIES) {
+			dev_warn(d->dev,
+				"erase of erase block %#llx on %s failed",
+				erase.addr, mtd->name);
+			yield();
+			goto retry;
+		}
+
+		mtdswap_handle_badblock(d, eb);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page,
+				unsigned int *block)
+{
+	int ret;
+	struct swap_eb *old_eb = d->curr_write;
+	struct rb_root *clean_root;
+	struct swap_eb *eb;
+
+	if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) {
+		do {
+			if (TREE_EMPTY(d, CLEAN))
+				return -ENOSPC;
+
+			clean_root = TREE_ROOT(d, CLEAN);
+			eb = rb_entry(rb_first(clean_root), struct swap_eb, rb);
+			rb_erase(&eb->rb, clean_root);
+			eb->root = NULL;
+			TREE_COUNT(d, CLEAN)--;
+
+			ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY);
+		} while (ret == -EIO || ret == -EBADMSG);
+
+		if (ret)
+			return ret;
+
+		d->curr_write_pos = 0;
+		d->curr_write = eb;
+		if (old_eb)
+			mtdswap_store_eb(d, old_eb);
+	}
+
+	*block = (d->curr_write - d->eb_data) * d->pages_per_eblk +
+		d->curr_write_pos;
+
+	d->curr_write->active_count++;
+	d->revmap[*block] = page;
+	d->curr_write_pos++;
+
+	return 0;
+}
+
+static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d)
+{
+	return TREE_COUNT(d, CLEAN) * d->pages_per_eblk +
+		d->pages_per_eblk - d->curr_write_pos;
+}
+
+static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d)
+{
+	return mtdswap_free_page_cnt(d) > d->pages_per_eblk;
+}
+
+static int mtdswap_write_block(struct mtdswap_dev *d, char *buf,
+			unsigned int page, unsigned int *bp, int gc_context)
+{
+	struct mtd_info *mtd = d->mtd;
+	struct swap_eb *eb;
+	size_t retlen;
+	loff_t writepos;
+	int ret;
+
+retry:
+	if (!gc_context)
+		while (!mtdswap_enough_free_pages(d))
+			if (mtdswap_gc(d, 0) > 0)
+				return -ENOSPC;
+
+	ret = mtdswap_map_free_block(d, page, bp);
+	eb = d->eb_data + (*bp / d->pages_per_eblk);
+
+	if (ret == -EIO || ret == -EBADMSG) {
+		d->curr_write = NULL;
+		eb->active_count--;
+		d->revmap[*bp] = PAGE_UNDEF;
+		goto retry;
+	}
+
+	if (ret < 0)
+		return ret;
+
+	writepos = (loff_t)*bp << PAGE_SHIFT;
+	ret =  mtd->write(mtd, writepos, PAGE_SIZE, &retlen, buf);
+	if (ret == -EIO || ret == -EBADMSG) {
+		d->curr_write_pos--;
+		eb->active_count--;
+		d->revmap[*bp] = PAGE_UNDEF;
+		mtdswap_handle_write_error(d, eb);
+		goto retry;
+	}
+
+	if (ret < 0) {
+		dev_err(d->dev, "Write to MTD device failed: %d (%zd written)",
+			ret, retlen);
+		goto err;
+	}
+
+	if (retlen != PAGE_SIZE) {
+		dev_err(d->dev, "Short write to MTD device: %zd written",
+			retlen);
+		ret = -EIO;
+		goto err;
+	}
+
+	return ret;
+
+err:
+	d->curr_write_pos--;
+	eb->active_count--;
+	d->revmap[*bp] = PAGE_UNDEF;
+
+	return ret;
+}
+
+static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock,
+		unsigned int *newblock)
+{
+	struct mtd_info *mtd = d->mtd;
+	struct swap_eb *eb, *oldeb;
+	int ret;
+	size_t retlen;
+	unsigned int page, retries;
+	loff_t readpos;
+
+	page = d->revmap[oldblock];
+	readpos = (loff_t) oldblock << PAGE_SHIFT;
+	retries = 0;
+
+retry:
+	ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);
+
+	if (ret < 0 && ret != -EUCLEAN) {
+		oldeb = d->eb_data + oldblock / d->pages_per_eblk;
+		oldeb->flags |= EBLOCK_READERR;
+
+		dev_err(d->dev, "Read Error: %d (block %u)\n", ret,
+			oldblock);
+		retries++;
+		if (retries < MTDSWAP_IO_RETRIES)
+			goto retry;
+
+		goto read_error;
+	}
+
+	if (retlen != PAGE_SIZE) {
+		dev_err(d->dev, "Short read: %zd (block %u)\n", retlen,
+		       oldblock);
+		ret = -EIO;
+		goto read_error;
+	}
+
+	ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1);
+	if (ret < 0) {
+		d->page_data[page] = BLOCK_ERROR;
+		dev_err(d->dev, "Write error: %d\n", ret);
+		return ret;
+	}
+
+	eb = d->eb_data + *newblock / d->pages_per_eblk;
+	d->page_data[page] = *newblock;
+	d->revmap[oldblock] = PAGE_UNDEF;
+	eb = d->eb_data + oldblock / d->pages_per_eblk;
+	eb->active_count--;
+
+	return 0;
+
+read_error:
+	d->page_data[page] = BLOCK_ERROR;
+	d->revmap[oldblock] = PAGE_UNDEF;
+	return ret;
+}
+
+static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb)
+{
+	unsigned int i, block, eblk_base, newblock;
+	int ret, errcode;
+
+	errcode = 0;
+	eblk_base = (eb - d->eb_data) * d->pages_per_eblk;
+
+	for (i = 0; i < d->pages_per_eblk; i++) {
+		if (d->spare_eblks < MIN_SPARE_EBLOCKS)
+			return -ENOSPC;
+
+		block = eblk_base + i;
+		if (d->revmap[block] == PAGE_UNDEF)
+			continue;
+
+		ret = mtdswap_move_block(d, block, &newblock);
+		if (ret < 0 && !errcode)
+			errcode = ret;
+	}
+
+	return errcode;
+}
+
+static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d)
+{
+	int idx, stopat;
+
+	if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_TRESHOLD)
+		stopat = MTDSWAP_LOWFRAG;
+	else
+		stopat = MTDSWAP_HIFRAG;
+
+	for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--)
+		if (d->trees[idx].root.rb_node != NULL)
+			return idx;
+
+	return -1;
+}
+
+static int mtdswap_wlfreq(unsigned int maxdiff)
+{
+	unsigned int h, x, y, dist, base;
+
+	/*
+	 * Calculate linear ramp down from f1 to f2 when maxdiff goes from
+	 * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE.  Similar
+	 * to triangle with height f1 - f1 and width COLLECT_NONDIRTY_BASE.
+	 */
+
+	dist = maxdiff - MAX_ERASE_DIFF;
+	if (dist > COLLECT_NONDIRTY_BASE)
+		dist = COLLECT_NONDIRTY_BASE;
+
+	/*
+	 * Modelling the slop as right angular triangle with base
+	 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is
+	 * equal to the ratio h/base.
+	 */
+	h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2;
+	base = COLLECT_NONDIRTY_BASE;
+
+	x = dist - base;
+	y = (x * h + base / 2) / base;
+
+	return COLLECT_NONDIRTY_FREQ2 + y;
+}
+
+static int mtdswap_choose_wl_tree(struct mtdswap_dev *d)
+{
+	static unsigned int pick_cnt;
+	unsigned int i, idx = -1, wear, max;
+	struct rb_root *root;
+
+	max = 0;
+	for (i = 0; i <= MTDSWAP_DIRTY; i++) {
+		root = &d->trees[i].root;
+		if (root->rb_node == NULL)
+			continue;
+
+		wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root);
+		if (wear > max) {
+			max = wear;
+			idx = i;
+		}
+	}
+
+	if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) {
+		pick_cnt = 0;
+		return idx;
+	}
+
+	pick_cnt++;
+	return -1;
+}
+
+static int mtdswap_choose_gc_tree(struct mtdswap_dev *d,
+				unsigned int background)
+{
+	int idx;
+
+	if (TREE_NONEMPTY(d, FAILING) &&
+		(background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY))))
+		return MTDSWAP_FAILING;
+
+	idx = mtdswap_choose_wl_tree(d);
+	if (idx >= MTDSWAP_CLEAN)
+		return idx;
+
+	return __mtdswap_choose_gc_tree(d);
+}
+
+static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d,
+					unsigned int background)
+{
+	struct rb_root *rp = NULL;
+	struct swap_eb *eb = NULL;
+	int idx;
+
+	if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD &&
+		TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING))
+		return NULL;
+
+	idx = mtdswap_choose_gc_tree(d, background);
+	if (idx < 0)
+		return NULL;
+
+	rp = &d->trees[idx].root;
+	eb = rb_entry(rb_first(rp), struct swap_eb, rb);
+
+	rb_erase(&eb->rb, rp);
+	eb->root = NULL;
+	d->trees[idx].count--;
+	return eb;
+}
+
+static unsigned int mtdswap_test_patt(unsigned int i)
+{
+	return i % 2 ? 0x55555555 : 0xAAAAAAAA;
+}
+
+static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
+					struct swap_eb *eb)
+{
+	struct mtd_info *mtd = d->mtd;
+	unsigned int test, i, j, patt, mtd_pages;
+	loff_t base, pos;
+	unsigned int *p1 = (unsigned int *)d->page_buf;
+	unsigned char *p2 = (unsigned char *)d->oob_buf;
+	struct mtd_oob_ops ops;
+	int ret;
+
+	ops.mode = MTD_OOB_AUTO;
+	ops.len = mtd->writesize;
+	ops.ooblen = mtd->ecclayout->oobavail;
+	ops.ooboffs = 0;
+	ops.datbuf = d->page_buf;
+	ops.oobbuf = d->oob_buf;
+	base = mtdswap_eb_offset(d, eb);
+	mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize;
+
+	for (test = 0; test < 2; test++) {
+		pos = base;
+		for (i = 0; i < mtd_pages; i++) {
+			patt = mtdswap_test_patt(test + i);
+			memset(d->page_buf, patt, mtd->writesize);
+			memset(d->oob_buf, patt, mtd->ecclayout->oobavail);
+			ret = mtd->write_oob(mtd, pos, &ops);
+			if (ret)
+				goto error;
+
+			pos += mtd->writesize;
+		}
+
+		pos = base;
+		for (i = 0; i < mtd_pages; i++) {
+			ret = mtd->read_oob(mtd, pos, &ops);
+			if (ret)
+				goto error;
+
+			patt = mtdswap_test_patt(test + i);
+			for (j = 0; j < mtd->writesize/sizeof(int); j++)
+				if (p1[j] != patt)
+					goto error;
+
+			for (j = 0; j < mtd->ecclayout->oobavail; j++)
+				if (p2[j] != (unsigned char)patt)
+					goto error;
+
+			pos += mtd->writesize;
+		}
+
+		ret = mtdswap_erase_block(d, eb);
+		if (ret)
+			goto error;
+	}
+
+	eb->flags &= ~EBLOCK_READERR;
+	return 1;
+
+error:
+	mtdswap_handle_badblock(d, eb);
+	return 0;
+}
+
+static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background)
+{
+	struct swap_eb *eb;
+	int ret;
+
+	if (d->spare_eblks < MIN_SPARE_EBLOCKS)
+		return 1;
+
+	eb = mtdswap_pick_gc_eblk(d, background);
+	if (!eb)
+		return 1;
+
+	ret = mtdswap_gc_eblock(d, eb);
+	if (ret == -ENOSPC)
+		return 1;
+
+	if (eb->flags & EBLOCK_FAILED) {
+		mtdswap_handle_badblock(d, eb);
+		return 0;
+	}
+
+	eb->flags &= ~EBLOCK_BITFLIP;
+	ret = mtdswap_erase_block(d, eb);
+	if ((eb->flags & EBLOCK_READERR) &&
+		(ret || !mtdswap_eblk_passes(d, eb)))
+		return 0;
+
+	if (ret == 0)
+		ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN);
+
+	if (ret == 0)
+		mtdswap_rb_add(d, eb, MTDSWAP_CLEAN);
+	else if (ret != -EIO && ret != -EBADMSG)
+		mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);
+
+	return 0;
+}
+
+static void mtdswap_background(struct mtd_blktrans_dev *dev)
+{
+	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
+	int ret;
+
+	while (1) {
+		ret = mtdswap_gc(d, 1);
+		if (ret || mtd_blktrans_cease_background(dev))
+			return;
+	}
+}
+
+static void mtdswap_cleanup(struct mtdswap_dev *d)
+{
+	vfree(d->eb_data);
+	vfree(d->revmap);
+	vfree(d->page_data);
+	kfree(d->oob_buf);
+	kfree(d->page_buf);
+}
+
+static int mtdswap_flush(struct mtd_blktrans_dev *dev)
+{
+	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
+
+	if (d->mtd->sync)
+		d->mtd->sync(d->mtd);
+	return 0;
+}
+
+static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
+{
+	loff_t offset;
+	unsigned int badcnt;
+
+	badcnt = 0;
+
+	if (mtd->block_isbad)
+		for (offset = 0; offset < size; offset += mtd->erasesize)
+			if (mtd->block_isbad(mtd, offset))
+				badcnt++;
+
+	return badcnt;
+}
+
+static int mtdswap_writesect(struct mtd_blktrans_dev *dev,
+			unsigned long page, char *buf)
+{
+	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
+	unsigned int newblock, mapped;
+	struct swap_eb *eb;
+	int ret;
+
+	d->sect_write_count++;
+
+	if (d->spare_eblks < MIN_SPARE_EBLOCKS)
+		return -ENOSPC;
+
+	if (header) {
+		/* Ignore writes to the header page */
+		if (unlikely(page == 0))
+			return 0;
+
+		page--;
+	}
+
+	mapped = d->page_data[page];
+	if (mapped <= BLOCK_MAX) {
+		eb = d->eb_data + (mapped / d->pages_per_eblk);
+		eb->active_count--;
+		mtdswap_store_eb(d, eb);
+		d->page_data[page] = BLOCK_UNDEF;
+		d->revmap[mapped] = PAGE_UNDEF;
+	}
+
+	ret = mtdswap_write_block(d, buf, page, &newblock, 0);
+	d->mtd_write_count++;
+
+	if (ret < 0)
+		return ret;
+
+	eb = d->eb_data + (newblock / d->pages_per_eblk);
+	d->page_data[page] = newblock;
+
+	return 0;
+}
+
+/* Provide a dummy swap header for the kernel */
+static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf)
+{
+	union swap_header *hd = (union swap_header *)(buf);
+
+	memset(buf, 0, PAGE_SIZE - 10);
+
+	hd->info.version = 1;
+	hd->info.last_page = d->mbd_dev->size - 1;
+	hd->info.nr_badpages = 0;
+
+	memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10);
+
+	return 0;
+}
+
+static int mtdswap_readsect(struct mtd_blktrans_dev *dev,
+			unsigned long page, char *buf)
+{
+	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
+	struct mtd_info *mtd = d->mtd;
+	unsigned int realblock, retries;
+	loff_t readpos;
+	struct swap_eb *eb;
+	size_t retlen;
+	int ret;
+
+	d->sect_read_count++;
+
+	if (header) {
+		if (unlikely(page == 0))
+			return mtdswap_auto_header(d, buf);
+
+		page--;
+	}
+
+	realblock = d->page_data[page];
+	if (realblock > BLOCK_MAX) {
+		memset(buf, 0x0, PAGE_SIZE);
+		if (realblock == BLOCK_UNDEF)
+			return 0;
+		else
+			return -EIO;
+	}
+
+	eb = d->eb_data + (realblock / d->pages_per_eblk);
+	BUG_ON(d->revmap[realblock] == PAGE_UNDEF);
+
+	readpos = (loff_t)realblock << PAGE_SHIFT;
+	retries = 0;
+
+retry:
+	ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, buf);
+
+	d->mtd_read_count++;
+	if (ret == -EUCLEAN) {
+		eb->flags |= EBLOCK_BITFLIP;
+		mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP);
+		ret = 0;
+	}
+
+	if (ret < 0) {
+		dev_err(d->dev, "Read error %d\n", ret);
+		eb->flags |= EBLOCK_READERR;
+		mtdswap_rb_add(d, eb, MTDSWAP_FAILING);
+		retries++;
+		if (retries < MTDSWAP_IO_RETRIES)
+			goto retry;
+
+		return ret;
+	}
+
+	if (retlen != PAGE_SIZE) {
+		dev_err(d->dev, "Short read %zd\n", retlen);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first,
+			unsigned nr_pages)
+{
+	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
+	unsigned long page;
+	struct swap_eb *eb;
+	unsigned int mapped;
+
+	d->discard_count++;
+
+	for (page = first; page < first + nr_pages; page++) {
+		mapped = d->page_data[page];
+		if (mapped <= BLOCK_MAX) {
+			eb = d->eb_data + (mapped / d->pages_per_eblk);
+			eb->active_count--;
+			mtdswap_store_eb(d, eb);
+			d->page_data[page] = BLOCK_UNDEF;
+			d->revmap[mapped] = PAGE_UNDEF;
+			d->discard_page_count++;
+		} else if (mapped == BLOCK_ERROR) {
+			d->page_data[page] = BLOCK_UNDEF;
+			d->discard_page_count++;
+		}
+	}
+
+	return 0;
+}
+
+static int mtdswap_show(struct seq_file *s, void *data)
+{
+	struct mtdswap_dev *d = (struct mtdswap_dev *) s->private;
+	unsigned long sum;
+	unsigned int count[MTDSWAP_TREE_CNT];
+	unsigned int min[MTDSWAP_TREE_CNT];
+	unsigned int max[MTDSWAP_TREE_CNT];
+	unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages;
+	uint64_t use_size;
+	char *name[] = {"clean", "used", "low", "high", "dirty", "bitflip",
+			"failing"};
+
+	mutex_lock(&d->mbd_dev->lock);
+
+	for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
+		struct rb_root *root = &d->trees[i].root;
+
+		if (root->rb_node) {
+			count[i] = d->trees[i].count;
+			min[i] = rb_entry(rb_first(root), struct swap_eb,
+					rb)->erase_count;
+			max[i] = rb_entry(rb_last(root), struct swap_eb,
+					rb)->erase_count;
+		} else
+			count[i] = 0;
+	}
+
+	if (d->curr_write) {
+		cw = 1;
+		cwp = d->curr_write_pos;
+		cwecount = d->curr_write->erase_count;
+	}
+
+	sum = 0;
+	for (i = 0; i < d->eblks; i++)
+		sum += d->eb_data[i].erase_count;
+
+	use_size = (uint64_t)d->eblks * d->mtd->erasesize;
+	bb_cnt = mtdswap_badblocks(d->mtd, use_size);
+
+	mapped = 0;
+	pages = d->mbd_dev->size;
+	for (i = 0; i < pages; i++)
+		if (d->page_data[i] != BLOCK_UNDEF)
+			mapped++;
+
+	mutex_unlock(&d->mbd_dev->lock);
+
+	for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
+		if (!count[i])
+			continue;
+
+		if (min[i] != max[i])
+			seq_printf(s, "%s:\t%5d erase blocks, erased min %d, "
+				"max %d times\n",
+				name[i], count[i], min[i], max[i]);
+		else
+			seq_printf(s, "%s:\t%5d erase blocks, all erased %d "
+				"times\n", name[i], count[i], min[i]);
+	}
+
+	if (bb_cnt)
+		seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt);
+
+	if (cw)
+		seq_printf(s, "current erase block: %u pages used, %u free, "
+			"erased %u times\n",
+			cwp, d->pages_per_eblk - cwp, cwecount);
+
+	seq_printf(s, "total erasures: %lu\n", sum);
+
+	seq_printf(s, "\n");
+
+	seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count);
+	seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count);
+	seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count);
+	seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count);
+	seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count);
+	seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count);
+
+	seq_printf(s, "\n");
+	seq_printf(s, "total pages: %u\n", pages);
+	seq_printf(s, "pages mapped: %u\n", mapped);
+
+	return 0;
+}
+
+static int mtdswap_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mtdswap_show, inode->i_private);
+}
+
+static const struct file_operations mtdswap_fops = {
+	.open		= mtdswap_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int mtdswap_add_debugfs(struct mtdswap_dev *d)
+{
+	struct gendisk *gd = d->mbd_dev->disk;
+	struct device *dev = disk_to_dev(gd);
+
+	struct dentry *root;
+	struct dentry *dent;
+
+	root = debugfs_create_dir(gd->disk_name, NULL);
+	if (IS_ERR(root))
+		return 0;
+
+	if (!root) {
+		dev_err(dev, "failed to initialize debugfs\n");
+		return -1;
+	}
+
+	d->debugfs_root = root;
+
+	dent = debugfs_create_file("stats", S_IRUSR, root, d,
+				&mtdswap_fops);
+	if (!dent) {
+		dev_err(d->dev, "debugfs_create_file failed\n");
+		debugfs_remove_recursive(root);
+		d->debugfs_root = NULL;
+		return -1;
+	}
+
+	return 0;
+}
+
+static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks,
+			unsigned int spare_cnt)
+{
+	struct mtd_info *mtd = d->mbd_dev->mtd;
+	unsigned int i, eblk_bytes, pages, blocks;
+	int ret = -ENOMEM;
+
+	d->mtd = mtd;
+	d->eblks = eblocks;
+	d->spare_eblks = spare_cnt;
+	d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT;
+
+	pages = d->mbd_dev->size;
+	blocks = eblocks * d->pages_per_eblk;
+
+	for (i = 0; i < MTDSWAP_TREE_CNT; i++)
+		d->trees[i].root = RB_ROOT;
+
+	d->page_data = vmalloc(sizeof(int)*pages);
+	if (!d->page_data)
+		goto page_data_fail;
+
+	d->revmap = vmalloc(sizeof(int)*blocks);
+	if (!d->revmap)
+		goto revmap_fail;
+
+	eblk_bytes = sizeof(struct swap_eb)*d->eblks;
+	d->eb_data = vmalloc(eblk_bytes);
+	if (!d->eb_data)
+		goto eb_data_fail;
+
+	memset(d->eb_data, 0, eblk_bytes);
+	for (i = 0; i < pages; i++)
+		d->page_data[i] = BLOCK_UNDEF;
+
+	for (i = 0; i < blocks; i++)
+		d->revmap[i] = PAGE_UNDEF;
+
+	d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!d->page_buf)
+		goto page_buf_fail;
+
+	d->oob_buf = kmalloc(2 * mtd->ecclayout->oobavail, GFP_KERNEL);
+	if (!d->oob_buf)
+		goto oob_buf_fail;
+
+	mtdswap_scan_eblks(d);
+
+	return 0;
+
+oob_buf_fail:
+	kfree(d->page_buf);
+page_buf_fail:
+	vfree(d->eb_data);
+eb_data_fail:
+	vfree(d->revmap);
+revmap_fail:
+	vfree(d->page_data);
+page_data_fail:
+	printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret);
+	return ret;
+}
+
+static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
+{
+	struct mtdswap_dev *d;
+	struct mtd_blktrans_dev *mbd_dev;
+	char *parts;
+	char *this_opt;
+	unsigned long part;
+	unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
+	uint64_t swap_size, use_size, size_limit;
+	struct nand_ecclayout *oinfo;
+	int ret;
+
+	parts = &partitions[0];
+	if (!*parts)
+		return;
+
+	while ((this_opt = strsep(&parts, ",")) != NULL) {
+		if (strict_strtoul(this_opt, 0, &part) < 0)
+			return;
+
+		if (mtd->index == part)
+			break;
+	}
+
+	if (mtd->index != part)
+		return;
+
+	if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) {
+		printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE "
+			"%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE);
+		return;
+	}
+
+	if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) {
+		printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size"
+			" %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize);
+		return;
+	}
+
+	oinfo = mtd->ecclayout;
+	if (!mtd->oobsize || !oinfo || oinfo->oobavail < MTDSWAP_OOBSIZE) {
+		printk(KERN_ERR "%s: Not enough free bytes in OOB, "
+			"%d available, %lu needed.\n",
+			MTDSWAP_PREFIX, oinfo->oobavail, MTDSWAP_OOBSIZE);
+		return;
+	}
+
+	if (spare_eblocks > 100)
+		spare_eblocks = 100;
+
+	use_size = mtd->size;
+	size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE;
+
+	if (mtd->size > size_limit) {
+		printk(KERN_WARNING "%s: Device too large. Limiting size to "
+			"%llu bytes\n", MTDSWAP_PREFIX, size_limit);
+		use_size = size_limit;
+	}
+
+	eblocks = mtd_div_by_eb(use_size, mtd);
+	use_size = eblocks * mtd->erasesize;
+	bad_blocks = mtdswap_badblocks(mtd, use_size);
+	eavailable = eblocks - bad_blocks;
+
+	if (eavailable < MIN_ERASE_BLOCKS) {
+		printk(KERN_ERR "%s: Not enough erase blocks. %u available, "
+			"%d needed\n", MTDSWAP_PREFIX, eavailable,
+			MIN_ERASE_BLOCKS);
+		return;
+	}
+
+	spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100);
+
+	if (spare_cnt < MIN_SPARE_EBLOCKS)
+		spare_cnt = MIN_SPARE_EBLOCKS;
+
+	if (spare_cnt > eavailable - 1)
+		spare_cnt = eavailable - 1;
+
+	swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize +
+		(header ? PAGE_SIZE : 0);
+
+	printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, "
+		"%u spare, %u bad blocks\n",
+		MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks);
+
+	d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL);
+	if (!d)
+		return;
+
+	mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL);
+	if (!mbd_dev) {
+		kfree(d);
+		return;
+	}
+
+	d->mbd_dev = mbd_dev;
+	mbd_dev->priv = d;
+
+	mbd_dev->mtd = mtd;
+	mbd_dev->devnum = mtd->index;
+	mbd_dev->size = swap_size >> PAGE_SHIFT;
+	mbd_dev->tr = tr;
+
+	if (!(mtd->flags & MTD_WRITEABLE))
+		mbd_dev->readonly = 1;
+
+	if (mtdswap_init(d, eblocks, spare_cnt) < 0)
+		goto init_failed;
+
+	if (add_mtd_blktrans_dev(mbd_dev) < 0)
+		goto cleanup;
+
+	d->dev = disk_to_dev(mbd_dev->disk);
+
+	ret = mtdswap_add_debugfs(d);
+	if (ret < 0)
+		goto debugfs_failed;
+
+	return;
+
+debugfs_failed:
+	del_mtd_blktrans_dev(mbd_dev);
+
+cleanup:
+	mtdswap_cleanup(d);
+
+init_failed:
+	kfree(mbd_dev);
+	kfree(d);
+}
+
+static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev)
+{
+	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
+
+	debugfs_remove_recursive(d->debugfs_root);
+	del_mtd_blktrans_dev(dev);
+	mtdswap_cleanup(d);
+	kfree(d);
+}
+
+static struct mtd_blktrans_ops mtdswap_ops = {
+	.name		= "mtdswap",
+	.major		= 0,
+	.part_bits	= 0,
+	.blksize	= PAGE_SIZE,
+	.flush		= mtdswap_flush,
+	.readsect	= mtdswap_readsect,
+	.writesect	= mtdswap_writesect,
+	.discard	= mtdswap_discard,
+	.background	= mtdswap_background,
+	.add_mtd	= mtdswap_add_mtd,
+	.remove_dev	= mtdswap_remove_dev,
+	.owner		= THIS_MODULE,
+};
+
+static int __init mtdswap_modinit(void)
+{
+	return register_mtd_blktrans(&mtdswap_ops);
+}
+
+static void __exit mtdswap_modexit(void)
+{
+	deregister_mtd_blktrans(&mtdswap_ops);
+}
+
+module_init(mtdswap_modinit);
+module_exit(mtdswap_modexit);
+
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
+MODULE_DESCRIPTION("Block device access to an MTD suitable for using as "
+		"swap space");

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 4f6c06f..a92054e 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig

@@ -31,6 +31,21 @@
 	  device thinks the write was successful, a bit could have been
 	  flipped accidentally due to device wear or something else.
 
+config MTD_NAND_BCH
+	tristate
+	select BCH
+	depends on MTD_NAND_ECC_BCH
+	default MTD_NAND
+
+config MTD_NAND_ECC_BCH
+	bool "Support software BCH ECC"
+	default n
+	help
+	  This enables support for software BCH error correction. Binary BCH
+	  codes are more powerful and cpu intensive than traditional Hamming
+	  ECC codes. They are used with NAND devices requiring more than 1 bit
+	  of error correction.
+
 config MTD_SM_COMMON
 	tristate
 	default n

diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 8ad6fae..5745d83 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile

@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_MTD_NAND)			+= nand.o
 obj-$(CONFIG_MTD_NAND_ECC)		+= nand_ecc.o
+obj-$(CONFIG_MTD_NAND_BCH)		+= nand_bch.o
 obj-$(CONFIG_MTD_NAND_IDS)		+= nand_ids.o
 obj-$(CONFIG_MTD_SM_COMMON) 		+= sm_common.o
 

diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index ccce0f0..6fae04b 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c

@@ -48,6 +48,9 @@
 #define no_ecc		0
 #endif
 
+static int use_dma = 1;
+module_param(use_dma, int, 0);
+
 static int on_flash_bbt = 0;
 module_param(on_flash_bbt, int, 0);
 
@@ -89,11 +92,20 @@
 	struct nand_chip	nand_chip;
 	struct mtd_info		mtd;
 	void __iomem		*io_base;
+	dma_addr_t		io_phys;
 	struct atmel_nand_data	*board;
 	struct device		*dev;
 	void __iomem		*ecc;
+
+	struct completion	comp;
+	struct dma_chan		*dma_chan;
 };
 
+static int cpu_has_dma(void)
+{
+	return cpu_is_at91sam9rl() || cpu_is_at91sam9g45();
+}
+
 /*
  * Enable NAND.
  */
@@ -150,7 +162,7 @@
 /*
  * Minimal-overhead PIO for data access.
  */
-static void atmel_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+static void atmel_read_buf8(struct mtd_info *mtd, u8 *buf, int len)
 {
 	struct nand_chip	*nand_chip = mtd->priv;
 
@@ -164,7 +176,7 @@
 	__raw_readsw(nand_chip->IO_ADDR_R, buf, len / 2);
 }
 
-static void atmel_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+static void atmel_write_buf8(struct mtd_info *mtd, const u8 *buf, int len)
 {
 	struct nand_chip	*nand_chip = mtd->priv;
 
@@ -178,6 +190,121 @@
 	__raw_writesw(nand_chip->IO_ADDR_W, buf, len / 2);
 }
 
+static void dma_complete_func(void *completion)
+{
+	complete(completion);
+}
+
+static int atmel_nand_dma_op(struct mtd_info *mtd, void *buf, int len,
+			       int is_read)
+{
+	struct dma_device *dma_dev;
+	enum dma_ctrl_flags flags;
+	dma_addr_t dma_src_addr, dma_dst_addr, phys_addr;
+	struct dma_async_tx_descriptor *tx = NULL;
+	dma_cookie_t cookie;
+	struct nand_chip *chip = mtd->priv;
+	struct atmel_nand_host *host = chip->priv;
+	void *p = buf;
+	int err = -EIO;
+	enum dma_data_direction dir = is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+	if (buf >= high_memory) {
+		struct page *pg;
+
+		if (((size_t)buf & PAGE_MASK) !=
+		    ((size_t)(buf + len - 1) & PAGE_MASK)) {
+			dev_warn(host->dev, "Buffer not fit in one page\n");
+			goto err_buf;
+		}
+
+		pg = vmalloc_to_page(buf);
+		if (pg == 0) {
+			dev_err(host->dev, "Failed to vmalloc_to_page\n");
+			goto err_buf;
+		}
+		p = page_address(pg) + ((size_t)buf & ~PAGE_MASK);
+	}
+
+	dma_dev = host->dma_chan->device;
+
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
+		DMA_COMPL_SKIP_DEST_UNMAP;
+
+	phys_addr = dma_map_single(dma_dev->dev, p, len, dir);
+	if (dma_mapping_error(dma_dev->dev, phys_addr)) {
+		dev_err(host->dev, "Failed to dma_map_single\n");
+		goto err_buf;
+	}
+
+	if (is_read) {
+		dma_src_addr = host->io_phys;
+		dma_dst_addr = phys_addr;
+	} else {
+		dma_src_addr = phys_addr;
+		dma_dst_addr = host->io_phys;
+	}
+
+	tx = dma_dev->device_prep_dma_memcpy(host->dma_chan, dma_dst_addr,
+					     dma_src_addr, len, flags);
+	if (!tx) {
+		dev_err(host->dev, "Failed to prepare DMA memcpy\n");
+		goto err_dma;
+	}
+
+	init_completion(&host->comp);
+	tx->callback = dma_complete_func;
+	tx->callback_param = &host->comp;
+
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		dev_err(host->dev, "Failed to do DMA tx_submit\n");
+		goto err_dma;
+	}
+
+	dma_async_issue_pending(host->dma_chan);
+	wait_for_completion(&host->comp);
+
+	err = 0;
+
+err_dma:
+	dma_unmap_single(dma_dev->dev, phys_addr, len, dir);
+err_buf:
+	if (err != 0)
+		dev_warn(host->dev, "Fall back to CPU I/O\n");
+	return err;
+}
+
+static void atmel_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct atmel_nand_host *host = chip->priv;
+
+	if (use_dma && len >= mtd->oobsize)
+		if (atmel_nand_dma_op(mtd, buf, len, 1) == 0)
+			return;
+
+	if (host->board->bus_width_16)
+		atmel_read_buf16(mtd, buf, len);
+	else
+		atmel_read_buf8(mtd, buf, len);
+}
+
+static void atmel_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct atmel_nand_host *host = chip->priv;
+
+	if (use_dma && len >= mtd->oobsize)
+		if (atmel_nand_dma_op(mtd, (void *)buf, len, 0) == 0)
+			return;
+
+	if (host->board->bus_width_16)
+		atmel_write_buf16(mtd, buf, len);
+	else
+		atmel_write_buf8(mtd, buf, len);
+}
+
 /*
  * Calculate HW ECC
  *
@@ -398,6 +525,8 @@
 		return -ENOMEM;
 	}
 
+	host->io_phys = (dma_addr_t)mem->start;
+
 	host->io_base = ioremap(mem->start, mem->end - mem->start + 1);
 	if (host->io_base == NULL) {
 		printk(KERN_ERR "atmel_nand: ioremap failed\n");
@@ -448,14 +577,11 @@
 
 	nand_chip->chip_delay = 20;		/* 20us command delay time */
 
-	if (host->board->bus_width_16) {	/* 16-bit bus width */
+	if (host->board->bus_width_16)	/* 16-bit bus width */
 		nand_chip->options |= NAND_BUSWIDTH_16;
-		nand_chip->read_buf = atmel_read_buf16;
-		nand_chip->write_buf = atmel_write_buf16;
-	} else {
-		nand_chip->read_buf = atmel_read_buf;
-		nand_chip->write_buf = atmel_write_buf;
-	}
+
+	nand_chip->read_buf = atmel_read_buf;
+	nand_chip->write_buf = atmel_write_buf;
 
 	platform_set_drvdata(pdev, host);
 	atmel_nand_enable(host);
@@ -473,6 +599,22 @@
 		nand_chip->options |= NAND_USE_FLASH_BBT;
 	}
 
+	if (cpu_has_dma() && use_dma) {
+		dma_cap_mask_t mask;
+
+		dma_cap_zero(mask);
+		dma_cap_set(DMA_MEMCPY, mask);
+		host->dma_chan = dma_request_channel(mask, 0, NULL);
+		if (!host->dma_chan) {
+			dev_err(host->dev, "Failed to request DMA channel\n");
+			use_dma = 0;
+		}
+	}
+	if (use_dma)
+		dev_info(host->dev, "Using DMA for NAND access.\n");
+	else
+		dev_info(host->dev, "No DMA support for NAND access.\n");
+
 	/* first scan to find the device and get the page size */
 	if (nand_scan_ident(mtd, 1, NULL)) {
 		res = -ENXIO;
@@ -555,6 +697,8 @@
 err_no_card:
 	atmel_nand_disable(host);
 	platform_set_drvdata(pdev, NULL);
+	if (host->dma_chan)
+		dma_release_channel(host->dma_chan);
 	if (host->ecc)
 		iounmap(host->ecc);
 err_ecc_ioremap:
@@ -578,6 +722,10 @@
 
 	if (host->ecc)
 		iounmap(host->ecc);
+
+	if (host->dma_chan)
+		dma_release_channel(host->dma_chan);
+
 	iounmap(host->io_base);
 	kfree(host);
 

diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index a90fde3..aff3468 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c

@@ -37,9 +37,6 @@
 #include <mach/nand.h>
 #include <mach/aemif.h>
 
-#include <asm/mach-types.h>
-
-
 /*
  * This is a device driver for the NAND flash controller found on the
  * various DaVinci family chips.  It handles up to four SoC chipselects,

diff --git a/drivers/mtd/nand/mpc5121_nfc.c b/drivers/mtd/nand/mpc5121_nfc.c
index c2f9543..0b81b5b 100644
--- a/drivers/mtd/nand/mpc5121_nfc.c
+++ b/drivers/mtd/nand/mpc5121_nfc.c

@@ -29,6 +29,7 @@
 #include <linux/clk.h>
 #include <linux/gfp.h>
 #include <linux/delay.h>
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -757,9 +758,9 @@
 
 	/* Enable NFC clock */
 	prv->clk = clk_get(dev, "nfc_clk");
-	if (!prv->clk) {
+	if (IS_ERR(prv->clk)) {
 		dev_err(dev, "Unable to acquire NFC clock!\n");
-		retval = -ENODEV;
+		retval = PTR_ERR(prv->clk);
 		goto error;
 	}
 

diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 5ae1d9e..42a95fb 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c

@@ -211,6 +211,31 @@
 	}
 };
 
+/* OOB description for 4096 byte pages with 128 byte OOB */
+static struct nand_ecclayout nandv2_hw_eccoob_4k = {
+	.eccbytes = 8 * 9,
+	.eccpos = {
+		7,  8,  9, 10, 11, 12, 13, 14, 15,
+		23, 24, 25, 26, 27, 28, 29, 30, 31,
+		39, 40, 41, 42, 43, 44, 45, 46, 47,
+		55, 56, 57, 58, 59, 60, 61, 62, 63,
+		71, 72, 73, 74, 75, 76, 77, 78, 79,
+		87, 88, 89, 90, 91, 92, 93, 94, 95,
+		103, 104, 105, 106, 107, 108, 109, 110, 111,
+		119, 120, 121, 122, 123, 124, 125, 126, 127,
+	},
+	.oobfree = {
+		{.offset = 2, .length = 4},
+		{.offset = 16, .length = 7},
+		{.offset = 32, .length = 7},
+		{.offset = 48, .length = 7},
+		{.offset = 64, .length = 7},
+		{.offset = 80, .length = 7},
+		{.offset = 96, .length = 7},
+		{.offset = 112, .length = 7},
+	}
+};
+
 #ifdef CONFIG_MTD_PARTITIONS
 static const char *part_probes[] = { "RedBoot", "cmdlinepart", NULL };
 #endif
@@ -641,9 +666,9 @@
 
 	n = min(n, len);
 
-	memcpy(buf, host->data_buf + col, len);
+	memcpy(buf, host->data_buf + col, n);
 
-	host->buf_start += len;
+	host->buf_start += n;
 }
 
 /* Used by the upper layer to verify the data in NAND Flash
@@ -1185,6 +1210,8 @@
 
 	if (mtd->writesize == 2048)
 		this->ecc.layout = oob_largepage;
+	if (nfc_is_v21() && mtd->writesize == 4096)
+		this->ecc.layout = &nandv2_hw_eccoob_4k;
 
 	/* second phase scan */
 	if (nand_scan_tail(mtd)) {

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index a9c6ce7..85cfc06 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c

@@ -42,6 +42,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/nand_ecc.h>
+#include <linux/mtd/nand_bch.h>
 #include <linux/interrupt.h>
 #include <linux/bitops.h>
 #include <linux/leds.h>
@@ -2377,7 +2378,7 @@
 		return -EINVAL;
 	}
 
-	/* Do not allow reads past end of device */
+	/* Do not allow write past end of device */
 	if (unlikely(to >= mtd->size ||
 		     ops->ooboffs + ops->ooblen >
 			((mtd->size >> chip->page_shift) -
@@ -3248,7 +3249,7 @@
 	/*
 	 * If no default placement scheme is given, select an appropriate one
 	 */
-	if (!chip->ecc.layout) {
+	if (!chip->ecc.layout && (chip->ecc.mode != NAND_ECC_SOFT_BCH)) {
 		switch (mtd->oobsize) {
 		case 8:
 			chip->ecc.layout = &nand_oob_8;
@@ -3351,6 +3352,40 @@
 		chip->ecc.bytes = 3;
 		break;
 
+	case NAND_ECC_SOFT_BCH:
+		if (!mtd_nand_has_bch()) {
+			printk(KERN_WARNING "CONFIG_MTD_ECC_BCH not enabled\n");
+			BUG();
+		}
+		chip->ecc.calculate = nand_bch_calculate_ecc;
+		chip->ecc.correct = nand_bch_correct_data;
+		chip->ecc.read_page = nand_read_page_swecc;
+		chip->ecc.read_subpage = nand_read_subpage;
+		chip->ecc.write_page = nand_write_page_swecc;
+		chip->ecc.read_page_raw = nand_read_page_raw;
+		chip->ecc.write_page_raw = nand_write_page_raw;
+		chip->ecc.read_oob = nand_read_oob_std;
+		chip->ecc.write_oob = nand_write_oob_std;
+		/*
+		 * Board driver should supply ecc.size and ecc.bytes values to
+		 * select how many bits are correctable; see nand_bch_init()
+		 * for details.
+		 * Otherwise, default to 4 bits for large page devices
+		 */
+		if (!chip->ecc.size && (mtd->oobsize >= 64)) {
+			chip->ecc.size = 512;
+			chip->ecc.bytes = 7;
+		}
+		chip->ecc.priv = nand_bch_init(mtd,
+					       chip->ecc.size,
+					       chip->ecc.bytes,
+					       &chip->ecc.layout);
+		if (!chip->ecc.priv) {
+			printk(KERN_WARNING "BCH ECC initialization failed!\n");
+			BUG();
+		}
+		break;
+
 	case NAND_ECC_NONE:
 		printk(KERN_WARNING "NAND_ECC_NONE selected by board driver. "
 		       "This is not recommended !!\n");
@@ -3501,6 +3536,9 @@
 {
 	struct nand_chip *chip = mtd->priv;
 
+	if (chip->ecc.mode == NAND_ECC_SOFT_BCH)
+		nand_bch_free((struct nand_bch_control *)chip->ecc.priv);
+
 #ifdef CONFIG_MTD_PARTITIONS
 	/* Deregister partitions */
 	del_mtd_partitions(mtd);

diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 6ebd869..a1e8b30 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c

@@ -1101,12 +1101,16 @@
 static void verify_bbt_descr(struct mtd_info *mtd, struct nand_bbt_descr *bd)
 {
 	struct nand_chip *this = mtd->priv;
-	u32 pattern_len = bd->len;
-	u32 bits = bd->options & NAND_BBT_NRBITS_MSK;
+	u32 pattern_len;
+	u32 bits;
 	u32 table_size;
 
 	if (!bd)
 		return;
+
+	pattern_len = bd->len;
+	bits = bd->options & NAND_BBT_NRBITS_MSK;
+
 	BUG_ON((this->options & NAND_USE_FLASH_BBT_NO_OOB) &&
 			!(this->options & NAND_USE_FLASH_BBT));
 	BUG_ON(!bits);

diff --git a/drivers/mtd/nand/nand_bch.c b/drivers/mtd/nand/nand_bch.c
new file mode 100644
index 0000000..0f931e7
--- /dev/null
+++ b/drivers/mtd/nand/nand_bch.c

@@ -0,0 +1,243 @@
+/*
+ * This file provides ECC correction for more than 1 bit per block of data,
+ * using binary BCH codes. It relies on the generic BCH library lib/bch.c.
+ *
+ * Copyright © 2011 Ivan Djelic <ivan.djelic@parrot.com>
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this file; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/nand_bch.h>
+#include <linux/bch.h>
+
+/**
+ * struct nand_bch_control - private NAND BCH control structure
+ * @bch:       BCH control structure
+ * @ecclayout: private ecc layout for this BCH configuration
+ * @errloc:    error location array
+ * @eccmask:   XOR ecc mask, allows erased pages to be decoded as valid
+ */
+struct nand_bch_control {
+	struct bch_control   *bch;
+	struct nand_ecclayout ecclayout;
+	unsigned int         *errloc;
+	unsigned char        *eccmask;
+};
+
+/**
+ * nand_bch_calculate_ecc - [NAND Interface] Calculate ECC for data block
+ * @mtd:	MTD block structure
+ * @buf:	input buffer with raw data
+ * @code:	output buffer with ECC
+ */
+int nand_bch_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
+			   unsigned char *code)
+{
+	const struct nand_chip *chip = mtd->priv;
+	struct nand_bch_control *nbc = chip->ecc.priv;
+	unsigned int i;
+
+	memset(code, 0, chip->ecc.bytes);
+	encode_bch(nbc->bch, buf, chip->ecc.size, code);
+
+	/* apply mask so that an erased page is a valid codeword */
+	for (i = 0; i < chip->ecc.bytes; i++)
+		code[i] ^= nbc->eccmask[i];
+
+	return 0;
+}
+EXPORT_SYMBOL(nand_bch_calculate_ecc);
+
+/**
+ * nand_bch_correct_data - [NAND Interface] Detect and correct bit error(s)
+ * @mtd:	MTD block structure
+ * @buf:	raw data read from the chip
+ * @read_ecc:	ECC from the chip
+ * @calc_ecc:	the ECC calculated from raw data
+ *
+ * Detect and correct bit errors for a data byte block
+ */
+int nand_bch_correct_data(struct mtd_info *mtd, unsigned char *buf,
+			  unsigned char *read_ecc, unsigned char *calc_ecc)
+{
+	const struct nand_chip *chip = mtd->priv;
+	struct nand_bch_control *nbc = chip->ecc.priv;
+	unsigned int *errloc = nbc->errloc;
+	int i, count;
+
+	count = decode_bch(nbc->bch, NULL, chip->ecc.size, read_ecc, calc_ecc,
+			   NULL, errloc);
+	if (count > 0) {
+		for (i = 0; i < count; i++) {
+			if (errloc[i] < (chip->ecc.size*8))
+				/* error is located in data, correct it */
+				buf[errloc[i] >> 3] ^= (1 << (errloc[i] & 7));
+			/* else error in ecc, no action needed */
+
+			DEBUG(MTD_DEBUG_LEVEL0, "%s: corrected bitflip %u\n",
+			      __func__, errloc[i]);
+		}
+	} else if (count < 0) {
+		printk(KERN_ERR "ecc unrecoverable error\n");
+		count = -1;
+	}
+	return count;
+}
+EXPORT_SYMBOL(nand_bch_correct_data);
+
+/**
+ * nand_bch_init - [NAND Interface] Initialize NAND BCH error correction
+ * @mtd:	MTD block structure
+ * @eccsize:	ecc block size in bytes
+ * @eccbytes:	ecc length in bytes
+ * @ecclayout:	output default layout
+ *
+ * Returns:
+ *  a pointer to a new NAND BCH control structure, or NULL upon failure
+ *
+ * Initialize NAND BCH error correction. Parameters @eccsize and @eccbytes
+ * are used to compute BCH parameters m (Galois field order) and t (error
+ * correction capability). @eccbytes should be equal to the number of bytes
+ * required to store m*t bits, where m is such that 2^m-1 > @eccsize*8.
+ *
+ * Example: to configure 4 bit correction per 512 bytes, you should pass
+ * @eccsize = 512  (thus, m=13 is the smallest integer such that 2^m-1 > 512*8)
+ * @eccbytes = 7   (7 bytes are required to store m*t = 13*4 = 52 bits)
+ */
+struct nand_bch_control *
+nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes,
+	      struct nand_ecclayout **ecclayout)
+{
+	unsigned int m, t, eccsteps, i;
+	struct nand_ecclayout *layout;
+	struct nand_bch_control *nbc = NULL;
+	unsigned char *erased_page;
+
+	if (!eccsize || !eccbytes) {
+		printk(KERN_WARNING "ecc parameters not supplied\n");
+		goto fail;
+	}
+
+	m = fls(1+8*eccsize);
+	t = (eccbytes*8)/m;
+
+	nbc = kzalloc(sizeof(*nbc), GFP_KERNEL);
+	if (!nbc)
+		goto fail;
+
+	nbc->bch = init_bch(m, t, 0);
+	if (!nbc->bch)
+		goto fail;
+
+	/* verify that eccbytes has the expected value */
+	if (nbc->bch->ecc_bytes != eccbytes) {
+		printk(KERN_WARNING "invalid eccbytes %u, should be %u\n",
+		       eccbytes, nbc->bch->ecc_bytes);
+		goto fail;
+	}
+
+	eccsteps = mtd->writesize/eccsize;
+
+	/* if no ecc placement scheme was provided, build one */
+	if (!*ecclayout) {
+
+		/* handle large page devices only */
+		if (mtd->oobsize < 64) {
+			printk(KERN_WARNING "must provide an oob scheme for "
+			       "oobsize %d\n", mtd->oobsize);
+			goto fail;
+		}
+
+		layout = &nbc->ecclayout;
+		layout->eccbytes = eccsteps*eccbytes;
+
+		/* reserve 2 bytes for bad block marker */
+		if (layout->eccbytes+2 > mtd->oobsize) {
+			printk(KERN_WARNING "no suitable oob scheme available "
+			       "for oobsize %d eccbytes %u\n", mtd->oobsize,
+			       eccbytes);
+			goto fail;
+		}
+		/* put ecc bytes at oob tail */
+		for (i = 0; i < layout->eccbytes; i++)
+			layout->eccpos[i] = mtd->oobsize-layout->eccbytes+i;
+
+		layout->oobfree[0].offset = 2;
+		layout->oobfree[0].length = mtd->oobsize-2-layout->eccbytes;
+
+		*ecclayout = layout;
+	}
+
+	/* sanity checks */
+	if (8*(eccsize+eccbytes) >= (1 << m)) {
+		printk(KERN_WARNING "eccsize %u is too large\n", eccsize);
+		goto fail;
+	}
+	if ((*ecclayout)->eccbytes != (eccsteps*eccbytes)) {
+		printk(KERN_WARNING "invalid ecc layout\n");
+		goto fail;
+	}
+
+	nbc->eccmask = kmalloc(eccbytes, GFP_KERNEL);
+	nbc->errloc = kmalloc(t*sizeof(*nbc->errloc), GFP_KERNEL);
+	if (!nbc->eccmask || !nbc->errloc)
+		goto fail;
+	/*
+	 * compute and store the inverted ecc of an erased ecc block
+	 */
+	erased_page = kmalloc(eccsize, GFP_KERNEL);
+	if (!erased_page)
+		goto fail;
+
+	memset(erased_page, 0xff, eccsize);
+	memset(nbc->eccmask, 0, eccbytes);
+	encode_bch(nbc->bch, erased_page, eccsize, nbc->eccmask);
+	kfree(erased_page);
+
+	for (i = 0; i < eccbytes; i++)
+		nbc->eccmask[i] ^= 0xff;
+
+	return nbc;
+fail:
+	nand_bch_free(nbc);
+	return NULL;
+}
+EXPORT_SYMBOL(nand_bch_init);
+
+/**
+ * nand_bch_free - [NAND Interface] Release NAND BCH ECC resources
+ * @nbc:	NAND BCH control structure
+ */
+void nand_bch_free(struct nand_bch_control *nbc)
+{
+	if (nbc) {
+		free_bch(nbc->bch);
+		kfree(nbc->errloc);
+		kfree(nbc->eccmask);
+		kfree(nbc);
+	}
+}
+EXPORT_SYMBOL(nand_bch_free);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
+MODULE_DESCRIPTION("NAND software BCH ECC support");

diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index a5aa99f..213181b 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c

@@ -34,6 +34,7 @@
 #include <linux/string.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
+#include <linux/mtd/nand_bch.h>
 #include <linux/mtd/partitions.h>
 #include <linux/delay.h>
 #include <linux/list.h>
@@ -108,6 +109,7 @@
 static unsigned int overridesize = 0;
 static char *cache_file = NULL;
 static unsigned int bbt;
+static unsigned int bch;
 
 module_param(first_id_byte,  uint, 0400);
 module_param(second_id_byte, uint, 0400);
@@ -132,6 +134,7 @@
 module_param(overridesize,   uint, 0400);
 module_param(cache_file,     charp, 0400);
 module_param(bbt,	     uint, 0400);
+module_param(bch,	     uint, 0400);
 
 MODULE_PARM_DESC(first_id_byte,  "The first byte returned by NAND Flash 'read ID' command (manufacturer ID)");
 MODULE_PARM_DESC(second_id_byte, "The second byte returned by NAND Flash 'read ID' command (chip ID)");
@@ -165,6 +168,8 @@
 				 " e.g. 5 means a size of 32 erase blocks");
 MODULE_PARM_DESC(cache_file,     "File to use to cache nand pages instead of memory");
 MODULE_PARM_DESC(bbt,		 "0 OOB, 1 BBT with marker in OOB, 2 BBT with marker in data area");
+MODULE_PARM_DESC(bch,		 "Enable BCH ecc and set how many bits should "
+				 "be correctable in 512-byte blocks");
 
 /* The largest possible page size */
 #define NS_LARGEST_PAGE_SIZE	4096
@@ -2309,7 +2314,43 @@
 	if ((retval = parse_gravepages()) != 0)
 		goto error;
 
-	if ((retval = nand_scan(nsmtd, 1)) != 0) {
+	retval = nand_scan_ident(nsmtd, 1, NULL);
+	if (retval) {
+		NS_ERR("cannot scan NAND Simulator device\n");
+		if (retval > 0)
+			retval = -ENXIO;
+		goto error;
+	}
+
+	if (bch) {
+		unsigned int eccsteps, eccbytes;
+		if (!mtd_nand_has_bch()) {
+			NS_ERR("BCH ECC support is disabled\n");
+			retval = -EINVAL;
+			goto error;
+		}
+		/* use 512-byte ecc blocks */
+		eccsteps = nsmtd->writesize/512;
+		eccbytes = (bch*13+7)/8;
+		/* do not bother supporting small page devices */
+		if ((nsmtd->oobsize < 64) || !eccsteps) {
+			NS_ERR("bch not available on small page devices\n");
+			retval = -EINVAL;
+			goto error;
+		}
+		if ((eccbytes*eccsteps+2) > nsmtd->oobsize) {
+			NS_ERR("invalid bch value %u\n", bch);
+			retval = -EINVAL;
+			goto error;
+		}
+		chip->ecc.mode = NAND_ECC_SOFT_BCH;
+		chip->ecc.size = 512;
+		chip->ecc.bytes = eccbytes;
+		NS_INFO("using %u-bit/%u bytes BCH ECC\n", bch, chip->ecc.size);
+	}
+
+	retval = nand_scan_tail(nsmtd);
+	if (retval) {
 		NS_ERR("can't register NAND Simulator\n");
 		if (retval > 0)
 			retval = -ENXIO;

diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 7b8f1ff..da9a351 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c

@@ -668,6 +668,8 @@
  *
  * This function compares two ECC's and indicates if there is an error.
  * If the error can be corrected it will be corrected to the buffer.
+ * If there is no error, %0 is returned. If there is an error but it
+ * was corrected, %1 is returned. Otherwise, %-1 is returned.
  */
 static int omap_compare_ecc(u8 *ecc_data1,	/* read from NAND memory */
 			    u8 *ecc_data2,	/* read from register */
@@ -773,7 +775,7 @@
 
 		page_data[find_byte] ^= (1 << find_bit);
 
-		return 0;
+		return 1;
 	default:
 		if (isEccFF) {
 			if (ecc_data2[0] == 0 &&
@@ -794,8 +796,11 @@
  * @calc_ecc: ecc read from HW ECC registers
  *
  * Compares the ecc read from nand spare area with ECC registers values
- * and if ECC's mismached, it will call 'omap_compare_ecc' for error detection
- * and correction.
+ * and if ECC's mismatched, it will call 'omap_compare_ecc' for error
+ * detection and correction. If there are no errors, %0 is returned. If
+ * there were errors and all of the errors were corrected, the number of
+ * corrected errors is returned. If uncorrectable errors exist, %-1 is
+ * returned.
  */
 static int omap_correct_data(struct mtd_info *mtd, u_char *dat,
 				u_char *read_ecc, u_char *calc_ecc)
@@ -803,6 +808,7 @@
 	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
 							mtd);
 	int blockCnt = 0, i = 0, ret = 0;
+	int stat = 0;
 
 	/* Ex NAND_ECC_HW12_2048 */
 	if ((info->nand.ecc.mode == NAND_ECC_HW) &&
@@ -816,12 +822,14 @@
 			ret = omap_compare_ecc(read_ecc, calc_ecc, dat);
 			if (ret < 0)
 				return ret;
+			/* keep track of the number of corrected errors */
+			stat += ret;
 		}
 		read_ecc += 3;
 		calc_ecc += 3;
 		dat      += 512;
 	}
-	return 0;
+	return stat;
 }
 
 /**

diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index ea2c288..ab7f4c3 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c

@@ -27,6 +27,8 @@
 #include <plat/pxa3xx_nand.h>
 
 #define	CHIP_DELAY_TIMEOUT	(2 * HZ/10)
+#define NAND_STOP_DELAY		(2 * HZ/50)
+#define PAGE_CHUNK_SIZE		(2048)
 
 /* registers and bit definitions */
 #define NDCR		(0x00) /* Control register */
@@ -52,16 +54,18 @@
 #define NDCR_ND_MODE		(0x3 << 21)
 #define NDCR_NAND_MODE   	(0x0)
 #define NDCR_CLR_PG_CNT		(0x1 << 20)
-#define NDCR_CLR_ECC		(0x1 << 19)
+#define NDCR_STOP_ON_UNCOR	(0x1 << 19)
 #define NDCR_RD_ID_CNT_MASK	(0x7 << 16)
 #define NDCR_RD_ID_CNT(x)	(((x) << 16) & NDCR_RD_ID_CNT_MASK)
 
 #define NDCR_RA_START		(0x1 << 15)
 #define NDCR_PG_PER_BLK		(0x1 << 14)
 #define NDCR_ND_ARB_EN		(0x1 << 12)
+#define NDCR_INT_MASK           (0xFFF)
 
 #define NDSR_MASK		(0xfff)
-#define NDSR_RDY		(0x1 << 11)
+#define NDSR_RDY                (0x1 << 12)
+#define NDSR_FLASH_RDY          (0x1 << 11)
 #define NDSR_CS0_PAGED		(0x1 << 10)
 #define NDSR_CS1_PAGED		(0x1 << 9)
 #define NDSR_CS0_CMDD		(0x1 << 8)
@@ -74,6 +78,7 @@
 #define NDSR_RDDREQ		(0x1 << 1)
 #define NDSR_WRCMDREQ		(0x1)
 
+#define NDCB0_ST_ROW_EN         (0x1 << 26)
 #define NDCB0_AUTO_RS		(0x1 << 25)
 #define NDCB0_CSEL		(0x1 << 24)
 #define NDCB0_CMD_TYPE_MASK	(0x7 << 21)
@@ -104,18 +109,21 @@
 };
 
 enum {
-	STATE_READY	= 0,
+	STATE_IDLE = 0,
 	STATE_CMD_HANDLE,
 	STATE_DMA_READING,
 	STATE_DMA_WRITING,
 	STATE_DMA_DONE,
 	STATE_PIO_READING,
 	STATE_PIO_WRITING,
+	STATE_CMD_DONE,
+	STATE_READY,
 };
 
 struct pxa3xx_nand_info {
 	struct nand_chip	nand_chip;
 
+	struct nand_hw_control	controller;
 	struct platform_device	 *pdev;
 	struct pxa3xx_nand_cmdset *cmdset;
 
@@ -126,6 +134,7 @@
 	unsigned int 		buf_start;
 	unsigned int		buf_count;
 
+	struct mtd_info         *mtd;
 	/* DMA information */
 	int			drcmr_dat;
 	int			drcmr_cmd;
@@ -149,6 +158,7 @@
 
 	int			use_ecc;	/* use HW ECC ? */
 	int			use_dma;	/* use DMA ? */
+	int			is_ready;
 
 	unsigned int		page_size;	/* page size of attached chip */
 	unsigned int		data_size;	/* data size in FIFO */
@@ -201,20 +211,22 @@
 };
 
 static struct pxa3xx_nand_flash builtin_flash_types[] = {
-	{      0,   0, 2048,  8,  8,    0, &default_cmdset, &timing[0] },
-	{ 0x46ec,  32,  512, 16, 16, 4096, &default_cmdset, &timing[1] },
-	{ 0xdaec,  64, 2048,  8,  8, 2048, &default_cmdset, &timing[1] },
-	{ 0xd7ec, 128, 4096,  8,  8, 8192, &default_cmdset, &timing[1] },
-	{ 0xa12c,  64, 2048,  8,  8, 1024, &default_cmdset, &timing[2] },
-	{ 0xb12c,  64, 2048, 16, 16, 1024, &default_cmdset, &timing[2] },
-	{ 0xdc2c,  64, 2048,  8,  8, 4096, &default_cmdset, &timing[2] },
-	{ 0xcc2c,  64, 2048, 16, 16, 4096, &default_cmdset, &timing[2] },
-	{ 0xba20,  64, 2048, 16, 16, 2048, &default_cmdset, &timing[3] },
+{ "DEFAULT FLASH",      0,   0, 2048,  8,  8,    0, &timing[0] },
+{ "64MiB 16-bit",  0x46ec,  32,  512, 16, 16, 4096, &timing[1] },
+{ "256MiB 8-bit",  0xdaec,  64, 2048,  8,  8, 2048, &timing[1] },
+{ "4GiB 8-bit",    0xd7ec, 128, 4096,  8,  8, 8192, &timing[1] },
+{ "128MiB 8-bit",  0xa12c,  64, 2048,  8,  8, 1024, &timing[2] },
+{ "128MiB 16-bit", 0xb12c,  64, 2048, 16, 16, 1024, &timing[2] },
+{ "512MiB 8-bit",  0xdc2c,  64, 2048,  8,  8, 4096, &timing[2] },
+{ "512MiB 16-bit", 0xcc2c,  64, 2048, 16, 16, 4096, &timing[2] },
+{ "256MiB 16-bit", 0xba20,  64, 2048, 16, 16, 2048, &timing[3] },
 };
 
 /* Define a default flash type setting serve as flash detecting only */
 #define DEFAULT_FLASH_TYPE (&builtin_flash_types[0])
 
+const char *mtd_names[] = {"pxa3xx_nand-0", NULL};
+
 #define NDTR0_tCH(c)	(min((c), 7) << 19)
 #define NDTR0_tCS(c)	(min((c), 7) << 16)
 #define NDTR0_tWH(c)	(min((c), 7) << 11)
@@ -252,25 +264,6 @@
 	nand_writel(info, NDTR1CS0, ndtr1);
 }
 
-#define WAIT_EVENT_TIMEOUT	10
-
-static int wait_for_event(struct pxa3xx_nand_info *info, uint32_t event)
-{
-	int timeout = WAIT_EVENT_TIMEOUT;
-	uint32_t ndsr;
-
-	while (timeout--) {
-		ndsr = nand_readl(info, NDSR) & NDSR_MASK;
-		if (ndsr & event) {
-			nand_writel(info, NDSR, ndsr);
-			return 0;
-		}
-		udelay(10);
-	}
-
-	return -ETIMEDOUT;
-}
-
 static void pxa3xx_set_datasize(struct pxa3xx_nand_info *info)
 {
 	int oob_enable = info->reg_ndcr & NDCR_SPARE_EN;
@@ -291,69 +284,45 @@
 	}
 }
 
-static int prepare_read_prog_cmd(struct pxa3xx_nand_info *info,
-		uint16_t cmd, int column, int page_addr)
+/**
+ * NOTE: it is a must to set ND_RUN firstly, then write
+ * command buffer, otherwise, it does not work.
+ * We enable all the interrupt at the same time, and
+ * let pxa3xx_nand_irq to handle all logic.
+ */
+static void pxa3xx_nand_start(struct pxa3xx_nand_info *info)
 {
-	const struct pxa3xx_nand_cmdset *cmdset = info->cmdset;
-	pxa3xx_set_datasize(info);
+	uint32_t ndcr;
 
-	/* generate values for NDCBx registers */
-	info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
-	info->ndcb1 = 0;
-	info->ndcb2 = 0;
-	info->ndcb0 |= NDCB0_ADDR_CYC(info->row_addr_cycles + info->col_addr_cycles);
+	ndcr = info->reg_ndcr;
+	ndcr |= info->use_ecc ? NDCR_ECC_EN : 0;
+	ndcr |= info->use_dma ? NDCR_DMA_EN : 0;
+	ndcr |= NDCR_ND_RUN;
 
-	if (info->col_addr_cycles == 2) {
-		/* large block, 2 cycles for column address
-		 * row address starts from 3rd cycle
-		 */
-		info->ndcb1 |= page_addr << 16;
-		if (info->row_addr_cycles == 3)
-			info->ndcb2 = (page_addr >> 16) & 0xff;
-	} else
-		/* small block, 1 cycles for column address
-		 * row address starts from 2nd cycle
-		 */
-		info->ndcb1 = page_addr << 8;
-
-	if (cmd == cmdset->program)
-		info->ndcb0 |= NDCB0_CMD_TYPE(1) | NDCB0_AUTO_RS;
-
-	return 0;
+	/* clear status bits and run */
+	nand_writel(info, NDCR, 0);
+	nand_writel(info, NDSR, NDSR_MASK);
+	nand_writel(info, NDCR, ndcr);
 }
 
-static int prepare_erase_cmd(struct pxa3xx_nand_info *info,
-			uint16_t cmd, int page_addr)
+static void pxa3xx_nand_stop(struct pxa3xx_nand_info *info)
 {
-	info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
-	info->ndcb0 |= NDCB0_CMD_TYPE(2) | NDCB0_AUTO_RS | NDCB0_ADDR_CYC(3);
-	info->ndcb1 = page_addr;
-	info->ndcb2 = 0;
-	return 0;
-}
+	uint32_t ndcr;
+	int timeout = NAND_STOP_DELAY;
 
-static int prepare_other_cmd(struct pxa3xx_nand_info *info, uint16_t cmd)
-{
-	const struct pxa3xx_nand_cmdset *cmdset = info->cmdset;
+	/* wait RUN bit in NDCR become 0 */
+	ndcr = nand_readl(info, NDCR);
+	while ((ndcr & NDCR_ND_RUN) && (timeout-- > 0)) {
+		ndcr = nand_readl(info, NDCR);
+		udelay(1);
+	}
 
-	info->ndcb0 = cmd | ((cmd & 0xff00) ? NDCB0_DBC : 0);
-	info->ndcb1 = 0;
-	info->ndcb2 = 0;
-
-	info->oob_size = 0;
-	if (cmd == cmdset->read_id) {
-		info->ndcb0 |= NDCB0_CMD_TYPE(3);
-		info->data_size = 8;
-	} else if (cmd == cmdset->read_status) {
-		info->ndcb0 |= NDCB0_CMD_TYPE(4);
-		info->data_size = 8;
-	} else if (cmd == cmdset->reset || cmd == cmdset->lock ||
-		   cmd == cmdset->unlock) {
-		info->ndcb0 |= NDCB0_CMD_TYPE(5);
-	} else
-		return -EINVAL;
-
-	return 0;
+	if (timeout <= 0) {
+		ndcr &= ~NDCR_ND_RUN;
+		nand_writel(info, NDCR, ndcr);
+	}
+	/* clear status bits */
+	nand_writel(info, NDSR, NDSR_MASK);
 }
 
 static void enable_int(struct pxa3xx_nand_info *info, uint32_t int_mask)
@@ -372,39 +341,8 @@
 	nand_writel(info, NDCR, ndcr | int_mask);
 }
 
-/* NOTE: it is a must to set ND_RUN firstly, then write command buffer
- * otherwise, it does not work
- */
-static int write_cmd(struct pxa3xx_nand_info *info)
+static void handle_data_pio(struct pxa3xx_nand_info *info)
 {
-	uint32_t ndcr;
-
-	/* clear status bits and run */
-	nand_writel(info, NDSR, NDSR_MASK);
-
-	ndcr = info->reg_ndcr;
-
-	ndcr |= info->use_ecc ? NDCR_ECC_EN : 0;
-	ndcr |= info->use_dma ? NDCR_DMA_EN : 0;
-	ndcr |= NDCR_ND_RUN;
-
-	nand_writel(info, NDCR, ndcr);
-
-	if (wait_for_event(info, NDSR_WRCMDREQ)) {
-		printk(KERN_ERR "timed out writing command\n");
-		return -ETIMEDOUT;
-	}
-
-	nand_writel(info, NDCB0, info->ndcb0);
-	nand_writel(info, NDCB0, info->ndcb1);
-	nand_writel(info, NDCB0, info->ndcb2);
-	return 0;
-}
-
-static int handle_data_pio(struct pxa3xx_nand_info *info)
-{
-	int ret, timeout = CHIP_DELAY_TIMEOUT;
-
 	switch (info->state) {
 	case STATE_PIO_WRITING:
 		__raw_writesl(info->mmio_base + NDDB, info->data_buff,
@@ -412,14 +350,6 @@
 		if (info->oob_size > 0)
 			__raw_writesl(info->mmio_base + NDDB, info->oob_buff,
 					DIV_ROUND_UP(info->oob_size, 4));
-
-		enable_int(info, NDSR_CS0_BBD | NDSR_CS0_CMDD);
-
-		ret = wait_for_completion_timeout(&info->cmd_complete, timeout);
-		if (!ret) {
-			printk(KERN_ERR "program command time out\n");
-			return -1;
-		}
 		break;
 	case STATE_PIO_READING:
 		__raw_readsl(info->mmio_base + NDDB, info->data_buff,
@@ -431,14 +361,11 @@
 	default:
 		printk(KERN_ERR "%s: invalid state %d\n", __func__,
 				info->state);
-		return -EINVAL;
+		BUG();
 	}
-
-	info->state = STATE_READY;
-	return 0;
 }
 
-static void start_data_dma(struct pxa3xx_nand_info *info, int dir_out)
+static void start_data_dma(struct pxa3xx_nand_info *info)
 {
 	struct pxa_dma_desc *desc = info->data_desc;
 	int dma_len = ALIGN(info->data_size + info->oob_size, 32);
@@ -446,14 +373,21 @@
 	desc->ddadr = DDADR_STOP;
 	desc->dcmd = DCMD_ENDIRQEN | DCMD_WIDTH4 | DCMD_BURST32 | dma_len;
 
-	if (dir_out) {
+	switch (info->state) {
+	case STATE_DMA_WRITING:
 		desc->dsadr = info->data_buff_phys;
 		desc->dtadr = info->mmio_phys + NDDB;
 		desc->dcmd |= DCMD_INCSRCADDR | DCMD_FLOWTRG;
-	} else {
+		break;
+	case STATE_DMA_READING:
 		desc->dtadr = info->data_buff_phys;
 		desc->dsadr = info->mmio_phys + NDDB;
 		desc->dcmd |= DCMD_INCTRGADDR | DCMD_FLOWSRC;
+		break;
+	default:
+		printk(KERN_ERR "%s: invalid state %d\n", __func__,
+				info->state);
+		BUG();
 	}
 
 	DRCMR(info->drcmr_dat) = DRCMR_MAPVLD | info->data_dma_ch;
@@ -471,95 +405,64 @@
 
 	if (dcsr & DCSR_BUSERR) {
 		info->retcode = ERR_DMABUSERR;
-		complete(&info->cmd_complete);
 	}
 
-	if (info->state == STATE_DMA_WRITING) {
-		info->state = STATE_DMA_DONE;
-		enable_int(info, NDSR_CS0_BBD | NDSR_CS0_CMDD);
-	} else {
-		info->state = STATE_READY;
-		complete(&info->cmd_complete);
-	}
+	info->state = STATE_DMA_DONE;
+	enable_int(info, NDCR_INT_MASK);
+	nand_writel(info, NDSR, NDSR_WRDREQ | NDSR_RDDREQ);
 }
 
 static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
 {
 	struct pxa3xx_nand_info *info = devid;
-	unsigned int status;
+	unsigned int status, is_completed = 0;
 
 	status = nand_readl(info, NDSR);
 
-	if (status & (NDSR_RDDREQ | NDSR_DBERR | NDSR_SBERR)) {
-		if (status & NDSR_DBERR)
-			info->retcode = ERR_DBERR;
-		else if (status & NDSR_SBERR)
-			info->retcode = ERR_SBERR;
-
-		disable_int(info, NDSR_RDDREQ | NDSR_DBERR | NDSR_SBERR);
-
+	if (status & NDSR_DBERR)
+		info->retcode = ERR_DBERR;
+	if (status & NDSR_SBERR)
+		info->retcode = ERR_SBERR;
+	if (status & (NDSR_RDDREQ | NDSR_WRDREQ)) {
+		/* whether use dma to transfer data */
 		if (info->use_dma) {
-			info->state = STATE_DMA_READING;
-			start_data_dma(info, 0);
+			disable_int(info, NDCR_INT_MASK);
+			info->state = (status & NDSR_RDDREQ) ?
+				      STATE_DMA_READING : STATE_DMA_WRITING;
+			start_data_dma(info);
+			goto NORMAL_IRQ_EXIT;
 		} else {
-			info->state = STATE_PIO_READING;
-			complete(&info->cmd_complete);
+			info->state = (status & NDSR_RDDREQ) ?
+				      STATE_PIO_READING : STATE_PIO_WRITING;
+			handle_data_pio(info);
 		}
-	} else if (status & NDSR_WRDREQ) {
-		disable_int(info, NDSR_WRDREQ);
-		if (info->use_dma) {
-			info->state = STATE_DMA_WRITING;
-			start_data_dma(info, 1);
-		} else {
-			info->state = STATE_PIO_WRITING;
-			complete(&info->cmd_complete);
-		}
-	} else if (status & (NDSR_CS0_BBD | NDSR_CS0_CMDD)) {
-		if (status & NDSR_CS0_BBD)
-			info->retcode = ERR_BBERR;
-
-		disable_int(info, NDSR_CS0_BBD | NDSR_CS0_CMDD);
+	}
+	if (status & NDSR_CS0_CMDD) {
+		info->state = STATE_CMD_DONE;
+		is_completed = 1;
+	}
+	if (status & NDSR_FLASH_RDY) {
+		info->is_ready = 1;
 		info->state = STATE_READY;
-		complete(&info->cmd_complete);
 	}
+
+	if (status & NDSR_WRCMDREQ) {
+		nand_writel(info, NDSR, NDSR_WRCMDREQ);
+		status &= ~NDSR_WRCMDREQ;
+		info->state = STATE_CMD_HANDLE;
+		nand_writel(info, NDCB0, info->ndcb0);
+		nand_writel(info, NDCB0, info->ndcb1);
+		nand_writel(info, NDCB0, info->ndcb2);
+	}
+
+	/* clear NDSR to let the controller exit the IRQ */
 	nand_writel(info, NDSR, status);
+	if (is_completed)
+		complete(&info->cmd_complete);
+NORMAL_IRQ_EXIT:
 	return IRQ_HANDLED;
 }
 
-static int pxa3xx_nand_do_cmd(struct pxa3xx_nand_info *info, uint32_t event)
-{
-	uint32_t ndcr;
-	int ret, timeout = CHIP_DELAY_TIMEOUT;
-
-	if (write_cmd(info)) {
-		info->retcode = ERR_SENDCMD;
-		goto fail_stop;
-	}
-
-	info->state = STATE_CMD_HANDLE;
-
-	enable_int(info, event);
-
-	ret = wait_for_completion_timeout(&info->cmd_complete, timeout);
-	if (!ret) {
-		printk(KERN_ERR "command execution timed out\n");
-		info->retcode = ERR_SENDCMD;
-		goto fail_stop;
-	}
-
-	if (info->use_dma == 0 && info->data_size > 0)
-		if (handle_data_pio(info))
-			goto fail_stop;
-
-	return 0;
-
-fail_stop:
-	ndcr = nand_readl(info, NDCR);
-	nand_writel(info, NDCR, ndcr & ~NDCR_ND_RUN);
-	udelay(10);
-	return -ETIMEDOUT;
-}
-
 static int pxa3xx_nand_dev_ready(struct mtd_info *mtd)
 {
 	struct pxa3xx_nand_info *info = mtd->priv;
@@ -574,125 +477,218 @@
 	return 1;
 }
 
+static int prepare_command_pool(struct pxa3xx_nand_info *info, int command,
+		uint16_t column, int page_addr)
+{
+	uint16_t cmd;
+	int addr_cycle, exec_cmd, ndcb0;
+	struct mtd_info *mtd = info->mtd;
+
+	ndcb0 = 0;
+	addr_cycle = 0;
+	exec_cmd = 1;
+
+	/* reset data and oob column point to handle data */
+	info->buf_start		= 0;
+	info->buf_count		= 0;
+	info->oob_size		= 0;
+	info->use_ecc		= 0;
+	info->is_ready		= 0;
+	info->retcode		= ERR_NONE;
+
+	switch (command) {
+	case NAND_CMD_READ0:
+	case NAND_CMD_PAGEPROG:
+		info->use_ecc = 1;
+	case NAND_CMD_READOOB:
+		pxa3xx_set_datasize(info);
+		break;
+	case NAND_CMD_SEQIN:
+		exec_cmd = 0;
+		break;
+	default:
+		info->ndcb1 = 0;
+		info->ndcb2 = 0;
+		break;
+	}
+
+	info->ndcb0 = ndcb0;
+	addr_cycle = NDCB0_ADDR_CYC(info->row_addr_cycles
+				    + info->col_addr_cycles);
+
+	switch (command) {
+	case NAND_CMD_READOOB:
+	case NAND_CMD_READ0:
+		cmd = info->cmdset->read1;
+		if (command == NAND_CMD_READOOB)
+			info->buf_start = mtd->writesize + column;
+		else
+			info->buf_start = column;
+
+		if (unlikely(info->page_size < PAGE_CHUNK_SIZE))
+			info->ndcb0 |= NDCB0_CMD_TYPE(0)
+					| addr_cycle
+					| (cmd & NDCB0_CMD1_MASK);
+		else
+			info->ndcb0 |= NDCB0_CMD_TYPE(0)
+					| NDCB0_DBC
+					| addr_cycle
+					| cmd;
+
+	case NAND_CMD_SEQIN:
+		/* small page addr setting */
+		if (unlikely(info->page_size < PAGE_CHUNK_SIZE)) {
+			info->ndcb1 = ((page_addr & 0xFFFFFF) << 8)
+					| (column & 0xFF);
+
+			info->ndcb2 = 0;
+		} else {
+			info->ndcb1 = ((page_addr & 0xFFFF) << 16)
+					| (column & 0xFFFF);
+
+			if (page_addr & 0xFF0000)
+				info->ndcb2 = (page_addr & 0xFF0000) >> 16;
+			else
+				info->ndcb2 = 0;
+		}
+
+		info->buf_count = mtd->writesize + mtd->oobsize;
+		memset(info->data_buff, 0xFF, info->buf_count);
+
+		break;
+
+	case NAND_CMD_PAGEPROG:
+		if (is_buf_blank(info->data_buff,
+					(mtd->writesize + mtd->oobsize))) {
+			exec_cmd = 0;
+			break;
+		}
+
+		cmd = info->cmdset->program;
+		info->ndcb0 |= NDCB0_CMD_TYPE(0x1)
+				| NDCB0_AUTO_RS
+				| NDCB0_ST_ROW_EN
+				| NDCB0_DBC
+				| cmd
+				| addr_cycle;
+		break;
+
+	case NAND_CMD_READID:
+		cmd = info->cmdset->read_id;
+		info->buf_count = info->read_id_bytes;
+		info->ndcb0 |= NDCB0_CMD_TYPE(3)
+				| NDCB0_ADDR_CYC(1)
+				| cmd;
+
+		info->data_size = 8;
+		break;
+	case NAND_CMD_STATUS:
+		cmd = info->cmdset->read_status;
+		info->buf_count = 1;
+		info->ndcb0 |= NDCB0_CMD_TYPE(4)
+				| NDCB0_ADDR_CYC(1)
+				| cmd;
+
+		info->data_size = 8;
+		break;
+
+	case NAND_CMD_ERASE1:
+		cmd = info->cmdset->erase;
+		info->ndcb0 |= NDCB0_CMD_TYPE(2)
+				| NDCB0_AUTO_RS
+				| NDCB0_ADDR_CYC(3)
+				| NDCB0_DBC
+				| cmd;
+		info->ndcb1 = page_addr;
+		info->ndcb2 = 0;
+
+		break;
+	case NAND_CMD_RESET:
+		cmd = info->cmdset->reset;
+		info->ndcb0 |= NDCB0_CMD_TYPE(5)
+				| cmd;
+
+		break;
+
+	case NAND_CMD_ERASE2:
+		exec_cmd = 0;
+		break;
+
+	default:
+		exec_cmd = 0;
+		printk(KERN_ERR "pxa3xx-nand: non-supported"
+			" command %x\n", command);
+		break;
+	}
+
+	return exec_cmd;
+}
+
 static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 				int column, int page_addr)
 {
 	struct pxa3xx_nand_info *info = mtd->priv;
-	const struct pxa3xx_nand_cmdset *cmdset = info->cmdset;
-	int ret;
+	int ret, exec_cmd;
 
-	info->use_dma = (use_dma) ? 1 : 0;
-	info->use_ecc = 0;
-	info->data_size = 0;
-	info->state = STATE_READY;
+	/*
+	 * if this is a x16 device ,then convert the input
+	 * "byte" address into a "word" address appropriate
+	 * for indexing a word-oriented device
+	 */
+	if (info->reg_ndcr & NDCR_DWIDTH_M)
+		column /= 2;
 
-	init_completion(&info->cmd_complete);
+	exec_cmd = prepare_command_pool(info, command, column, page_addr);
+	if (exec_cmd) {
+		init_completion(&info->cmd_complete);
+		pxa3xx_nand_start(info);
 
-	switch (command) {
-	case NAND_CMD_READOOB:
-		/* disable HW ECC to get all the OOB data */
-		info->buf_count = mtd->writesize + mtd->oobsize;
-		info->buf_start = mtd->writesize + column;
-		memset(info->data_buff, 0xFF, info->buf_count);
-
-		if (prepare_read_prog_cmd(info, cmdset->read1, column, page_addr))
-			break;
-
-		pxa3xx_nand_do_cmd(info, NDSR_RDDREQ | NDSR_DBERR | NDSR_SBERR);
-
-		/* We only are OOB, so if the data has error, does not matter */
-		if (info->retcode == ERR_DBERR)
-			info->retcode = ERR_NONE;
-		break;
-
-	case NAND_CMD_READ0:
-		info->use_ecc = 1;
-		info->retcode = ERR_NONE;
-		info->buf_start = column;
-		info->buf_count = mtd->writesize + mtd->oobsize;
-		memset(info->data_buff, 0xFF, info->buf_count);
-
-		if (prepare_read_prog_cmd(info, cmdset->read1, column, page_addr))
-			break;
-
-		pxa3xx_nand_do_cmd(info, NDSR_RDDREQ | NDSR_DBERR | NDSR_SBERR);
-
-		if (info->retcode == ERR_DBERR) {
-			/* for blank page (all 0xff), HW will calculate its ECC as
-			 * 0, which is different from the ECC information within
-			 * OOB, ignore such double bit errors
-			 */
-			if (is_buf_blank(info->data_buff, mtd->writesize))
-				info->retcode = ERR_NONE;
+		ret = wait_for_completion_timeout(&info->cmd_complete,
+				CHIP_DELAY_TIMEOUT);
+		if (!ret) {
+			printk(KERN_ERR "Wait time out!!!\n");
+			/* Stop State Machine for next command cycle */
+			pxa3xx_nand_stop(info);
 		}
-		break;
-	case NAND_CMD_SEQIN:
-		info->buf_start = column;
-		info->buf_count = mtd->writesize + mtd->oobsize;
-		memset(info->data_buff, 0xff, info->buf_count);
+		info->state = STATE_IDLE;
+	}
+}
 
-		/* save column/page_addr for next CMD_PAGEPROG */
-		info->seqin_column = column;
-		info->seqin_page_addr = page_addr;
-		break;
-	case NAND_CMD_PAGEPROG:
-		info->use_ecc = (info->seqin_column >= mtd->writesize) ? 0 : 1;
+static void pxa3xx_nand_write_page_hwecc(struct mtd_info *mtd,
+		struct nand_chip *chip, const uint8_t *buf)
+{
+	chip->write_buf(mtd, buf, mtd->writesize);
+	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
+}
 
-		if (prepare_read_prog_cmd(info, cmdset->program,
-				info->seqin_column, info->seqin_page_addr))
+static int pxa3xx_nand_read_page_hwecc(struct mtd_info *mtd,
+		struct nand_chip *chip, uint8_t *buf, int page)
+{
+	struct pxa3xx_nand_info *info = mtd->priv;
+
+	chip->read_buf(mtd, buf, mtd->writesize);
+	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+	if (info->retcode == ERR_SBERR) {
+		switch (info->use_ecc) {
+		case 1:
+			mtd->ecc_stats.corrected++;
 			break;
-
-		pxa3xx_nand_do_cmd(info, NDSR_WRDREQ);
-		break;
-	case NAND_CMD_ERASE1:
-		if (prepare_erase_cmd(info, cmdset->erase, page_addr))
+		case 0:
+		default:
 			break;
-
-		pxa3xx_nand_do_cmd(info, NDSR_CS0_BBD | NDSR_CS0_CMDD);
-		break;
-	case NAND_CMD_ERASE2:
-		break;
-	case NAND_CMD_READID:
-	case NAND_CMD_STATUS:
-		info->use_dma = 0;	/* force PIO read */
-		info->buf_start = 0;
-		info->buf_count = (command == NAND_CMD_READID) ?
-				info->read_id_bytes : 1;
-
-		if (prepare_other_cmd(info, (command == NAND_CMD_READID) ?
-				cmdset->read_id : cmdset->read_status))
-			break;
-
-		pxa3xx_nand_do_cmd(info, NDSR_RDDREQ);
-		break;
-	case NAND_CMD_RESET:
-		if (prepare_other_cmd(info, cmdset->reset))
-			break;
-
-		ret = pxa3xx_nand_do_cmd(info, NDSR_CS0_CMDD);
-		if (ret == 0) {
-			int timeout = 2;
-			uint32_t ndcr;
-
-			while (timeout--) {
-				if (nand_readl(info, NDSR) & NDSR_RDY)
-					break;
-				msleep(10);
-			}
-
-			ndcr = nand_readl(info, NDCR);
-			nand_writel(info, NDCR, ndcr & ~NDCR_ND_RUN);
 		}
-		break;
-	default:
-		printk(KERN_ERR "non-supported command.\n");
-		break;
+	} else if (info->retcode == ERR_DBERR) {
+		/*
+		 * for blank page (all 0xff), HW will calculate its ECC as
+		 * 0, which is different from the ECC information within
+		 * OOB, ignore such double bit errors
+		 */
+		if (is_buf_blank(buf, mtd->writesize))
+			mtd->ecc_stats.failed++;
 	}
 
-	if (info->retcode == ERR_DBERR) {
-		printk(KERN_ERR "double bit error @ page %08x\n", page_addr);
-		info->retcode = ERR_NONE;
-	}
+	return 0;
 }
 
 static uint8_t pxa3xx_nand_read_byte(struct mtd_info *mtd)
@@ -769,73 +765,12 @@
 	return 0;
 }
 
-static void pxa3xx_nand_ecc_hwctl(struct mtd_info *mtd, int mode)
-{
-	return;
-}
-
-static int pxa3xx_nand_ecc_calculate(struct mtd_info *mtd,
-		const uint8_t *dat, uint8_t *ecc_code)
-{
-	return 0;
-}
-
-static int pxa3xx_nand_ecc_correct(struct mtd_info *mtd,
-		uint8_t *dat, uint8_t *read_ecc, uint8_t *calc_ecc)
-{
-	struct pxa3xx_nand_info *info = mtd->priv;
-	/*
-	 * Any error include ERR_SEND_CMD, ERR_DBERR, ERR_BUSERR, we
-	 * consider it as a ecc error which will tell the caller the
-	 * read fail We have distinguish all the errors, but the
-	 * nand_read_ecc only check this function return value
-	 *
-	 * Corrected (single-bit) errors must also be noted.
-	 */
-	if (info->retcode == ERR_SBERR)
-		return 1;
-	else if (info->retcode != ERR_NONE)
-		return -1;
-
-	return 0;
-}
-
-static int __readid(struct pxa3xx_nand_info *info, uint32_t *id)
-{
-	const struct pxa3xx_nand_cmdset *cmdset = info->cmdset;
-	uint32_t ndcr;
-	uint8_t  id_buff[8];
-
-	if (prepare_other_cmd(info, cmdset->read_id)) {
-		printk(KERN_ERR "failed to prepare command\n");
-		return -EINVAL;
-	}
-
-	/* Send command */
-	if (write_cmd(info))
-		goto fail_timeout;
-
-	/* Wait for CMDDM(command done successfully) */
-	if (wait_for_event(info, NDSR_RDDREQ))
-		goto fail_timeout;
-
-	__raw_readsl(info->mmio_base + NDDB, id_buff, 2);
-	*id = id_buff[0] | (id_buff[1] << 8);
-	return 0;
-
-fail_timeout:
-	ndcr = nand_readl(info, NDCR);
-	nand_writel(info, NDCR, ndcr & ~NDCR_ND_RUN);
-	udelay(10);
-	return -ETIMEDOUT;
-}
-
 static int pxa3xx_nand_config_flash(struct pxa3xx_nand_info *info,
 				    const struct pxa3xx_nand_flash *f)
 {
 	struct platform_device *pdev = info->pdev;
 	struct pxa3xx_nand_platform_data *pdata = pdev->dev.platform_data;
-	uint32_t ndcr = 0x00000FFF; /* disable all interrupts */
+	uint32_t ndcr = 0x0; /* enable all interrupts */
 
 	if (f->page_size != 2048 && f->page_size != 512)
 		return -EINVAL;
@@ -844,9 +779,8 @@
 		return -EINVAL;
 
 	/* calculate flash information */
-	info->cmdset = f->cmdset;
+	info->cmdset = &default_cmdset;
 	info->page_size = f->page_size;
-	info->oob_buff = info->data_buff + f->page_size;
 	info->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
 
 	/* calculate addressing information */
@@ -876,87 +810,18 @@
 static int pxa3xx_nand_detect_config(struct pxa3xx_nand_info *info)
 {
 	uint32_t ndcr = nand_readl(info, NDCR);
-	struct nand_flash_dev *type = NULL;
-	uint32_t id = -1, page_per_block, num_blocks;
-	int i;
-
-	page_per_block = ndcr & NDCR_PG_PER_BLK ? 64 : 32;
 	info->page_size = ndcr & NDCR_PAGE_SZ ? 2048 : 512;
-	/* set info fields needed to __readid */
+	/* set info fields needed to read id */
 	info->read_id_bytes = (info->page_size == 2048) ? 4 : 2;
 	info->reg_ndcr = ndcr;
 	info->cmdset = &default_cmdset;
 
-	if (__readid(info, &id))
-		return -ENODEV;
-
-	/* Lookup the flash id */
-	id = (id >> 8) & 0xff;		/* device id is byte 2 */
-	for (i = 0; nand_flash_ids[i].name != NULL; i++) {
-		if (id == nand_flash_ids[i].id) {
-			type =  &nand_flash_ids[i];
-			break;
-		}
-	}
-
-	if (!type)
-		return -ENODEV;
-
-	/* fill the missing flash information */
-	i = __ffs(page_per_block * info->page_size);
-	num_blocks = type->chipsize << (20 - i);
-
-	/* calculate addressing information */
-	info->col_addr_cycles = (info->page_size == 2048) ? 2 : 1;
-
-	if (num_blocks * page_per_block > 65536)
-		info->row_addr_cycles = 3;
-	else
-		info->row_addr_cycles = 2;
-
 	info->ndtr0cs0 = nand_readl(info, NDTR0CS0);
 	info->ndtr1cs0 = nand_readl(info, NDTR1CS0);
 
 	return 0;
 }
 
-static int pxa3xx_nand_detect_flash(struct pxa3xx_nand_info *info,
-				    const struct pxa3xx_nand_platform_data *pdata)
-{
-	const struct pxa3xx_nand_flash *f;
-	uint32_t id = -1;
-	int i;
-
-	if (pdata->keep_config)
-		if (pxa3xx_nand_detect_config(info) == 0)
-			return 0;
-
-	/* we use default timing to detect id */
-	f = DEFAULT_FLASH_TYPE;
-	pxa3xx_nand_config_flash(info, f);
-	if (__readid(info, &id))
-		goto fail_detect;
-
-	for (i=0; i<ARRAY_SIZE(builtin_flash_types) + pdata->num_flash - 1; i++) {
-		/* we first choose the flash definition from platfrom */
-		if (i < pdata->num_flash)
-			f = pdata->flash + i;
-		else
-			f = &builtin_flash_types[i - pdata->num_flash + 1];
-		if (f->chip_id == id) {
-			dev_info(&info->pdev->dev, "detect chip id: 0x%x\n", id);
-			pxa3xx_nand_config_flash(info, f);
-			return 0;
-		}
-	}
-
-	dev_warn(&info->pdev->dev,
-		 "failed to detect configured nand flash; found %04x instead of\n",
-		 id);
-fail_detect:
-	return -ENODEV;
-}
-
 /* the maximum possible buffer size for large page with OOB data
  * is: 2048 + 64 = 2112 bytes, allocate a page here for both the
  * data buffer and the DMA descriptor
@@ -998,82 +863,144 @@
 	return 0;
 }
 
-static struct nand_ecclayout hw_smallpage_ecclayout = {
-	.eccbytes = 6,
-	.eccpos = {8, 9, 10, 11, 12, 13 },
-	.oobfree = { {2, 6} }
-};
-
-static struct nand_ecclayout hw_largepage_ecclayout = {
-	.eccbytes = 24,
-	.eccpos = {
-		40, 41, 42, 43, 44, 45, 46, 47,
-		48, 49, 50, 51, 52, 53, 54, 55,
-		56, 57, 58, 59, 60, 61, 62, 63},
-	.oobfree = { {2, 38} }
-};
-
-static void pxa3xx_nand_init_mtd(struct mtd_info *mtd,
-				 struct pxa3xx_nand_info *info)
+static int pxa3xx_nand_sensing(struct pxa3xx_nand_info *info)
 {
-	struct nand_chip *this = &info->nand_chip;
+	struct mtd_info *mtd = info->mtd;
+	struct nand_chip *chip = mtd->priv;
 
-	this->options = (info->reg_ndcr & NDCR_DWIDTH_C) ? NAND_BUSWIDTH_16: 0;
-
-	this->waitfunc		= pxa3xx_nand_waitfunc;
-	this->select_chip	= pxa3xx_nand_select_chip;
-	this->dev_ready		= pxa3xx_nand_dev_ready;
-	this->cmdfunc		= pxa3xx_nand_cmdfunc;
-	this->read_word		= pxa3xx_nand_read_word;
-	this->read_byte		= pxa3xx_nand_read_byte;
-	this->read_buf		= pxa3xx_nand_read_buf;
-	this->write_buf		= pxa3xx_nand_write_buf;
-	this->verify_buf	= pxa3xx_nand_verify_buf;
-
-	this->ecc.mode		= NAND_ECC_HW;
-	this->ecc.hwctl		= pxa3xx_nand_ecc_hwctl;
-	this->ecc.calculate	= pxa3xx_nand_ecc_calculate;
-	this->ecc.correct	= pxa3xx_nand_ecc_correct;
-	this->ecc.size		= info->page_size;
-
-	if (info->page_size == 2048)
-		this->ecc.layout = &hw_largepage_ecclayout;
+	/* use the common timing to make a try */
+	pxa3xx_nand_config_flash(info, &builtin_flash_types[0]);
+	chip->cmdfunc(mtd, NAND_CMD_RESET, 0, 0);
+	if (info->is_ready)
+		return 1;
 	else
-		this->ecc.layout = &hw_smallpage_ecclayout;
-
-	this->chip_delay = 25;
+		return 0;
 }
 
-static int pxa3xx_nand_probe(struct platform_device *pdev)
+static int pxa3xx_nand_scan(struct mtd_info *mtd)
 {
-	struct pxa3xx_nand_platform_data *pdata;
+	struct pxa3xx_nand_info *info = mtd->priv;
+	struct platform_device *pdev = info->pdev;
+	struct pxa3xx_nand_platform_data *pdata = pdev->dev.platform_data;
+	struct nand_flash_dev pxa3xx_flash_ids[2] = { {NULL,}, {NULL,} };
+	const struct pxa3xx_nand_flash *f = NULL;
+	struct nand_chip *chip = mtd->priv;
+	uint32_t id = -1;
+	uint64_t chipsize;
+	int i, ret, num;
+
+	if (pdata->keep_config && !pxa3xx_nand_detect_config(info))
+		goto KEEP_CONFIG;
+
+	ret = pxa3xx_nand_sensing(info);
+	if (!ret) {
+		kfree(mtd);
+		info->mtd = NULL;
+		printk(KERN_INFO "There is no nand chip on cs 0!\n");
+
+		return -EINVAL;
+	}
+
+	chip->cmdfunc(mtd, NAND_CMD_READID, 0, 0);
+	id = *((uint16_t *)(info->data_buff));
+	if (id != 0)
+		printk(KERN_INFO "Detect a flash id %x\n", id);
+	else {
+		kfree(mtd);
+		info->mtd = NULL;
+		printk(KERN_WARNING "Read out ID 0, potential timing set wrong!!\n");
+
+		return -EINVAL;
+	}
+
+	num = ARRAY_SIZE(builtin_flash_types) + pdata->num_flash - 1;
+	for (i = 0; i < num; i++) {
+		if (i < pdata->num_flash)
+			f = pdata->flash + i;
+		else
+			f = &builtin_flash_types[i - pdata->num_flash + 1];
+
+		/* find the chip in default list */
+		if (f->chip_id == id)
+			break;
+	}
+
+	if (i >= (ARRAY_SIZE(builtin_flash_types) + pdata->num_flash - 1)) {
+		kfree(mtd);
+		info->mtd = NULL;
+		printk(KERN_ERR "ERROR!! flash not defined!!!\n");
+
+		return -EINVAL;
+	}
+
+	pxa3xx_nand_config_flash(info, f);
+	pxa3xx_flash_ids[0].name = f->name;
+	pxa3xx_flash_ids[0].id = (f->chip_id >> 8) & 0xffff;
+	pxa3xx_flash_ids[0].pagesize = f->page_size;
+	chipsize = (uint64_t)f->num_blocks * f->page_per_block * f->page_size;
+	pxa3xx_flash_ids[0].chipsize = chipsize >> 20;
+	pxa3xx_flash_ids[0].erasesize = f->page_size * f->page_per_block;
+	if (f->flash_width == 16)
+		pxa3xx_flash_ids[0].options = NAND_BUSWIDTH_16;
+KEEP_CONFIG:
+	if (nand_scan_ident(mtd, 1, pxa3xx_flash_ids))
+		return -ENODEV;
+	/* calculate addressing information */
+	info->col_addr_cycles = (mtd->writesize >= 2048) ? 2 : 1;
+	info->oob_buff = info->data_buff + mtd->writesize;
+	if ((mtd->size >> chip->page_shift) > 65536)
+		info->row_addr_cycles = 3;
+	else
+		info->row_addr_cycles = 2;
+	mtd->name = mtd_names[0];
+	chip->ecc.mode = NAND_ECC_HW;
+	chip->ecc.size = f->page_size;
+
+	chip->options = (f->flash_width == 16) ? NAND_BUSWIDTH_16 : 0;
+	chip->options |= NAND_NO_AUTOINCR;
+	chip->options |= NAND_NO_READRDY;
+
+	return nand_scan_tail(mtd);
+}
+
+static
+struct pxa3xx_nand_info *alloc_nand_resource(struct platform_device *pdev)
+{
 	struct pxa3xx_nand_info *info;
-	struct nand_chip *this;
+	struct nand_chip *chip;
 	struct mtd_info *mtd;
 	struct resource *r;
-	int ret = 0, irq;
-
-	pdata = pdev->dev.platform_data;
-
-	if (!pdata) {
-		dev_err(&pdev->dev, "no platform data defined\n");
-		return -ENODEV;
-	}
+	int ret, irq;
 
 	mtd = kzalloc(sizeof(struct mtd_info) + sizeof(struct pxa3xx_nand_info),
 			GFP_KERNEL);
 	if (!mtd) {
 		dev_err(&pdev->dev, "failed to allocate memory\n");
-		return -ENOMEM;
+		return NULL;
 	}
 
 	info = (struct pxa3xx_nand_info *)(&mtd[1]);
+	chip = (struct nand_chip *)(&mtd[1]);
 	info->pdev = pdev;
-
-	this = &info->nand_chip;
+	info->mtd = mtd;
 	mtd->priv = info;
 	mtd->owner = THIS_MODULE;
 
+	chip->ecc.read_page	= pxa3xx_nand_read_page_hwecc;
+	chip->ecc.write_page	= pxa3xx_nand_write_page_hwecc;
+	chip->controller        = &info->controller;
+	chip->waitfunc		= pxa3xx_nand_waitfunc;
+	chip->select_chip	= pxa3xx_nand_select_chip;
+	chip->dev_ready		= pxa3xx_nand_dev_ready;
+	chip->cmdfunc		= pxa3xx_nand_cmdfunc;
+	chip->read_word		= pxa3xx_nand_read_word;
+	chip->read_byte		= pxa3xx_nand_read_byte;
+	chip->read_buf		= pxa3xx_nand_read_buf;
+	chip->write_buf		= pxa3xx_nand_write_buf;
+	chip->verify_buf	= pxa3xx_nand_verify_buf;
+
+	spin_lock_init(&chip->controller->lock);
+	init_waitqueue_head(&chip->controller->wq);
 	info->clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(info->clk)) {
 		dev_err(&pdev->dev, "failed to get nand clock\n");
@@ -1141,43 +1068,12 @@
 		goto fail_free_buf;
 	}
 
-	ret = pxa3xx_nand_detect_flash(info, pdata);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to detect flash\n");
-		ret = -ENODEV;
-		goto fail_free_irq;
-	}
+	platform_set_drvdata(pdev, info);
 
-	pxa3xx_nand_init_mtd(mtd, info);
+	return info;
 
-	platform_set_drvdata(pdev, mtd);
-
-	if (nand_scan(mtd, 1)) {
-		dev_err(&pdev->dev, "failed to scan nand\n");
-		ret = -ENXIO;
-		goto fail_free_irq;
-	}
-
-#ifdef CONFIG_MTD_PARTITIONS
-	if (mtd_has_cmdlinepart()) {
-		static const char *probes[] = { "cmdlinepart", NULL };
-		struct mtd_partition *parts;
-		int nr_parts;
-
-		nr_parts = parse_mtd_partitions(mtd, probes, &parts, 0);
-
-		if (nr_parts)
-			return add_mtd_partitions(mtd, parts, nr_parts);
-	}
-
-	return add_mtd_partitions(mtd, pdata->parts, pdata->nr_parts);
-#else
-	return 0;
-#endif
-
-fail_free_irq:
-	free_irq(irq, info);
 fail_free_buf:
+	free_irq(irq, info);
 	if (use_dma) {
 		pxa_free_dma(info->data_dma_ch);
 		dma_free_coherent(&pdev->dev, info->data_buff_size,
@@ -1193,22 +1089,18 @@
 	clk_put(info->clk);
 fail_free_mtd:
 	kfree(mtd);
-	return ret;
+	return NULL;
 }
 
 static int pxa3xx_nand_remove(struct platform_device *pdev)
 {
-	struct mtd_info *mtd = platform_get_drvdata(pdev);
-	struct pxa3xx_nand_info *info = mtd->priv;
+	struct pxa3xx_nand_info *info = platform_get_drvdata(pdev);
+	struct mtd_info *mtd = info->mtd;
 	struct resource *r;
 	int irq;
 
 	platform_set_drvdata(pdev, NULL);
 
-	del_mtd_device(mtd);
-#ifdef CONFIG_MTD_PARTITIONS
-	del_mtd_partitions(mtd);
-#endif
 	irq = platform_get_irq(pdev, 0);
 	if (irq >= 0)
 		free_irq(irq, info);
@@ -1226,17 +1118,62 @@
 	clk_disable(info->clk);
 	clk_put(info->clk);
 
-	kfree(mtd);
+	if (mtd) {
+		del_mtd_device(mtd);
+#ifdef CONFIG_MTD_PARTITIONS
+		del_mtd_partitions(mtd);
+#endif
+		kfree(mtd);
+	}
 	return 0;
 }
 
+static int pxa3xx_nand_probe(struct platform_device *pdev)
+{
+	struct pxa3xx_nand_platform_data *pdata;
+	struct pxa3xx_nand_info *info;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(&pdev->dev, "no platform data defined\n");
+		return -ENODEV;
+	}
+
+	info = alloc_nand_resource(pdev);
+	if (info == NULL)
+		return -ENOMEM;
+
+	if (pxa3xx_nand_scan(info->mtd)) {
+		dev_err(&pdev->dev, "failed to scan nand\n");
+		pxa3xx_nand_remove(pdev);
+		return -ENODEV;
+	}
+
+#ifdef CONFIG_MTD_PARTITIONS
+	if (mtd_has_cmdlinepart()) {
+		const char *probes[] = { "cmdlinepart", NULL };
+		struct mtd_partition *parts;
+		int nr_parts;
+
+		nr_parts = parse_mtd_partitions(info->mtd, probes, &parts, 0);
+
+		if (nr_parts)
+			return add_mtd_partitions(info->mtd, parts, nr_parts);
+	}
+
+	return add_mtd_partitions(info->mtd, pdata->parts, pdata->nr_parts);
+#else
+	return 0;
+#endif
+}
+
 #ifdef CONFIG_PM
 static int pxa3xx_nand_suspend(struct platform_device *pdev, pm_message_t state)
 {
-	struct mtd_info *mtd = (struct mtd_info *)platform_get_drvdata(pdev);
-	struct pxa3xx_nand_info *info = mtd->priv;
+	struct pxa3xx_nand_info *info = platform_get_drvdata(pdev);
+	struct mtd_info *mtd = info->mtd;
 
-	if (info->state != STATE_READY) {
+	if (info->state) {
 		dev_err(&pdev->dev, "driver busy, state = %d\n", info->state);
 		return -EAGAIN;
 	}
@@ -1246,8 +1183,8 @@
 
 static int pxa3xx_nand_resume(struct platform_device *pdev)
 {
-	struct mtd_info *mtd = (struct mtd_info *)platform_get_drvdata(pdev);
-	struct pxa3xx_nand_info *info = mtd->priv;
+	struct pxa3xx_nand_info *info = platform_get_drvdata(pdev);
+	struct mtd_info *mtd = info->mtd;
 
 	nand_writel(info, NDTR0CS0, info->ndtr0cs0);
 	nand_writel(info, NDTR1CS0, info->ndtr1cs0);

diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index 14a49ab..f591f61 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c

@@ -629,6 +629,7 @@
 {
 	struct omap_onenand_platform_data *pdata;
 	struct omap2_onenand *c;
+	struct onenand_chip *this;
 	int r;
 
 	pdata = pdev->dev.platform_data;
@@ -726,9 +727,8 @@
 
 	c->mtd.dev.parent = &pdev->dev;
 
+	this = &c->onenand;
 	if (c->dma_channel >= 0) {
-		struct onenand_chip *this = &c->onenand;
-
 		this->wait = omap2_onenand_wait;
 		if (cpu_is_omap34xx()) {
 			this->read_bufferram = omap3_onenand_read_bufferram;
@@ -749,6 +749,9 @@
 		c->onenand.disable = omap2_onenand_disable;
 	}
 
+	if (pdata->skip_initial_unlocking)
+		this->options |= ONENAND_SKIP_INITIAL_UNLOCKING;
+
 	if ((r = onenand_scan(&c->mtd, 1)) < 0)
 		goto err_release_regulator;
 

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index bac41ca..56a8b20 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c

@@ -1132,6 +1132,8 @@
 			onenand_update_bufferram(mtd, from, !ret);
 			if (ret == -EBADMSG)
 				ret = 0;
+			if (ret)
+				break;
 		}
 
 		this->read_bufferram(mtd, ONENAND_DATARAM, buf, column, thislen);
@@ -1646,11 +1648,10 @@
 	int ret = 0;
 	int thislen, column;
 
+	column = addr & (this->writesize - 1);
+
 	while (len != 0) {
-		thislen = min_t(int, this->writesize, len);
-		column = addr & (this->writesize - 1);
-		if (column + thislen > this->writesize)
-			thislen = this->writesize - column;
+		thislen = min_t(int, this->writesize - column, len);
 
 		this->command(mtd, ONENAND_CMD_READ, addr, this->writesize);
 
@@ -1664,12 +1665,13 @@
 
 		this->read_bufferram(mtd, ONENAND_DATARAM, this->verify_buf, 0, mtd->writesize);
 
-		if (memcmp(buf, this->verify_buf, thislen))
+		if (memcmp(buf, this->verify_buf + column, thislen))
 			return -EBADMSG;
 
 		len -= thislen;
 		buf += thislen;
 		addr += thislen;
+		column = 0;
 	}
 
 	return 0;
@@ -4083,7 +4085,8 @@
 	mtd->writebufsize = mtd->writesize;
 
 	/* Unlock whole block */
-	this->unlock_all(mtd);
+	if (!(this->options & ONENAND_SKIP_INITIAL_UNLOCKING))
+		this->unlock_all(mtd);
 
 	ret = this->scan_bbt(mtd);
 	if ((!FLEXONENAND(this)) || ret)

diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index ac0d6a8..2b0daae 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c

@@ -64,12 +64,16 @@
 					SM_SMALL_PAGE - SM_CIS_VENDOR_OFFSET);
 
 	char *vendor = kmalloc(vendor_len, GFP_KERNEL);
+	if (!vendor)
+		goto error1;
 	memcpy(vendor, ftl->cis_buffer + SM_CIS_VENDOR_OFFSET, vendor_len);
 	vendor[vendor_len] = 0;
 
 	/* Initialize sysfs attributes */
 	vendor_attribute =
 		kzalloc(sizeof(struct sm_sysfs_attribute), GFP_KERNEL);
+	if (!vendor_attribute)
+		goto error2;
 
 	sysfs_attr_init(&vendor_attribute->dev_attr.attr);
 
@@ -83,12 +87,24 @@
 	/* Create array of pointers to the attributes */
 	attributes = kzalloc(sizeof(struct attribute *) * (NUM_ATTRIBUTES + 1),
 								GFP_KERNEL);
+	if (!attributes)
+		goto error3;
 	attributes[0] = &vendor_attribute->dev_attr.attr;
 
 	/* Finally create the attribute group */
 	attr_group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
+	if (!attr_group)
+		goto error4;
 	attr_group->attrs = attributes;
 	return attr_group;
+error4:
+	kfree(attributes);
+error3:
+	kfree(vendor_attribute);
+error2:
+	kfree(vendor);
+error1:
+	return NULL;
 }
 
 void sm_delete_sysfs_attributes(struct sm_ftl *ftl)
@@ -1178,6 +1194,8 @@
 	}
 
 	ftl->disk_attributes = sm_create_sysfs_attributes(ftl);
+	if (!ftl->disk_attributes)
+		goto error6;
 	trans->disk_attributes = ftl->disk_attributes;
 
 	sm_printk("Found %d MiB xD/SmartMedia FTL on mtd%d",

diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index 161feeb..627d4e2 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c

@@ -16,7 +16,7 @@
  *
  * Test read and write speed of a MTD device.
  *
- * Author: Adrian Hunter <ext-adrian.hunter@nokia.com>
+ * Author: Adrian Hunter <adrian.hunter@nokia.com>
  */
 
 #include <linux/init.h>
@@ -33,6 +33,11 @@
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
 
+static int count;
+module_param(count, int, S_IRUGO);
+MODULE_PARM_DESC(count, "Maximum number of eraseblocks to use "
+			"(0 means use all)");
+
 static struct mtd_info *mtd;
 static unsigned char *iobuf;
 static unsigned char *bbt;
@@ -89,6 +94,33 @@
 	return 0;
 }
 
+static int multiblock_erase(int ebnum, int blocks)
+{
+	int err;
+	struct erase_info ei;
+	loff_t addr = ebnum * mtd->erasesize;
+
+	memset(&ei, 0, sizeof(struct erase_info));
+	ei.mtd  = mtd;
+	ei.addr = addr;
+	ei.len  = mtd->erasesize * blocks;
+
+	err = mtd->erase(mtd, &ei);
+	if (err) {
+		printk(PRINT_PREF "error %d while erasing EB %d, blocks %d\n",
+		       err, ebnum, blocks);
+		return err;
+	}
+
+	if (ei.state == MTD_ERASE_FAILED) {
+		printk(PRINT_PREF "some erase error occurred at EB %d,"
+		       "blocks %d\n", ebnum, blocks);
+		return -EIO;
+	}
+
+	return 0;
+}
+
 static int erase_whole_device(void)
 {
 	int err;
@@ -282,13 +314,16 @@
 
 static long calc_speed(void)
 {
-	long ms, k, speed;
+	uint64_t k;
+	long ms;
 
 	ms = (finish.tv_sec - start.tv_sec) * 1000 +
 	     (finish.tv_usec - start.tv_usec) / 1000;
-	k = goodebcnt * mtd->erasesize / 1024;
-	speed = (k * 1000) / ms;
-	return speed;
+	if (ms == 0)
+		return 0;
+	k = goodebcnt * (mtd->erasesize / 1024) * 1000;
+	do_div(k, ms);
+	return k;
 }
 
 static int scan_for_bad_eraseblocks(void)
@@ -320,13 +355,16 @@
 
 static int __init mtd_speedtest_init(void)
 {
-	int err, i;
+	int err, i, blocks, j, k;
 	long speed;
 	uint64_t tmp;
 
 	printk(KERN_INFO "\n");
 	printk(KERN_INFO "=================================================\n");
-	printk(PRINT_PREF "MTD device: %d\n", dev);
+	if (count)
+		printk(PRINT_PREF "MTD device: %d    count: %d\n", dev, count);
+	else
+		printk(PRINT_PREF "MTD device: %d\n", dev);
 
 	mtd = get_mtd_device(NULL, dev);
 	if (IS_ERR(mtd)) {
@@ -353,6 +391,9 @@
 	       (unsigned long long)mtd->size, mtd->erasesize,
 	       pgsize, ebcnt, pgcnt, mtd->oobsize);
 
+	if (count > 0 && count < ebcnt)
+		ebcnt = count;
+
 	err = -ENOMEM;
 	iobuf = kmalloc(mtd->erasesize, GFP_KERNEL);
 	if (!iobuf) {
@@ -484,6 +525,31 @@
 	speed = calc_speed();
 	printk(PRINT_PREF "erase speed is %ld KiB/s\n", speed);
 
+	/* Multi-block erase all eraseblocks */
+	for (k = 1; k < 7; k++) {
+		blocks = 1 << k;
+		printk(PRINT_PREF "Testing %dx multi-block erase speed\n",
+		       blocks);
+		start_timing();
+		for (i = 0; i < ebcnt; ) {
+			for (j = 0; j < blocks && (i + j) < ebcnt; j++)
+				if (bbt[i + j])
+					break;
+			if (j < 1) {
+				i++;
+				continue;
+			}
+			err = multiblock_erase(i, j);
+			if (err)
+				goto out;
+			cond_resched();
+			i += j;
+		}
+		stop_timing();
+		speed = calc_speed();
+		printk(PRINT_PREF "%dx multi-block erase speed is %ld KiB/s\n",
+		       blocks, speed);
+	}
 	printk(PRINT_PREF "finished\n");
 out:
 	kfree(iobuf);

diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
index 11204e8..334eae5 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/mtd_subpagetest.c

@@ -394,6 +394,11 @@
 	}
 
 	subpgsize = mtd->writesize >> mtd->subpage_sft;
+	tmp = mtd->size;
+	do_div(tmp, mtd->erasesize);
+	ebcnt = tmp;
+	pgcnt = mtd->erasesize / mtd->writesize;
+
 	printk(PRINT_PREF "MTD device size %llu, eraseblock size %u, "
 	       "page size %u, subpage size %u, count of eraseblocks %u, "
 	       "pages per eraseblock %u, OOB size %u\n",
@@ -413,11 +418,6 @@
 		goto out;
 	}
 
-	tmp = mtd->size;
-	do_div(tmp, mtd->erasesize);
-	ebcnt = tmp;
-	pgcnt = mtd->erasesize / mtd->writesize;
-
 	err = scan_for_bad_eraseblocks();
 	if (err)
 		goto out;

diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c
index f142cc2..deaa8bc 100644
--- a/drivers/net/a2065.c
+++ b/drivers/net/a2065.c

@@ -711,14 +711,14 @@
 		return -EBUSY;
 	r2 = request_mem_region(mem_start, A2065_RAM_SIZE, "RAM");
 	if (!r2) {
-		release_resource(r1);
+		release_mem_region(base_addr, sizeof(struct lance_regs));
 		return -EBUSY;
 	}
 
 	dev = alloc_etherdev(sizeof(struct lance_private));
 	if (dev == NULL) {
-		release_resource(r1);
-		release_resource(r2);
+		release_mem_region(base_addr, sizeof(struct lance_regs));
+		release_mem_region(mem_start, A2065_RAM_SIZE);
 		return -ENOMEM;
 	}
 
@@ -764,8 +764,8 @@
 
 	err = register_netdev(dev);
 	if (err) {
-		release_resource(r1);
-		release_resource(r2);
+		release_mem_region(base_addr, sizeof(struct lance_regs));
+		release_mem_region(mem_start, A2065_RAM_SIZE);
 		free_netdev(dev);
 		return err;
 	}

diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c
index 7ca0ede..b7f45cd 100644
--- a/drivers/net/ariadne.c
+++ b/drivers/net/ariadne.c

@@ -182,14 +182,14 @@
 	return -EBUSY;
     r2 = request_mem_region(mem_start, ARIADNE_RAM_SIZE, "RAM");
     if (!r2) {
-	release_resource(r1);
+	release_mem_region(base_addr, sizeof(struct Am79C960));
 	return -EBUSY;
     }
 
     dev = alloc_etherdev(sizeof(struct ariadne_private));
     if (dev == NULL) {
-	release_resource(r1);
-	release_resource(r2);
+	release_mem_region(base_addr, sizeof(struct Am79C960));
+	release_mem_region(mem_start, ARIADNE_RAM_SIZE);
 	return -ENOMEM;
     }
 
@@ -213,8 +213,8 @@
 
     err = register_netdev(dev);
     if (err) {
-	release_resource(r1);
-	release_resource(r2);
+	release_mem_region(base_addr, sizeof(struct Am79C960));
+	release_mem_region(mem_start, ARIADNE_RAM_SIZE);
 	free_netdev(dev);
 	return err;
     }

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 338bea1..16d6fe9 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c

@@ -1482,21 +1482,16 @@
 {
 	struct sk_buff *skb = *pskb;
 	struct slave *slave;
-	struct net_device *bond_dev;
 	struct bonding *bond;
 
-	slave = bond_slave_get_rcu(skb->dev);
-	bond_dev = ACCESS_ONCE(slave->dev->master);
-	if (unlikely(!bond_dev))
-		return RX_HANDLER_PASS;
-
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(!skb))
 		return RX_HANDLER_CONSUMED;
 
 	*pskb = skb;
 
-	bond = netdev_priv(bond_dev);
+	slave = bond_slave_get_rcu(skb->dev);
+	bond = slave->bond;
 
 	if (bond->params.arp_interval)
 		slave->dev->last_rx = jiffies;
@@ -1505,10 +1500,10 @@
 		return RX_HANDLER_EXACT;
 	}
 
-	skb->dev = bond_dev;
+	skb->dev = bond->dev;
 
 	if (bond->params.mode == BOND_MODE_ALB &&
-	    bond_dev->priv_flags & IFF_BRIDGE_PORT &&
+	    bond->dev->priv_flags & IFF_BRIDGE_PORT &&
 	    skb->pkt_type == PACKET_HOST) {
 
 		if (unlikely(skb_cow_head(skb,
@@ -1516,7 +1511,7 @@
 			kfree_skb(skb);
 			return RX_HANDLER_CONSUMED;
 		}
-		memcpy(eth_hdr(skb)->h_dest, bond_dev->dev_addr, ETH_ALEN);
+		memcpy(eth_hdr(skb)->h_dest, bond->dev->dev_addr, ETH_ALEN);
 	}
 
 	return RX_HANDLER_ANOTHER;
@@ -1698,20 +1693,15 @@
 		pr_debug("Error %d calling netdev_set_bond_master\n", res);
 		goto err_restore_mac;
 	}
-	res = netdev_rx_handler_register(slave_dev, bond_handle_frame,
-					 new_slave);
-	if (res) {
-		pr_debug("Error %d calling netdev_rx_handler_register\n", res);
-		goto err_unset_master;
-	}
 
 	/* open the slave since the application closed it */
 	res = dev_open(slave_dev);
 	if (res) {
 		pr_debug("Opening slave %s failed\n", slave_dev->name);
-		goto err_unreg_rxhandler;
+		goto err_unset_master;
 	}
 
+	new_slave->bond = bond;
 	new_slave->dev = slave_dev;
 	slave_dev->priv_flags |= IFF_BONDING;
 
@@ -1907,6 +1897,13 @@
 	if (res)
 		goto err_close;
 
+	res = netdev_rx_handler_register(slave_dev, bond_handle_frame,
+					 new_slave);
+	if (res) {
+		pr_debug("Error %d calling netdev_rx_handler_register\n", res);
+		goto err_dest_symlinks;
+	}
+
 	pr_info("%s: enslaving %s as a%s interface with a%s link.\n",
 		bond_dev->name, slave_dev->name,
 		bond_is_active_slave(new_slave) ? "n active" : " backup",
@@ -1916,13 +1913,12 @@
 	return 0;
 
 /* Undo stages on error */
+err_dest_symlinks:
+	bond_destroy_slave_symlinks(bond_dev, slave_dev);
+
 err_close:
 	dev_close(slave_dev);
 
-err_unreg_rxhandler:
-	netdev_rx_handler_unregister(slave_dev);
-	synchronize_net();
-
 err_unset_master:
 	netdev_set_bond_master(slave_dev, NULL);
 
@@ -1988,6 +1984,14 @@
 		return -EINVAL;
 	}
 
+	/* unregister rx_handler early so bond_handle_frame wouldn't be called
+	 * for this slave anymore.
+	 */
+	netdev_rx_handler_unregister(slave_dev);
+	write_unlock_bh(&bond->lock);
+	synchronize_net();
+	write_lock_bh(&bond->lock);
+
 	if (!bond->params.fail_over_mac) {
 		if (!compare_ether_addr(bond_dev->dev_addr, slave->perm_hwaddr) &&
 		    bond->slave_cnt > 1)
@@ -2104,8 +2108,6 @@
 		netif_addr_unlock_bh(bond_dev);
 	}
 
-	netdev_rx_handler_unregister(slave_dev);
-	synchronize_net();
 	netdev_set_bond_master(slave_dev, NULL);
 
 	slave_disable_netpoll(slave);
@@ -2186,6 +2188,12 @@
 		 */
 		write_unlock_bh(&bond->lock);
 
+		/* unregister rx_handler early so bond_handle_frame wouldn't
+		 * be called for this slave anymore.
+		 */
+		netdev_rx_handler_unregister(slave_dev);
+		synchronize_net();
+
 		if (bond_is_lb(bond)) {
 			/* must be called only after the slave
 			 * has been detached from the list
@@ -2217,8 +2225,6 @@
 			netif_addr_unlock_bh(bond_dev);
 		}
 
-		netdev_rx_handler_unregister(slave_dev);
-		synchronize_net();
 		netdev_set_bond_master(slave_dev, NULL);
 
 		slave_disable_netpoll(slave);

diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 6b26962..90736cb 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h

@@ -187,6 +187,7 @@
 	struct net_device *dev; /* first - useful for panic debug */
 	struct slave *next;
 	struct slave *prev;
+	struct bonding *bond; /* our master */
 	int    delay;
 	unsigned long jiffies;
 	unsigned long last_arp_rx;

diff --git a/drivers/net/davinci_cpdma.c b/drivers/net/davinci_cpdma.c
index e92b2b6..ae47f23 100644
--- a/drivers/net/davinci_cpdma.c
+++ b/drivers/net/davinci_cpdma.c

@@ -76,6 +76,7 @@
 
 struct cpdma_desc_pool {
 	u32			phys;
+	u32			hw_addr;
 	void __iomem		*iomap;		/* ioremap map */
 	void			*cpumap;	/* dma_alloc map */
 	int			desc_size, mem_size;
@@ -137,7 +138,8 @@
  * abstract out these details
  */
 static struct cpdma_desc_pool *
-cpdma_desc_pool_create(struct device *dev, u32 phys, int size, int align)
+cpdma_desc_pool_create(struct device *dev, u32 phys, u32 hw_addr,
+				int size, int align)
 {
 	int bitmap_size;
 	struct cpdma_desc_pool *pool;
@@ -161,10 +163,12 @@
 	if (phys) {
 		pool->phys  = phys;
 		pool->iomap = ioremap(phys, size);
+		pool->hw_addr = hw_addr;
 	} else {
 		pool->cpumap = dma_alloc_coherent(dev, size, &pool->phys,
 						  GFP_KERNEL);
 		pool->iomap = (void __force __iomem *)pool->cpumap;
+		pool->hw_addr = pool->phys;
 	}
 
 	if (pool->iomap)
@@ -201,14 +205,14 @@
 {
 	if (!desc)
 		return 0;
-	return pool->phys + (__force dma_addr_t)desc -
+	return pool->hw_addr + (__force dma_addr_t)desc -
 			    (__force dma_addr_t)pool->iomap;
 }
 
 static inline struct cpdma_desc __iomem *
 desc_from_phys(struct cpdma_desc_pool *pool, dma_addr_t dma)
 {
-	return dma ? pool->iomap + dma - pool->phys : NULL;
+	return dma ? pool->iomap + dma - pool->hw_addr : NULL;
 }
 
 static struct cpdma_desc __iomem *
@@ -260,6 +264,7 @@
 
 	ctlr->pool = cpdma_desc_pool_create(ctlr->dev,
 					    ctlr->params.desc_mem_phys,
+					    ctlr->params.desc_hw_addr,
 					    ctlr->params.desc_mem_size,
 					    ctlr->params.desc_align);
 	if (!ctlr->pool) {

diff --git a/drivers/net/davinci_cpdma.h b/drivers/net/davinci_cpdma.h
index 868e50e..afa19a0 100644
--- a/drivers/net/davinci_cpdma.h
+++ b/drivers/net/davinci_cpdma.h

@@ -33,6 +33,7 @@
 	bool			has_soft_reset;
 	int			min_packet_size;
 	u32			desc_mem_phys;
+	u32			desc_hw_addr;
 	int			desc_mem_size;
 	int			desc_align;
 

diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index 082d6ea..baca6bf 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c

@@ -1854,10 +1854,13 @@
 	dma_params.rxcp			= priv->emac_base + 0x660;
 	dma_params.num_chan		= EMAC_MAX_TXRX_CHANNELS;
 	dma_params.min_packet_size	= EMAC_DEF_MIN_ETHPKTSIZE;
-	dma_params.desc_mem_phys	= hw_ram_addr;
+	dma_params.desc_hw_addr		= hw_ram_addr;
 	dma_params.desc_mem_size	= pdata->ctrl_ram_size;
 	dma_params.desc_align		= 16;
 
+	dma_params.desc_mem_phys = pdata->no_bd_ram ? 0 :
+			(u32 __force)res->start + pdata->ctrl_ram_offset;
+
 	priv->dma = cpdma_ctlr_create(&dma_params);
 	if (!priv->dma) {
 		dev_err(emac_dev, "DaVinci EMAC: Error initializing DMA\n");

diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c
index 3a4277f..116cae3 100644
--- a/drivers/net/mlx4/alloc.c
+++ b/drivers/net/mlx4/alloc.c

@@ -62,6 +62,9 @@
 	} else
 		obj = -1;
 
+	if (obj != -1)
+		--bitmap->avail;
+
 	spin_unlock(&bitmap->lock);
 
 	return obj;
@@ -101,11 +104,19 @@
 	} else
 		obj = -1;
 
+	if (obj != -1)
+		bitmap->avail -= cnt;
+
 	spin_unlock(&bitmap->lock);
 
 	return obj;
 }
 
+u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap)
+{
+	return bitmap->avail;
+}
+
 void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt)
 {
 	obj &= bitmap->max + bitmap->reserved_top - 1;
@@ -115,6 +126,7 @@
 	bitmap->last = min(bitmap->last, obj);
 	bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
 			& bitmap->mask;
+	bitmap->avail += cnt;
 	spin_unlock(&bitmap->lock);
 }
 
@@ -130,6 +142,7 @@
 	bitmap->max  = num - reserved_top;
 	bitmap->mask = mask;
 	bitmap->reserved_top = reserved_top;
+	bitmap->avail = num - reserved_top - reserved_bot;
 	spin_lock_init(&bitmap->lock);
 	bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) *
 				sizeof (long), GFP_KERNEL);

diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index 7cd34e9..bd8ef9f 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c

@@ -198,7 +198,7 @@
 	u64 mtt_addr;
 	int err;
 
-	if (vector >= dev->caps.num_comp_vectors)
+	if (vector > dev->caps.num_comp_vectors + dev->caps.comp_pool)
 		return -EINVAL;
 
 	cq->vector = vector;

diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c
index 21786ad..ec4b6d0 100644
--- a/drivers/net/mlx4/en_cq.c
+++ b/drivers/net/mlx4/en_cq.c

@@ -51,13 +51,10 @@
 	int err;
 
 	cq->size = entries;
-	if (mode == RX) {
+	if (mode == RX)
 		cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
-		cq->vector   = ring % mdev->dev->caps.num_comp_vectors;
-	} else {
+	else
 		cq->buf_size = sizeof(struct mlx4_cqe);
-		cq->vector   = 0;
-	}
 
 	cq->ring = ring;
 	cq->is_tx = mode;
@@ -80,7 +77,8 @@
 int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
-	int err;
+	int err = 0;
+	char name[25];
 
 	cq->dev = mdev->pndev[priv->port];
 	cq->mcq.set_ci_db  = cq->wqres.db.db;
@@ -89,6 +87,29 @@
 	*cq->mcq.arm_db    = 0;
 	memset(cq->buf, 0, cq->buf_size);
 
+	if (cq->is_tx == RX) {
+		if (mdev->dev->caps.comp_pool) {
+			if (!cq->vector) {
+				sprintf(name , "%s-rx-%d", priv->dev->name, cq->ring);
+				if (mlx4_assign_eq(mdev->dev, name, &cq->vector)) {
+					cq->vector = (cq->ring + 1 + priv->port) %
+						mdev->dev->caps.num_comp_vectors;
+					mlx4_warn(mdev, "Failed Assigning an EQ to "
+						  "%s_rx-%d ,Falling back to legacy EQ's\n",
+						  priv->dev->name, cq->ring);
+				}
+			}
+		} else {
+			cq->vector = (cq->ring + 1 + priv->port) %
+				mdev->dev->caps.num_comp_vectors;
+		}
+	} else {
+		if (!cq->vector || !mdev->dev->caps.comp_pool) {
+			/*Fallback to legacy pool in case of error*/
+			cq->vector   = 0;
+		}
+	}
+
 	if (!cq->is_tx)
 		cq->size = priv->rx_ring[cq->ring].actual_size;
 
@@ -112,12 +133,15 @@
 	return 0;
 }
 
-void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
+			bool reserve_vectors)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 
 	mlx4_en_unmap_buffer(&cq->wqres.buf);
 	mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
+	if (priv->mdev->dev->caps.comp_pool && cq->vector && !reserve_vectors)
+		mlx4_release_eq(priv->mdev->dev, cq->vector);
 	cq->buf_size = 0;
 	cq->buf = NULL;
 }

diff --git a/drivers/net/mlx4/en_ethtool.c b/drivers/net/mlx4/en_ethtool.c
index 056152b..d54b7ab 100644
--- a/drivers/net/mlx4/en_ethtool.c
+++ b/drivers/net/mlx4/en_ethtool.c

@@ -45,7 +45,7 @@
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
 
-	sprintf(drvinfo->driver, DRV_NAME " (%s)", mdev->dev->board_id);
+	strncpy(drvinfo->driver, DRV_NAME, 32);
 	strncpy(drvinfo->version, DRV_VERSION " (" DRV_RELDATE ")", 32);
 	sprintf(drvinfo->fw_version, "%d.%d.%d",
 		(u16) (mdev->dev->caps.fw_ver >> 32),
@@ -131,8 +131,65 @@
 static void mlx4_en_get_wol(struct net_device *netdev,
 			    struct ethtool_wolinfo *wol)
 {
-	wol->supported = 0;
-	wol->wolopts = 0;
+	struct mlx4_en_priv *priv = netdev_priv(netdev);
+	int err = 0;
+	u64 config = 0;
+
+	if (!priv->mdev->dev->caps.wol) {
+		wol->supported = 0;
+		wol->wolopts = 0;
+		return;
+	}
+
+	err = mlx4_wol_read(priv->mdev->dev, &config, priv->port);
+	if (err) {
+		en_err(priv, "Failed to get WoL information\n");
+		return;
+	}
+
+	if (config & MLX4_EN_WOL_MAGIC)
+		wol->supported = WAKE_MAGIC;
+	else
+		wol->supported = 0;
+
+	if (config & MLX4_EN_WOL_ENABLED)
+		wol->wolopts = WAKE_MAGIC;
+	else
+		wol->wolopts = 0;
+}
+
+static int mlx4_en_set_wol(struct net_device *netdev,
+			    struct ethtool_wolinfo *wol)
+{
+	struct mlx4_en_priv *priv = netdev_priv(netdev);
+	u64 config = 0;
+	int err = 0;
+
+	if (!priv->mdev->dev->caps.wol)
+		return -EOPNOTSUPP;
+
+	if (wol->supported & ~WAKE_MAGIC)
+		return -EINVAL;
+
+	err = mlx4_wol_read(priv->mdev->dev, &config, priv->port);
+	if (err) {
+		en_err(priv, "Failed to get WoL info, unable to modify\n");
+		return err;
+	}
+
+	if (wol->wolopts & WAKE_MAGIC) {
+		config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED |
+				MLX4_EN_WOL_MAGIC;
+	} else {
+		config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC);
+		config |= MLX4_EN_WOL_DO_MODIFY;
+	}
+
+	err = mlx4_wol_write(priv->mdev->dev, config, priv->port);
+	if (err)
+		en_err(priv, "Failed to set WoL information\n");
+
+	return err;
 }
 
 static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
@@ -388,7 +445,7 @@
 		mlx4_en_stop_port(dev);
 	}
 
-	mlx4_en_free_resources(priv);
+	mlx4_en_free_resources(priv, true);
 
 	priv->prof->tx_ring_size = tx_size;
 	priv->prof->rx_ring_size = rx_size;
@@ -442,6 +499,7 @@
 	.get_ethtool_stats = mlx4_en_get_ethtool_stats,
 	.self_test = mlx4_en_self_test,
 	.get_wol = mlx4_en_get_wol,
+	.set_wol = mlx4_en_set_wol,
 	.get_msglevel = mlx4_en_get_msglevel,
 	.set_msglevel = mlx4_en_set_msglevel,
 	.get_coalesce = mlx4_en_get_coalesce,

diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c
index 1ff6ca6..9317b61 100644
--- a/drivers/net/mlx4/en_main.c
+++ b/drivers/net/mlx4/en_main.c

@@ -241,16 +241,18 @@
 	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
 		mdev->port_cnt++;
 
-	/* If we did not receive an explicit number of Rx rings, default to
-	 * the number of completion vectors populated by the mlx4_core */
 	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
-		mlx4_info(mdev, "Using %d tx rings for port:%d\n",
-			  mdev->profile.prof[i].tx_ring_num, i);
-		mdev->profile.prof[i].rx_ring_num = min_t(int,
-			roundup_pow_of_two(dev->caps.num_comp_vectors),
-			MAX_RX_RINGS);
-		mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n",
-			  mdev->profile.prof[i].rx_ring_num, i);
+		if (!dev->caps.comp_pool) {
+			mdev->profile.prof[i].rx_ring_num =
+				rounddown_pow_of_two(max_t(int, MIN_RX_RINGS,
+							   min_t(int,
+								 dev->caps.num_comp_vectors,
+								 MAX_RX_RINGS)));
+		} else {
+			mdev->profile.prof[i].rx_ring_num = rounddown_pow_of_two(
+				min_t(int, dev->caps.comp_pool/
+				      dev->caps.num_ports - 1 , MAX_MSIX_P_PORT - 1));
+		}
 	}
 
 	/* Create our own workqueue for reset/multicast tasks
@@ -294,7 +296,7 @@
 	.remove		= mlx4_en_remove,
 	.event		= mlx4_en_event,
 	.get_dev	= mlx4_en_get_netdev,
-	.protocol	= MLX4_PROTOCOL_EN,
+	.protocol	= MLX4_PROT_ETH,
 };
 
 static int __init mlx4_en_init(void)

diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c
index 897f576..5762ebd 100644
--- a/drivers/net/mlx4/en_netdev.c
+++ b/drivers/net/mlx4/en_netdev.c

@@ -156,9 +156,8 @@
 	mutex_lock(&mdev->state_lock);
 	if (priv->port_up) {
 		/* Remove old MAC and insert the new one */
-		mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index);
-		err = mlx4_register_mac(mdev->dev, priv->port,
-					priv->mac, &priv->mac_index);
+		err = mlx4_replace_mac(mdev->dev, priv->port,
+				       priv->base_qpn, priv->mac, 0);
 		if (err)
 			en_err(priv, "Failed changing HW MAC address\n");
 	} else
@@ -214,6 +213,7 @@
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct net_device *dev = priv->dev;
 	u64 mcast_addr = 0;
+	u8 mc_list[16] = {0};
 	int err;
 
 	mutex_lock(&mdev->state_lock);
@@ -239,8 +239,12 @@
 			priv->flags |= MLX4_EN_FLAG_PROMISC;
 
 			/* Enable promiscouos mode */
-			err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port,
-						     priv->base_qpn, 1);
+			if (!mdev->dev->caps.vep_uc_steering)
+				err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port,
+							     priv->base_qpn, 1);
+			else
+				err = mlx4_unicast_promisc_add(mdev->dev, priv->base_qpn,
+							       priv->port);
 			if (err)
 				en_err(priv, "Failed enabling "
 					     "promiscous mode\n");
@@ -252,10 +256,21 @@
 				en_err(priv, "Failed disabling "
 					     "multicast filter\n");
 
-			/* Disable port VLAN filter */
-			err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, NULL);
-			if (err)
-				en_err(priv, "Failed disabling VLAN filter\n");
+			/* Add the default qp number as multicast promisc */
+			if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) {
+				err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn,
+								 priv->port);
+				if (err)
+					en_err(priv, "Failed entering multicast promisc mode\n");
+				priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
+			}
+
+			if (priv->vlgrp) {
+				/* Disable port VLAN filter */
+				err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, NULL);
+				if (err)
+					en_err(priv, "Failed disabling VLAN filter\n");
+			}
 		}
 		goto out;
 	}
@@ -270,11 +285,24 @@
 		priv->flags &= ~MLX4_EN_FLAG_PROMISC;
 
 		/* Disable promiscouos mode */
-		err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port,
-					     priv->base_qpn, 0);
+		if (!mdev->dev->caps.vep_uc_steering)
+			err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port,
+						     priv->base_qpn, 0);
+		else
+			err = mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn,
+							  priv->port);
 		if (err)
 			en_err(priv, "Failed disabling promiscous mode\n");
 
+		/* Disable Multicast promisc */
+		if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
+			err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn,
+							    priv->port);
+			if (err)
+				en_err(priv, "Failed disabling multicast promiscous mode\n");
+			priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+		}
+
 		/* Enable port VLAN filter */
 		err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlgrp);
 		if (err)
@@ -287,14 +315,38 @@
 					  0, MLX4_MCAST_DISABLE);
 		if (err)
 			en_err(priv, "Failed disabling multicast filter\n");
+
+		/* Add the default qp number as multicast promisc */
+		if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) {
+			err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn,
+							 priv->port);
+			if (err)
+				en_err(priv, "Failed entering multicast promisc mode\n");
+			priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
+		}
 	} else {
 		int i;
+		/* Disable Multicast promisc */
+		if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
+			err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn,
+							    priv->port);
+			if (err)
+				en_err(priv, "Failed disabling multicast promiscous mode\n");
+			priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+		}
 
 		err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
 					  0, MLX4_MCAST_DISABLE);
 		if (err)
 			en_err(priv, "Failed disabling multicast filter\n");
 
+		/* Detach our qp from all the multicast addresses */
+		for (i = 0; i < priv->mc_addrs_cnt; i++) {
+			memcpy(&mc_list[10], priv->mc_addrs + i * ETH_ALEN, ETH_ALEN);
+			mc_list[5] = priv->port;
+			mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp,
+					      mc_list, MLX4_PROT_ETH);
+		}
 		/* Flush mcast filter and init it with broadcast address */
 		mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST,
 				    1, MLX4_MCAST_CONFIG);
@@ -307,6 +359,10 @@
 		for (i = 0; i < priv->mc_addrs_cnt; i++) {
 			mcast_addr =
 			      mlx4_en_mac_to_u64(priv->mc_addrs + i * ETH_ALEN);
+			memcpy(&mc_list[10], priv->mc_addrs + i * ETH_ALEN, ETH_ALEN);
+			mc_list[5] = priv->port;
+			mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp,
+					      mc_list, 0, MLX4_PROT_ETH);
 			mlx4_SET_MCAST_FLTR(mdev->dev, priv->port,
 					    mcast_addr, 0, MLX4_MCAST_CONFIG);
 		}
@@ -314,8 +370,6 @@
 					  0, MLX4_MCAST_ENABLE);
 		if (err)
 			en_err(priv, "Failed enabling multicast filter\n");
-
-		mlx4_en_clear_list(dev);
 	}
 out:
 	mutex_unlock(&mdev->state_lock);
@@ -417,7 +471,6 @@
 	unsigned long avg_pkt_size;
 	unsigned long rx_packets;
 	unsigned long rx_bytes;
-	unsigned long rx_byte_diff;
 	unsigned long tx_packets;
 	unsigned long tx_pkt_diff;
 	unsigned long rx_pkt_diff;
@@ -441,25 +494,20 @@
 	rx_pkt_diff = ((unsigned long) (rx_packets -
 					priv->last_moder_packets));
 	packets = max(tx_pkt_diff, rx_pkt_diff);
-	rx_byte_diff = rx_bytes - priv->last_moder_bytes;
-	rx_byte_diff = rx_byte_diff ? rx_byte_diff : 1;
 	rate = packets * HZ / period;
 	avg_pkt_size = packets ? ((unsigned long) (rx_bytes -
 				 priv->last_moder_bytes)) / packets : 0;
 
 	/* Apply auto-moderation only when packet rate exceeds a rate that
 	 * it matters */
-	if (rate > MLX4_EN_RX_RATE_THRESH) {
+	if (rate > MLX4_EN_RX_RATE_THRESH && avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) {
 		/* If tx and rx packet rates are not balanced, assume that
 		 * traffic is mainly BW bound and apply maximum moderation.
 		 * Otherwise, moderate according to packet rate */
-		if (2 * tx_pkt_diff > 3 * rx_pkt_diff &&
-		    rx_pkt_diff / rx_byte_diff <
-		    MLX4_EN_SMALL_PKT_SIZE)
-			moder_time = priv->rx_usecs_low;
-		else if (2 * rx_pkt_diff > 3 * tx_pkt_diff)
+		if (2 * tx_pkt_diff > 3 * rx_pkt_diff ||
+		    2 * rx_pkt_diff > 3 * tx_pkt_diff) {
 			moder_time = priv->rx_usecs_high;
-		else {
+		} else {
 			if (rate < priv->pkt_rate_low)
 				moder_time = priv->rx_usecs_low;
 			else if (rate > priv->pkt_rate_high)
@@ -471,9 +519,7 @@
 					priv->rx_usecs_low;
 		}
 	} else {
-		/* When packet rate is low, use default moderation rather than
-		 * 0 to prevent interrupt storms if traffic suddenly increases */
-		moder_time = priv->rx_usecs;
+		moder_time = priv->rx_usecs_low;
 	}
 
 	en_dbg(INTR, priv, "tx rate:%lu rx_rate:%lu\n",
@@ -565,6 +611,8 @@
 	int err = 0;
 	int i;
 	int j;
+	u8 mc_list[16] = {0};
+	char name[32];
 
 	if (priv->port_up) {
 		en_dbg(DRV, priv, "start port called while port already up\n");
@@ -603,16 +651,35 @@
 		++rx_index;
 	}
 
+	/* Set port mac number */
+	en_dbg(DRV, priv, "Setting mac for port %d\n", priv->port);
+	err = mlx4_register_mac(mdev->dev, priv->port,
+				priv->mac, &priv->base_qpn, 0);
+	if (err) {
+		en_err(priv, "Failed setting port mac\n");
+		goto cq_err;
+	}
+	mdev->mac_removed[priv->port] = 0;
+
 	err = mlx4_en_config_rss_steer(priv);
 	if (err) {
 		en_err(priv, "Failed configuring rss steering\n");
-		goto cq_err;
+		goto mac_err;
 	}
 
+	if (mdev->dev->caps.comp_pool && !priv->tx_vector) {
+		sprintf(name , "%s-tx", priv->dev->name);
+		if (mlx4_assign_eq(mdev->dev , name, &priv->tx_vector)) {
+			mlx4_warn(mdev, "Failed Assigning an EQ to "
+					"%s_tx ,Falling back to legacy "
+					"EQ's\n", priv->dev->name);
+		}
+	}
 	/* Configure tx cq's and rings */
 	for (i = 0; i < priv->tx_ring_num; i++) {
 		/* Configure cq */
 		cq = &priv->tx_cq[i];
+		cq->vector = priv->tx_vector;
 		err = mlx4_en_activate_cq(priv, cq);
 		if (err) {
 			en_err(priv, "Failed allocating Tx CQ\n");
@@ -659,24 +726,22 @@
 		en_err(priv, "Failed setting default qp numbers\n");
 		goto tx_err;
 	}
-	/* Set port mac number */
-	en_dbg(DRV, priv, "Setting mac for port %d\n", priv->port);
-	err = mlx4_register_mac(mdev->dev, priv->port,
-				priv->mac, &priv->mac_index);
-	if (err) {
-		en_err(priv, "Failed setting port mac\n");
-		goto tx_err;
-	}
-	mdev->mac_removed[priv->port] = 0;
 
 	/* Init port */
 	en_dbg(HW, priv, "Initializing port\n");
 	err = mlx4_INIT_PORT(mdev->dev, priv->port);
 	if (err) {
 		en_err(priv, "Failed Initializing port\n");
-		goto mac_err;
+		goto tx_err;
 	}
 
+	/* Attach rx QP to bradcast address */
+	memset(&mc_list[10], 0xff, ETH_ALEN);
+	mc_list[5] = priv->port;
+	if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list,
+				  0, MLX4_PROT_ETH))
+		mlx4_warn(mdev, "Failed Attaching Broadcast\n");
+
 	/* Schedule multicast task to populate multicast list */
 	queue_work(mdev->workqueue, &priv->mcast_task);
 
@@ -684,8 +749,6 @@
 	netif_tx_start_all_queues(dev);
 	return 0;
 
-mac_err:
-	mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index);
 tx_err:
 	while (tx_index--) {
 		mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]);
@@ -693,6 +756,8 @@
 	}
 
 	mlx4_en_release_rss_steer(priv);
+mac_err:
+	mlx4_unregister_mac(mdev->dev, priv->port, priv->base_qpn);
 cq_err:
 	while (rx_index--)
 		mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]);
@@ -708,6 +773,7 @@
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
 	int i;
+	u8 mc_list[16] = {0};
 
 	if (!priv->port_up) {
 		en_dbg(DRV, priv, "stop port called while port already down\n");
@@ -722,8 +788,23 @@
 	/* Set port as not active */
 	priv->port_up = false;
 
+	/* Detach All multicasts */
+	memset(&mc_list[10], 0xff, ETH_ALEN);
+	mc_list[5] = priv->port;
+	mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list,
+			      MLX4_PROT_ETH);
+	for (i = 0; i < priv->mc_addrs_cnt; i++) {
+		memcpy(&mc_list[10], priv->mc_addrs + i * ETH_ALEN, ETH_ALEN);
+		mc_list[5] = priv->port;
+		mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp,
+				      mc_list, MLX4_PROT_ETH);
+	}
+	mlx4_en_clear_list(dev);
+	/* Flush multicast filter */
+	mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG);
+
 	/* Unregister Mac address for the port */
-	mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index);
+	mlx4_unregister_mac(mdev->dev, priv->port, priv->base_qpn);
 	mdev->mac_removed[priv->port] = 1;
 
 	/* Free TX Rings */
@@ -801,7 +882,6 @@
 		priv->rx_ring[i].packets = 0;
 	}
 
-	mlx4_en_set_default_moderation(priv);
 	err = mlx4_en_start_port(dev);
 	if (err)
 		en_err(priv, "Failed starting port:%d\n", priv->port);
@@ -828,7 +908,7 @@
 	return 0;
 }
 
-void mlx4_en_free_resources(struct mlx4_en_priv *priv)
+void mlx4_en_free_resources(struct mlx4_en_priv *priv, bool reserve_vectors)
 {
 	int i;
 
@@ -836,14 +916,14 @@
 		if (priv->tx_ring[i].tx_info)
 			mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]);
 		if (priv->tx_cq[i].buf)
-			mlx4_en_destroy_cq(priv, &priv->tx_cq[i]);
+			mlx4_en_destroy_cq(priv, &priv->tx_cq[i], reserve_vectors);
 	}
 
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		if (priv->rx_ring[i].rx_info)
 			mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i]);
 		if (priv->rx_cq[i].buf)
-			mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
+			mlx4_en_destroy_cq(priv, &priv->rx_cq[i], reserve_vectors);
 	}
 }
 
@@ -851,6 +931,13 @@
 {
 	struct mlx4_en_port_profile *prof = priv->prof;
 	int i;
+	int base_tx_qpn, err;
+
+	err = mlx4_qp_reserve_range(priv->mdev->dev, priv->tx_ring_num, 256, &base_tx_qpn);
+	if (err) {
+		en_err(priv, "failed reserving range for TX rings\n");
+		return err;
+	}
 
 	/* Create tx Rings */
 	for (i = 0; i < priv->tx_ring_num; i++) {
@@ -858,7 +945,7 @@
 				      prof->tx_ring_size, i, TX))
 			goto err;
 
-		if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i],
+		if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], base_tx_qpn + i,
 					   prof->tx_ring_size, TXBB_SIZE))
 			goto err;
 	}
@@ -878,6 +965,7 @@
 
 err:
 	en_err(priv, "Failed to allocate NIC resources\n");
+	mlx4_qp_release_range(priv->mdev->dev, base_tx_qpn, priv->tx_ring_num);
 	return -ENOMEM;
 }
 
@@ -905,7 +993,7 @@
 	mdev->pndev[priv->port] = NULL;
 	mutex_unlock(&mdev->state_lock);
 
-	mlx4_en_free_resources(priv);
+	mlx4_en_free_resources(priv, false);
 	free_netdev(dev);
 }
 
@@ -932,7 +1020,6 @@
 			en_dbg(DRV, priv, "Change MTU called with card down!?\n");
 		} else {
 			mlx4_en_stop_port(dev);
-			mlx4_en_set_default_moderation(priv);
 			err = mlx4_en_start_port(dev);
 			if (err) {
 				en_err(priv, "Failed restarting port:%d\n",
@@ -1079,7 +1166,25 @@
 	en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num);
 	en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
 
+	/* Configure port */
+	err = mlx4_SET_PORT_general(mdev->dev, priv->port,
+				    MLX4_EN_MIN_MTU,
+				    0, 0, 0, 0);
+	if (err) {
+		en_err(priv, "Failed setting port general configurations "
+		       "for port %d, with error %d\n", priv->port, err);
+		goto out;
+	}
+
+	/* Init port */
+	en_warn(priv, "Initializing port\n");
+	err = mlx4_INIT_PORT(mdev->dev, priv->port);
+	if (err) {
+		en_err(priv, "Failed Initializing port\n");
+		goto out;
+	}
 	priv->registered = 1;
+	mlx4_en_set_default_moderation(priv);
 	queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
 	return 0;
 

diff --git a/drivers/net/mlx4/en_port.c b/drivers/net/mlx4/en_port.c
index 7f5a322..f2a4f5d 100644
--- a/drivers/net/mlx4/en_port.c
+++ b/drivers/net/mlx4/en_port.c

@@ -119,6 +119,10 @@
 	struct mlx4_set_port_rqp_calc_context *context;
 	int err;
 	u32 in_mod;
+	u32 m_promisc = (dev->caps.vep_mc_steering) ? MCAST_DIRECT : MCAST_DEFAULT;
+
+	if (dev->caps.vep_mc_steering && dev->caps.vep_uc_steering)
+		return 0;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
@@ -127,8 +131,11 @@
 	memset(context, 0, sizeof *context);
 
 	context->base_qpn = cpu_to_be32(base_qpn);
-	context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_EN_SHIFT | base_qpn);
-	context->mcast = cpu_to_be32(1 << SET_PORT_PROMISC_MODE_SHIFT | base_qpn);
+	context->n_mac = 0x7;
+	context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT |
+				       base_qpn);
+	context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT |
+				     base_qpn);
 	context->intra_no_vlan = 0;
 	context->no_vlan = MLX4_NO_VLAN_IDX;
 	context->intra_vlan_miss = 0;
@@ -206,7 +213,7 @@
 	}
 	stats->tx_packets = 0;
 	stats->tx_bytes = 0;
-	for (i = 0; i <= priv->tx_ring_num; i++) {
+	for (i = 0; i < priv->tx_ring_num; i++) {
 		stats->tx_packets += priv->tx_ring[i].packets;
 		stats->tx_bytes += priv->tx_ring[i].bytes;
 	}

diff --git a/drivers/net/mlx4/en_port.h b/drivers/net/mlx4/en_port.h
index 092e814..e3d73e4 100644
--- a/drivers/net/mlx4/en_port.h
+++ b/drivers/net/mlx4/en_port.h

@@ -36,8 +36,8 @@
 
 
 #define SET_PORT_GEN_ALL_VALID	0x7
-#define SET_PORT_PROMISC_EN_SHIFT	31
-#define SET_PORT_PROMISC_MODE_SHIFT	30
+#define SET_PORT_PROMISC_SHIFT	31
+#define SET_PORT_MC_PROMISC_SHIFT	30
 
 enum {
 	MLX4_CMD_SET_VLAN_FLTR  = 0x47,
@@ -45,6 +45,12 @@
 	MLX4_CMD_DUMP_ETH_STATS = 0x49,
 };
 
+enum {
+	MCAST_DIRECT_ONLY       = 0,
+	MCAST_DIRECT            = 1,
+	MCAST_DEFAULT           = 2
+};
+
 struct mlx4_set_port_general_context {
 	u8 reserved[3];
 	u8 flags;
@@ -60,14 +66,17 @@
 
 struct mlx4_set_port_rqp_calc_context {
 	__be32 base_qpn;
-	__be32 flags;
-	u8 reserved[3];
+	u8 rererved;
+	u8 n_mac;
+	u8 n_vlan;
+	u8 n_prio;
+	u8 reserved2[3];
 	u8 mac_miss;
 	u8 intra_no_vlan;
 	u8 no_vlan;
 	u8 intra_vlan_miss;
 	u8 vlan_miss;
-	u8 reserved2[3];
+	u8 reserved3[3];
 	u8 no_vlan_prio;
 	__be32 promisc;
 	__be32 mcast;

diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c
index 570f250..05998ee 100644
--- a/drivers/net/mlx4/en_rx.c
+++ b/drivers/net/mlx4/en_rx.c

@@ -845,16 +845,10 @@
 	}
 
 	/* Configure RSS indirection qp */
-	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn);
-	if (err) {
-		en_err(priv, "Failed to reserve range for RSS "
-			     "indirection qp\n");
-		goto rss_err;
-	}
 	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
 	if (err) {
 		en_err(priv, "Failed to allocate RSS indirection QP\n");
-		goto reserve_err;
+		goto rss_err;
 	}
 	rss_map->indir_qp.event = mlx4_en_sqp_event;
 	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
@@ -881,8 +875,6 @@
 		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
 	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
 	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
-reserve_err:
-	mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1);
 rss_err:
 	for (i = 0; i < good_qps; i++) {
 		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
@@ -904,7 +896,6 @@
 		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
 	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
 	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
-	mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1);
 
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],

diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c
index a680cd4..01feb8f 100644
--- a/drivers/net/mlx4/en_tx.c
+++ b/drivers/net/mlx4/en_tx.c

@@ -44,6 +44,7 @@
 
 enum {
 	MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */
+	MAX_BF = 256,
 };
 
 static int inline_thold __read_mostly = MAX_INLINE;
@@ -52,7 +53,7 @@
 MODULE_PARM_DESC(inline_thold, "threshold for using inline data");
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
-			   struct mlx4_en_tx_ring *ring, u32 size,
+			   struct mlx4_en_tx_ring *ring, int qpn, u32 size,
 			   u16 stride)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
@@ -103,23 +104,25 @@
 	       "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size,
 	       ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);
 
-	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn);
-	if (err) {
-		en_err(priv, "Failed reserving qp for tx ring.\n");
-		goto err_map;
-	}
-
+	ring->qpn = qpn;
 	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp);
 	if (err) {
 		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
-		goto err_reserve;
+		goto err_map;
 	}
 	ring->qp.event = mlx4_en_sqp_event;
 
+	err = mlx4_bf_alloc(mdev->dev, &ring->bf);
+	if (err) {
+		en_dbg(DRV, priv, "working without blueflame (%d)", err);
+		ring->bf.uar = &mdev->priv_uar;
+		ring->bf.uar->map = mdev->uar_map;
+		ring->bf_enabled = false;
+	} else
+		ring->bf_enabled = true;
+
 	return 0;
 
-err_reserve:
-	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
 err_map:
 	mlx4_en_unmap_buffer(&ring->wqres.buf);
 err_hwq_res:
@@ -139,6 +142,8 @@
 	struct mlx4_en_dev *mdev = priv->mdev;
 	en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);
 
+	if (ring->bf_enabled)
+		mlx4_bf_free(mdev->dev, &ring->bf);
 	mlx4_qp_remove(mdev->dev, &ring->qp);
 	mlx4_qp_free(mdev->dev, &ring->qp);
 	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
@@ -171,6 +176,8 @@
 
 	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
 				ring->cqn, &ring->context);
+	if (ring->bf_enabled)
+		ring->context.usr_page = cpu_to_be32(ring->bf.uar->index);
 
 	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
 			       &ring->qp, &ring->qp_state);
@@ -591,6 +598,11 @@
 	return skb_tx_hash(dev, skb);
 }
 
+static void mlx4_bf_copy(unsigned long *dst, unsigned long *src, unsigned bytecnt)
+{
+	__iowrite64_copy(dst, src, bytecnt / 8);
+}
+
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -609,12 +621,13 @@
 	int desc_size;
 	int real_size;
 	dma_addr_t dma;
-	u32 index;
+	u32 index, bf_index;
 	__be32 op_own;
 	u16 vlan_tag = 0;
 	int i;
 	int lso_header_size;
 	void *fragptr;
+	bool bounce = false;
 
 	if (!priv->port_up)
 		goto tx_drop;
@@ -657,13 +670,16 @@
 
 	/* Packet is good - grab an index and transmit it */
 	index = ring->prod & ring->size_mask;
+	bf_index = ring->prod;
 
 	/* See if we have enough space for whole descriptor TXBB for setting
 	 * SW ownership on next descriptor; if not, use a bounce buffer. */
 	if (likely(index + nr_txbb <= ring->size))
 		tx_desc = ring->buf + index * TXBB_SIZE;
-	else
+	else {
 		tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
+		bounce = true;
+	}
 
 	/* Save skb in tx_info ring */
 	tx_info = &ring->tx_info[index];
@@ -768,21 +784,37 @@
 	ring->prod += nr_txbb;
 
 	/* If we used a bounce buffer then copy descriptor back into place */
-	if (tx_desc == (struct mlx4_en_tx_desc *) ring->bounce_buf)
+	if (bounce)
 		tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);
 
 	/* Run destructor before passing skb to HW */
 	if (likely(!skb_shared(skb)))
 		skb_orphan(skb);
 
-	/* Ensure new descirptor hits memory
-	 * before setting ownership of this descriptor to HW */
-	wmb();
-	tx_desc->ctrl.owner_opcode = op_own;
+	if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tag) {
+		*(u32 *) (&tx_desc->ctrl.vlan_tag) |= ring->doorbell_qpn;
+		op_own |= htonl((bf_index & 0xffff) << 8);
+		/* Ensure new descirptor hits memory
+		* before setting ownership of this descriptor to HW */
+		wmb();
+		tx_desc->ctrl.owner_opcode = op_own;
 
-	/* Ring doorbell! */
-	wmb();
-	writel(ring->doorbell_qpn, mdev->uar_map + MLX4_SEND_DOORBELL);
+		wmb();
+
+		mlx4_bf_copy(ring->bf.reg + ring->bf.offset, (unsigned long *) &tx_desc->ctrl,
+		     desc_size);
+
+		wmb();
+
+		ring->bf.offset ^= ring->bf.buf_size;
+	} else {
+		/* Ensure new descirptor hits memory
+		* before setting ownership of this descriptor to HW */
+		wmb();
+		tx_desc->ctrl.owner_opcode = op_own;
+		wmb();
+		writel(ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL);
+	}
 
 	/* Poll CQ here */
 	mlx4_en_xmit_poll(priv, tx_ind);

diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 552d0fc..506cfd0 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c

@@ -42,7 +42,7 @@
 #include "fw.h"
 
 enum {
-	MLX4_IRQNAME_SIZE	= 64
+	MLX4_IRQNAME_SIZE	= 32
 };
 
 enum {
@@ -317,8 +317,8 @@
 	 * we need to map, take the difference of highest index and
 	 * the lowest index we'll use and add 1.
 	 */
-	return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 -
-		dev->caps.reserved_eqs / 4 + 1;
+	return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs +
+		 dev->caps.comp_pool)/4 - dev->caps.reserved_eqs/4 + 1;
 }
 
 static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
@@ -496,16 +496,32 @@
 static void mlx4_free_irqs(struct mlx4_dev *dev)
 {
 	struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table;
-	int i;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int	i, vec;
 
 	if (eq_table->have_irq)
 		free_irq(dev->pdev->irq, dev);
+
 	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		if (eq_table->eq[i].have_irq) {
 			free_irq(eq_table->eq[i].irq, eq_table->eq + i);
 			eq_table->eq[i].have_irq = 0;
 		}
 
+	for (i = 0; i < dev->caps.comp_pool; i++) {
+		/*
+		 * Freeing the assigned irq's
+		 * all bits should be 0, but we need to validate
+		 */
+		if (priv->msix_ctl.pool_bm & 1ULL << i) {
+			/* NO need protecting*/
+			vec = dev->caps.num_comp_vectors + 1 + i;
+			free_irq(priv->eq_table.eq[vec].irq,
+				 &priv->eq_table.eq[vec]);
+		}
+	}
+
+
 	kfree(eq_table->irq_names);
 }
 
@@ -578,7 +594,8 @@
 		(priv->eq_table.inta_pin < 32 ? 4 : 0);
 
 	priv->eq_table.irq_names =
-		kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1),
+		kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1 +
+					     dev->caps.comp_pool),
 			GFP_KERNEL);
 	if (!priv->eq_table.irq_names) {
 		err = -ENOMEM;
@@ -601,6 +618,22 @@
 	if (err)
 		goto err_out_comp;
 
+	/*if additional completion vectors poolsize is 0 this loop will not run*/
+	for (i = dev->caps.num_comp_vectors + 1;
+	      i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) {
+
+		err = mlx4_create_eq(dev, dev->caps.num_cqs -
+					  dev->caps.reserved_cqs +
+					  MLX4_NUM_SPARE_EQE,
+				     (dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
+				     &priv->eq_table.eq[i]);
+		if (err) {
+			--i;
+			goto err_out_unmap;
+		}
+	}
+
+
 	if (dev->flags & MLX4_FLAG_MSI_X) {
 		const char *eq_name;
 
@@ -686,7 +719,7 @@
 
 	mlx4_free_irqs(dev);
 
-	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i)
 		mlx4_free_eq(dev, &priv->eq_table.eq[i]);
 
 	mlx4_unmap_clr_int(dev);
@@ -743,3 +776,65 @@
 	return err;
 }
 EXPORT_SYMBOL(mlx4_test_interrupts);
+
+int mlx4_assign_eq(struct mlx4_dev *dev, char* name, int * vector)
+{
+
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int vec = 0, err = 0, i;
+
+	spin_lock(&priv->msix_ctl.pool_lock);
+	for (i = 0; !vec && i < dev->caps.comp_pool; i++) {
+		if (~priv->msix_ctl.pool_bm & 1ULL << i) {
+			priv->msix_ctl.pool_bm |= 1ULL << i;
+			vec = dev->caps.num_comp_vectors + 1 + i;
+			snprintf(priv->eq_table.irq_names +
+					vec * MLX4_IRQNAME_SIZE,
+					MLX4_IRQNAME_SIZE, "%s", name);
+			err = request_irq(priv->eq_table.eq[vec].irq,
+					  mlx4_msi_x_interrupt, 0,
+					  &priv->eq_table.irq_names[vec<<5],
+					  priv->eq_table.eq + vec);
+			if (err) {
+				/*zero out bit by fliping it*/
+				priv->msix_ctl.pool_bm ^= 1 << i;
+				vec = 0;
+				continue;
+				/*we dont want to break here*/
+			}
+			eq_set_ci(&priv->eq_table.eq[vec], 1);
+		}
+	}
+	spin_unlock(&priv->msix_ctl.pool_lock);
+
+	if (vec) {
+		*vector = vec;
+	} else {
+		*vector = 0;
+		err = (i == dev->caps.comp_pool) ? -ENOSPC : err;
+	}
+	return err;
+}
+EXPORT_SYMBOL(mlx4_assign_eq);
+
+void mlx4_release_eq(struct mlx4_dev *dev, int vec)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	/*bm index*/
+	int i = vec - dev->caps.num_comp_vectors - 1;
+
+	if (likely(i >= 0)) {
+		/*sanity check , making sure were not trying to free irq's
+		  Belonging to a legacy EQ*/
+		spin_lock(&priv->msix_ctl.pool_lock);
+		if (priv->msix_ctl.pool_bm & 1ULL << i) {
+			free_irq(priv->eq_table.eq[vec].irq,
+				 &priv->eq_table.eq[vec]);
+			priv->msix_ctl.pool_bm &= ~(1ULL << i);
+		}
+		spin_unlock(&priv->msix_ctl.pool_lock);
+	}
+
+}
+EXPORT_SYMBOL(mlx4_release_eq);
+

diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 5de1db8..67a209b 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c

@@ -274,8 +274,11 @@
 	dev_cap->stat_rate_support = stat_rate;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_UDP_RSS_OFFSET);
 	dev_cap->udp_rss = field & 0x1;
+	dev_cap->vep_uc_steering = field & 0x2;
+	dev_cap->vep_mc_steering = field & 0x4;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_ETH_UC_LOOPBACK_OFFSET);
 	dev_cap->loopback_support = field & 0x1;
+	dev_cap->wol = field & 0x40;
 	MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET);
 	dev_cap->reserved_uars = field >> 4;
@@ -737,6 +740,7 @@
 #define	 INIT_HCA_MC_BASE_OFFSET	 (INIT_HCA_MCAST_OFFSET + 0x00)
 #define	 INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12)
 #define	 INIT_HCA_LOG_MC_HASH_SZ_OFFSET	 (INIT_HCA_MCAST_OFFSET + 0x16)
+#define  INIT_HCA_UC_STEERING_OFFSET	 (INIT_HCA_MCAST_OFFSET + 0x18)
 #define	 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
 #define INIT_HCA_TPT_OFFSET		 0x0f0
 #define	 INIT_HCA_DMPT_BASE_OFFSET	 (INIT_HCA_TPT_OFFSET + 0x00)
@@ -797,6 +801,8 @@
 	MLX4_PUT(inbox, param->mc_base,		INIT_HCA_MC_BASE_OFFSET);
 	MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
 	MLX4_PUT(inbox, param->log_mc_hash_sz,  INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+	if (dev->caps.vep_mc_steering)
+		MLX4_PUT(inbox, (u8) (1 << 3),	INIT_HCA_UC_STEERING_OFFSET);
 	MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
 
 	/* TPT attributes */
@@ -908,3 +914,22 @@
 	/* Input modifier of 0x1f means "finish as soon as possible." */
 	return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100);
 }
+
+#define MLX4_WOL_SETUP_MODE (5 << 28)
+int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port)
+{
+	u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
+
+	return mlx4_cmd_imm(dev, 0, config, in_mod, 0x3,
+			    MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A);
+}
+EXPORT_SYMBOL_GPL(mlx4_wol_read);
+
+int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port)
+{
+	u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
+
+	return mlx4_cmd(dev, config, in_mod, 0x1, MLX4_CMD_MOD_STAT_CFG,
+					MLX4_CMD_TIME_CLASS_A);
+}
+EXPORT_SYMBOL_GPL(mlx4_wol_write);

diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index 65cc72e..88003eb 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h

@@ -80,6 +80,9 @@
 	u16 stat_rate_support;
 	int udp_rss;
 	int loopback_support;
+	int vep_uc_steering;
+	int vep_mc_steering;
+	int wol;
 	u32 flags;
 	int reserved_uars;
 	int uar_size;

diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index c835011..62fa7ee 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c

@@ -39,6 +39,7 @@
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
+#include <linux/io-mapping.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/doorbell.h>
@@ -227,6 +228,9 @@
 	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
 	dev->caps.udp_rss	     = dev_cap->udp_rss;
 	dev->caps.loopback_support   = dev_cap->loopback_support;
+	dev->caps.vep_uc_steering    = dev_cap->vep_uc_steering;
+	dev->caps.vep_mc_steering    = dev_cap->vep_mc_steering;
+	dev->caps.wol		     = dev_cap->wol;
 	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
 
 	dev->caps.log_num_macs  = log_num_mac;
@@ -718,8 +722,31 @@
 	mlx4_free_icm(dev, priv->fw.aux_icm, 0);
 }
 
+static int map_bf_area(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	resource_size_t bf_start;
+	resource_size_t bf_len;
+	int err = 0;
+
+	bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT);
+	bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT);
+	priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
+	if (!priv->bf_mapping)
+		err = -ENOMEM;
+
+	return err;
+}
+
+static void unmap_bf_area(struct mlx4_dev *dev)
+{
+	if (mlx4_priv(dev)->bf_mapping)
+		io_mapping_free(mlx4_priv(dev)->bf_mapping);
+}
+
 static void mlx4_close_hca(struct mlx4_dev *dev)
 {
+	unmap_bf_area(dev);
 	mlx4_CLOSE_HCA(dev, 0);
 	mlx4_free_icms(dev);
 	mlx4_UNMAP_FA(dev);
@@ -772,6 +799,9 @@
 		goto err_stop_fw;
 	}
 
+	if (map_bf_area(dev))
+		mlx4_dbg(dev, "Failed to map blue flame area\n");
+
 	init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
 
 	err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
@@ -802,6 +832,7 @@
 	mlx4_free_icms(dev);
 
 err_stop_fw:
+	unmap_bf_area(dev);
 	mlx4_UNMAP_FA(dev);
 	mlx4_free_icm(dev, priv->fw.fw_icm, 0);
 
@@ -969,13 +1000,15 @@
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct msix_entry *entries;
-	int nreq;
+	int nreq = min_t(int, dev->caps.num_ports *
+			 min_t(int, num_online_cpus() + 1, MAX_MSIX_P_PORT)
+				+ MSIX_LEGACY_SZ, MAX_MSIX);
 	int err;
 	int i;
 
 	if (msi_x) {
 		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
-			     num_possible_cpus() + 1);
+			     nreq);
 		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
 		if (!entries)
 			goto no_msi;
@@ -998,7 +1031,15 @@
 			goto no_msi;
 		}
 
-		dev->caps.num_comp_vectors = nreq - 1;
+		if (nreq <
+		    MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {
+			/*Working in legacy mode , all EQ's shared*/
+			dev->caps.comp_pool           = 0;
+			dev->caps.num_comp_vectors = nreq - 1;
+		} else {
+			dev->caps.comp_pool           = nreq - MSIX_LEGACY_SZ;
+			dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
+		}
 		for (i = 0; i < nreq; ++i)
 			priv->eq_table.eq[i].irq = entries[i].vector;
 
@@ -1010,6 +1051,7 @@
 
 no_msi:
 	dev->caps.num_comp_vectors = 1;
+	dev->caps.comp_pool	   = 0;
 
 	for (i = 0; i < 2; ++i)
 		priv->eq_table.eq[i].irq = dev->pdev->irq;
@@ -1049,6 +1091,59 @@
 	device_remove_file(&info->dev->pdev->dev, &info->port_attr);
 }
 
+static int mlx4_init_steering(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int num_entries = dev->caps.num_ports;
+	int i, j;
+
+	priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
+	if (!priv->steer)
+		return -ENOMEM;
+
+	for (i = 0; i < num_entries; i++) {
+		for (j = 0; j < MLX4_NUM_STEERS; j++) {
+			INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
+			INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
+		}
+		INIT_LIST_HEAD(&priv->steer[i].high_prios);
+	}
+	return 0;
+}
+
+static void mlx4_clear_steering(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_steer_index *entry, *tmp_entry;
+	struct mlx4_promisc_qp *pqp, *tmp_pqp;
+	int num_entries = dev->caps.num_ports;
+	int i, j;
+
+	for (i = 0; i < num_entries; i++) {
+		for (j = 0; j < MLX4_NUM_STEERS; j++) {
+			list_for_each_entry_safe(pqp, tmp_pqp,
+						 &priv->steer[i].promisc_qps[j],
+						 list) {
+				list_del(&pqp->list);
+				kfree(pqp);
+			}
+			list_for_each_entry_safe(entry, tmp_entry,
+						 &priv->steer[i].steer_entries[j],
+						 list) {
+				list_del(&entry->list);
+				list_for_each_entry_safe(pqp, tmp_pqp,
+							 &entry->duplicates,
+							 list) {
+					list_del(&pqp->list);
+					kfree(pqp);
+				}
+				kfree(entry);
+			}
+		}
+	}
+	kfree(priv->steer);
+}
+
 static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct mlx4_priv *priv;
@@ -1130,6 +1225,11 @@
 	INIT_LIST_HEAD(&priv->pgdir_list);
 	mutex_init(&priv->pgdir_mutex);
 
+	pci_read_config_byte(pdev, PCI_REVISION_ID, &dev->rev_id);
+
+	INIT_LIST_HEAD(&priv->bf_list);
+	mutex_init(&priv->bf_mutex);
+
 	/*
 	 * Now reset the HCA before we touch the PCI capabilities or
 	 * attempt a firmware command, since a boot ROM may have left
@@ -1154,8 +1254,15 @@
 	if (err)
 		goto err_close;
 
+	priv->msix_ctl.pool_bm = 0;
+	spin_lock_init(&priv->msix_ctl.pool_lock);
+
 	mlx4_enable_msi_x(dev);
 
+	err = mlx4_init_steering(dev);
+	if (err)
+		goto err_free_eq;
+
 	err = mlx4_setup_hca(dev);
 	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X)) {
 		dev->flags &= ~MLX4_FLAG_MSI_X;
@@ -1164,7 +1271,7 @@
 	}
 
 	if (err)
-		goto err_free_eq;
+		goto err_steer;
 
 	for (port = 1; port <= dev->caps.num_ports; port++) {
 		err = mlx4_init_port_info(dev, port);
@@ -1197,6 +1304,9 @@
 	mlx4_cleanup_pd_table(dev);
 	mlx4_cleanup_uar_table(dev);
 
+err_steer:
+	mlx4_clear_steering(dev);
+
 err_free_eq:
 	mlx4_free_eq_table(dev);
 
@@ -1256,6 +1366,7 @@
 		iounmap(priv->kar);
 		mlx4_uar_free(dev, &priv->driver_uar);
 		mlx4_cleanup_uar_table(dev);
+		mlx4_clear_steering(dev);
 		mlx4_free_eq_table(dev);
 		mlx4_close_hca(dev);
 		mlx4_cmd_cleanup(dev);

diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c
index 79cf42d..e71372a 100644
--- a/drivers/net/mlx4/mcg.c
+++ b/drivers/net/mlx4/mcg.c

@@ -32,6 +32,7 @@
  */
 
 #include <linux/string.h>
+#include <linux/etherdevice.h>
 
 #include <linux/mlx4/cmd.h>
 
@@ -40,38 +41,40 @@
 #define MGM_QPN_MASK       0x00FFFFFF
 #define MGM_BLCK_LB_BIT    30
 
-struct mlx4_mgm {
-	__be32			next_gid_index;
-	__be32			members_count;
-	u32			reserved[2];
-	u8			gid[16];
-	__be32			qp[MLX4_QP_PER_MGM];
-};
-
 static const u8 zero_gid[16];	/* automatically initialized to 0 */
 
-static int mlx4_READ_MCG(struct mlx4_dev *dev, int index,
-			 struct mlx4_cmd_mailbox *mailbox)
+static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index,
+			   struct mlx4_cmd_mailbox *mailbox)
 {
 	return mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, MLX4_CMD_READ_MCG,
 			    MLX4_CMD_TIME_CLASS_A);
 }
 
-static int mlx4_WRITE_MCG(struct mlx4_dev *dev, int index,
-			  struct mlx4_cmd_mailbox *mailbox)
+static int mlx4_WRITE_ENTRY(struct mlx4_dev *dev, int index,
+			    struct mlx4_cmd_mailbox *mailbox)
 {
 	return mlx4_cmd(dev, mailbox->dma, index, 0, MLX4_CMD_WRITE_MCG,
 			MLX4_CMD_TIME_CLASS_A);
 }
 
-static int mlx4_MGID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
-			  u16 *hash)
+static int mlx4_WRITE_PROMISC(struct mlx4_dev *dev, u8 vep_num, u8 port, u8 steer,
+			      struct mlx4_cmd_mailbox *mailbox)
+{
+	u32 in_mod;
+
+	in_mod = (u32) vep_num << 24 | (u32) port << 16 | steer << 1;
+	return mlx4_cmd(dev, mailbox->dma, in_mod, 0x1,
+			MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A);
+}
+
+static int mlx4_GID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+			 u16 *hash, u8 op_mod)
 {
 	u64 imm;
 	int err;
 
-	err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, 0, MLX4_CMD_MGID_HASH,
-			   MLX4_CMD_TIME_CLASS_A);
+	err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, op_mod,
+			   MLX4_CMD_MGID_HASH, MLX4_CMD_TIME_CLASS_A);
 
 	if (!err)
 		*hash = imm;
@@ -79,6 +82,457 @@
 	return err;
 }
 
+static struct mlx4_promisc_qp *get_promisc_qp(struct mlx4_dev *dev, u8 pf_num,
+					      enum mlx4_steer_type steer,
+					      u32 qpn)
+{
+	struct mlx4_steer *s_steer = &mlx4_priv(dev)->steer[pf_num];
+	struct mlx4_promisc_qp *pqp;
+
+	list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) {
+		if (pqp->qpn == qpn)
+			return pqp;
+	}
+	/* not found */
+	return NULL;
+}
+
+/*
+ * Add new entry to steering data structure.
+ * All promisc QPs should be added as well
+ */
+static int new_steering_entry(struct mlx4_dev *dev, u8 vep_num, u8 port,
+			      enum mlx4_steer_type steer,
+			      unsigned int index, u32 qpn)
+{
+	struct mlx4_steer *s_steer;
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_mgm *mgm;
+	u32 members_count;
+	struct mlx4_steer_index *new_entry;
+	struct mlx4_promisc_qp *pqp;
+	struct mlx4_promisc_qp *dqp;
+	u32 prot;
+	int err;
+	u8 pf_num;
+
+	pf_num = (dev->caps.num_ports == 1) ? vep_num : (vep_num << 1) | (port - 1);
+	s_steer = &mlx4_priv(dev)->steer[pf_num];
+	new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL);
+	if (!new_entry)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&new_entry->duplicates);
+	new_entry->index = index;
+	list_add_tail(&new_entry->list, &s_steer->steer_entries[steer]);
+
+	/* If the given qpn is also a promisc qp,
+	 * it should be inserted to duplicates list
+	 */
+	pqp = get_promisc_qp(dev, pf_num, steer, qpn);
+	if (pqp) {
+		dqp = kmalloc(sizeof *dqp, GFP_KERNEL);
+		if (!dqp) {
+			err = -ENOMEM;
+			goto out_alloc;
+		}
+		dqp->qpn = qpn;
+		list_add_tail(&dqp->list, &new_entry->duplicates);
+	}
+
+	/* if no promisc qps for this vep, we are done */
+	if (list_empty(&s_steer->promisc_qps[steer]))
+		return 0;
+
+	/* now need to add all the promisc qps to the new
+	 * steering entry, as they should also receive the packets
+	 * destined to this address */
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox)) {
+		err = -ENOMEM;
+		goto out_alloc;
+	}
+	mgm = mailbox->buf;
+
+	err = mlx4_READ_ENTRY(dev, index, mailbox);
+	if (err)
+		goto out_mailbox;
+
+	members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+	prot = be32_to_cpu(mgm->members_count) >> 30;
+	list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) {
+		/* don't add already existing qpn */
+		if (pqp->qpn == qpn)
+			continue;
+		if (members_count == MLX4_QP_PER_MGM) {
+			/* out of space */
+			err = -ENOMEM;
+			goto out_mailbox;
+		}
+
+		/* add the qpn */
+		mgm->qp[members_count++] = cpu_to_be32(pqp->qpn & MGM_QPN_MASK);
+	}
+	/* update the qps count and update the entry with all the promisc qps*/
+	mgm->members_count = cpu_to_be32(members_count | (prot << 30));
+	err = mlx4_WRITE_ENTRY(dev, index, mailbox);
+
+out_mailbox:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	if (!err)
+		return 0;
+out_alloc:
+	if (dqp) {
+		list_del(&dqp->list);
+		kfree(&dqp);
+	}
+	list_del(&new_entry->list);
+	kfree(new_entry);
+	return err;
+}
+
+/* update the data structures with existing steering entry */
+static int existing_steering_entry(struct mlx4_dev *dev, u8 vep_num, u8 port,
+				   enum mlx4_steer_type steer,
+				   unsigned int index, u32 qpn)
+{
+	struct mlx4_steer *s_steer;
+	struct mlx4_steer_index *tmp_entry, *entry = NULL;
+	struct mlx4_promisc_qp *pqp;
+	struct mlx4_promisc_qp *dqp;
+	u8 pf_num;
+
+	pf_num = (dev->caps.num_ports == 1) ? vep_num : (vep_num << 1) | (port - 1);
+	s_steer = &mlx4_priv(dev)->steer[pf_num];
+
+	pqp = get_promisc_qp(dev, pf_num, steer, qpn);
+	if (!pqp)
+		return 0; /* nothing to do */
+
+	list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) {
+		if (tmp_entry->index == index) {
+			entry = tmp_entry;
+			break;
+		}
+	}
+	if (unlikely(!entry)) {
+		mlx4_warn(dev, "Steering entry at index %x is not registered\n", index);
+		return -EINVAL;
+	}
+
+	/* the given qpn is listed as a promisc qpn
+	 * we need to add it as a duplicate to this entry
+	 * for future refernce */
+	list_for_each_entry(dqp, &entry->duplicates, list) {
+		if (qpn == dqp->qpn)
+			return 0; /* qp is already duplicated */
+	}
+
+	/* add the qp as a duplicate on this index */
+	dqp = kmalloc(sizeof *dqp, GFP_KERNEL);
+	if (!dqp)
+		return -ENOMEM;
+	dqp->qpn = qpn;
+	list_add_tail(&dqp->list, &entry->duplicates);
+
+	return 0;
+}
+
+/* Check whether a qpn is a duplicate on steering entry
+ * If so, it should not be removed from mgm */
+static bool check_duplicate_entry(struct mlx4_dev *dev, u8 vep_num, u8 port,
+				  enum mlx4_steer_type steer,
+				  unsigned int index, u32 qpn)
+{
+	struct mlx4_steer *s_steer;
+	struct mlx4_steer_index *tmp_entry, *entry = NULL;
+	struct mlx4_promisc_qp *dqp, *tmp_dqp;
+	u8 pf_num;
+
+	pf_num = (dev->caps.num_ports == 1) ? vep_num : (vep_num << 1) | (port - 1);
+	s_steer = &mlx4_priv(dev)->steer[pf_num];
+
+	/* if qp is not promisc, it cannot be duplicated */
+	if (!get_promisc_qp(dev, pf_num, steer, qpn))
+		return false;
+
+	/* The qp is promisc qp so it is a duplicate on this index
+	 * Find the index entry, and remove the duplicate */
+	list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) {
+		if (tmp_entry->index == index) {
+			entry = tmp_entry;
+			break;
+		}
+	}
+	if (unlikely(!entry)) {
+		mlx4_warn(dev, "Steering entry for index %x is not registered\n", index);
+		return false;
+	}
+	list_for_each_entry_safe(dqp, tmp_dqp, &entry->duplicates, list) {
+		if (dqp->qpn == qpn) {
+			list_del(&dqp->list);
+			kfree(dqp);
+		}
+	}
+	return true;
+}
+
+/* I a steering entry contains only promisc QPs, it can be removed. */
+static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 vep_num, u8 port,
+				      enum mlx4_steer_type steer,
+				      unsigned int index, u32 tqpn)
+{
+	struct mlx4_steer *s_steer;
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_mgm *mgm;
+	struct mlx4_steer_index *entry = NULL, *tmp_entry;
+	u32 qpn;
+	u32 members_count;
+	bool ret = false;
+	int i;
+	u8 pf_num;
+
+	pf_num = (dev->caps.num_ports == 1) ? vep_num : (vep_num << 1) | (port - 1);
+	s_steer = &mlx4_priv(dev)->steer[pf_num];
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return false;
+	mgm = mailbox->buf;
+
+	if (mlx4_READ_ENTRY(dev, index, mailbox))
+		goto out;
+	members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+	for (i = 0;  i < members_count; i++) {
+		qpn = be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK;
+		if (!get_promisc_qp(dev, pf_num, steer, qpn) && qpn != tqpn) {
+			/* the qp is not promisc, the entry can't be removed */
+			goto out;
+		}
+	}
+	 /* All the qps currently registered for this entry are promiscuous,
+	  * Checking for duplicates */
+	ret = true;
+	list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) {
+		if (entry->index == index) {
+			if (list_empty(&entry->duplicates)) {
+				list_del(&entry->list);
+				kfree(entry);
+			} else {
+				/* This entry contains duplicates so it shouldn't be removed */
+				ret = false;
+				goto out;
+			}
+		}
+	}
+
+out:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return ret;
+}
+
+static int add_promisc_qp(struct mlx4_dev *dev, u8 vep_num, u8 port,
+			  enum mlx4_steer_type steer, u32 qpn)
+{
+	struct mlx4_steer *s_steer;
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_mgm *mgm;
+	struct mlx4_steer_index *entry;
+	struct mlx4_promisc_qp *pqp;
+	struct mlx4_promisc_qp *dqp;
+	u32 members_count;
+	u32 prot;
+	int i;
+	bool found;
+	int last_index;
+	int err;
+	u8 pf_num;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	pf_num = (dev->caps.num_ports == 1) ? vep_num : (vep_num << 1) | (port - 1);
+	s_steer = &mlx4_priv(dev)->steer[pf_num];
+
+	mutex_lock(&priv->mcg_table.mutex);
+
+	if (get_promisc_qp(dev, pf_num, steer, qpn)) {
+		err = 0;  /* Noting to do, already exists */
+		goto out_mutex;
+	}
+
+	pqp = kmalloc(sizeof *pqp, GFP_KERNEL);
+	if (!pqp) {
+		err = -ENOMEM;
+		goto out_mutex;
+	}
+	pqp->qpn = qpn;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox)) {
+		err = -ENOMEM;
+		goto out_alloc;
+	}
+	mgm = mailbox->buf;
+
+	/* the promisc qp needs to be added for each one of the steering
+	 * entries, if it already exists, needs to be added as a duplicate
+	 * for this entry */
+	list_for_each_entry(entry, &s_steer->steer_entries[steer], list) {
+		err = mlx4_READ_ENTRY(dev, entry->index, mailbox);
+		if (err)
+			goto out_mailbox;
+
+		members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+		prot = be32_to_cpu(mgm->members_count) >> 30;
+		found = false;
+		for (i = 0; i < members_count; i++) {
+			if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) {
+				/* Entry already exists, add to duplicates */
+				dqp = kmalloc(sizeof *dqp, GFP_KERNEL);
+				if (!dqp)
+					goto out_mailbox;
+				dqp->qpn = qpn;
+				list_add_tail(&dqp->list, &entry->duplicates);
+				found = true;
+			}
+		}
+		if (!found) {
+			/* Need to add the qpn to mgm */
+			if (members_count == MLX4_QP_PER_MGM) {
+				/* entry is full */
+				err = -ENOMEM;
+				goto out_mailbox;
+			}
+			mgm->qp[members_count++] = cpu_to_be32(qpn & MGM_QPN_MASK);
+			mgm->members_count = cpu_to_be32(members_count | (prot << 30));
+			err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox);
+			if (err)
+				goto out_mailbox;
+		}
+		last_index = entry->index;
+	}
+
+	/* add the new qpn to list of promisc qps */
+	list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]);
+	/* now need to add all the promisc qps to default entry */
+	memset(mgm, 0, sizeof *mgm);
+	members_count = 0;
+	list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list)
+		mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK);
+	mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30);
+
+	err = mlx4_WRITE_PROMISC(dev, vep_num, port, steer, mailbox);
+	if (err)
+		goto out_list;
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	mutex_unlock(&priv->mcg_table.mutex);
+	return 0;
+
+out_list:
+	list_del(&pqp->list);
+out_mailbox:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+out_alloc:
+	kfree(pqp);
+out_mutex:
+	mutex_unlock(&priv->mcg_table.mutex);
+	return err;
+}
+
+static int remove_promisc_qp(struct mlx4_dev *dev, u8 vep_num, u8 port,
+			     enum mlx4_steer_type steer, u32 qpn)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_steer *s_steer;
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_mgm *mgm;
+	struct mlx4_steer_index *entry;
+	struct mlx4_promisc_qp *pqp;
+	struct mlx4_promisc_qp *dqp;
+	u32 members_count;
+	bool found;
+	bool back_to_list = false;
+	int loc, i;
+	int err;
+	u8 pf_num;
+
+	pf_num = (dev->caps.num_ports == 1) ? vep_num : (vep_num << 1) | (port - 1);
+	s_steer = &mlx4_priv(dev)->steer[pf_num];
+	mutex_lock(&priv->mcg_table.mutex);
+
+	pqp = get_promisc_qp(dev, pf_num, steer, qpn);
+	if (unlikely(!pqp)) {
+		mlx4_warn(dev, "QP %x is not promiscuous QP\n", qpn);
+		/* nothing to do */
+		err = 0;
+		goto out_mutex;
+	}
+
+	/*remove from list of promisc qps */
+	list_del(&pqp->list);
+	kfree(pqp);
+
+	/* set the default entry not to include the removed one */
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox)) {
+		err = -ENOMEM;
+		back_to_list = true;
+		goto out_list;
+	}
+	mgm = mailbox->buf;
+	members_count = 0;
+	list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list)
+		mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK);
+	mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30);
+
+	err = mlx4_WRITE_PROMISC(dev, vep_num, port, steer, mailbox);
+	if (err)
+		goto out_mailbox;
+
+	/* remove the qp from all the steering entries*/
+	list_for_each_entry(entry, &s_steer->steer_entries[steer], list) {
+		found = false;
+		list_for_each_entry(dqp, &entry->duplicates, list) {
+			if (dqp->qpn == qpn) {
+				found = true;
+				break;
+			}
+		}
+		if (found) {
+			/* a duplicate, no need to change the mgm,
+			 * only update the duplicates list */
+			list_del(&dqp->list);
+			kfree(dqp);
+		} else {
+			err = mlx4_READ_ENTRY(dev, entry->index, mailbox);
+				if (err)
+					goto out_mailbox;
+			members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
+			for (loc = -1, i = 0; i < members_count; ++i)
+				if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn)
+					loc = i;
+
+			mgm->members_count = cpu_to_be32(--members_count |
+							 (MLX4_PROT_ETH << 30));
+			mgm->qp[loc] = mgm->qp[i - 1];
+			mgm->qp[i - 1] = 0;
+
+			err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox);
+				if (err)
+					goto out_mailbox;
+		}
+
+	}
+
+out_mailbox:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+out_list:
+	if (back_to_list)
+		list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]);
+out_mutex:
+	mutex_unlock(&priv->mcg_table.mutex);
+	return err;
+}
+
 /*
  * Caller must hold MCG table semaphore.  gid and mgm parameters must
  * be properly aligned for command interface.
@@ -94,15 +548,17 @@
  * If no AMGM exists for given gid, *index = -1, *prev = index of last
  * entry in hash chain and *mgm holds end of hash chain.
  */
-static int find_mgm(struct mlx4_dev *dev,
-		    u8 *gid, enum mlx4_protocol protocol,
-		    struct mlx4_cmd_mailbox *mgm_mailbox,
-		    u16 *hash, int *prev, int *index)
+static int find_entry(struct mlx4_dev *dev, u8 port,
+		      u8 *gid, enum mlx4_protocol prot,
+		      enum mlx4_steer_type steer,
+		      struct mlx4_cmd_mailbox *mgm_mailbox,
+		      u16 *hash, int *prev, int *index)
 {
 	struct mlx4_cmd_mailbox *mailbox;
 	struct mlx4_mgm *mgm = mgm_mailbox->buf;
 	u8 *mgid;
 	int err;
+	u8 op_mod = (prot == MLX4_PROT_ETH) ? !!(dev->caps.vep_mc_steering) : 0;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
@@ -111,7 +567,7 @@
 
 	memcpy(mgid, gid, 16);
 
-	err = mlx4_MGID_HASH(dev, mailbox, hash);
+	err = mlx4_GID_HASH(dev, mailbox, hash, op_mod);
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	if (err)
 		return err;
@@ -123,11 +579,11 @@
 	*prev  = -1;
 
 	do {
-		err = mlx4_READ_MCG(dev, *index, mgm_mailbox);
+		err = mlx4_READ_ENTRY(dev, *index, mgm_mailbox);
 		if (err)
 			return err;
 
-		if (!memcmp(mgm->gid, zero_gid, 16)) {
+		if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) {
 			if (*index != *hash) {
 				mlx4_err(dev, "Found zero MGID in AMGM.\n");
 				err = -EINVAL;
@@ -136,7 +592,7 @@
 		}
 
 		if (!memcmp(mgm->gid, gid, 16) &&
-		    be32_to_cpu(mgm->members_count) >> 30 == protocol)
+		    be32_to_cpu(mgm->members_count) >> 30 == prot)
 			return err;
 
 		*prev = *index;
@@ -147,8 +603,9 @@
 	return err;
 }
 
-int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
-			  int block_mcast_loopback, enum mlx4_protocol protocol)
+int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			  int block_mcast_loopback, enum mlx4_protocol prot,
+			  enum mlx4_steer_type steer)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cmd_mailbox *mailbox;
@@ -159,6 +616,8 @@
 	int link = 0;
 	int i;
 	int err;
+	u8 port = gid[5];
+	u8 new_entry = 0;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
@@ -166,14 +625,16 @@
 	mgm = mailbox->buf;
 
 	mutex_lock(&priv->mcg_table.mutex);
-
-	err = find_mgm(dev, gid, protocol, mailbox, &hash, &prev, &index);
+	err = find_entry(dev, port, gid, prot, steer,
+			 mailbox, &hash, &prev, &index);
 	if (err)
 		goto out;
 
 	if (index != -1) {
-		if (!memcmp(mgm->gid, zero_gid, 16))
+		if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) {
+			new_entry = 1;
 			memcpy(mgm->gid, gid, 16);
+		}
 	} else {
 		link = 1;
 
@@ -209,26 +670,34 @@
 	else
 		mgm->qp[members_count++] = cpu_to_be32(qp->qpn & MGM_QPN_MASK);
 
-	mgm->members_count = cpu_to_be32(members_count | (u32) protocol << 30);
+	mgm->members_count = cpu_to_be32(members_count | (u32) prot << 30);
 
-	err = mlx4_WRITE_MCG(dev, index, mailbox);
+	err = mlx4_WRITE_ENTRY(dev, index, mailbox);
 	if (err)
 		goto out;
 
 	if (!link)
 		goto out;
 
-	err = mlx4_READ_MCG(dev, prev, mailbox);
+	err = mlx4_READ_ENTRY(dev, prev, mailbox);
 	if (err)
 		goto out;
 
 	mgm->next_gid_index = cpu_to_be32(index << 6);
 
-	err = mlx4_WRITE_MCG(dev, prev, mailbox);
+	err = mlx4_WRITE_ENTRY(dev, prev, mailbox);
 	if (err)
 		goto out;
 
 out:
+	if (prot == MLX4_PROT_ETH) {
+		/* manage the steering entry for promisc mode */
+		if (new_entry)
+			new_steering_entry(dev, 0, port, steer, index, qp->qpn);
+		else
+			existing_steering_entry(dev, 0, port, steer,
+						index, qp->qpn);
+	}
 	if (err && link && index != -1) {
 		if (index < dev->caps.num_mgms)
 			mlx4_warn(dev, "Got AMGM index %d < %d",
@@ -242,10 +711,9 @@
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
 }
-EXPORT_SYMBOL_GPL(mlx4_multicast_attach);
 
-int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
-			  enum mlx4_protocol protocol)
+int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			  enum mlx4_protocol prot, enum mlx4_steer_type steer)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cmd_mailbox *mailbox;
@@ -255,6 +723,8 @@
 	int prev, index;
 	int i, loc;
 	int err;
+	u8 port = gid[5];
+	bool removed_entry = false;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
@@ -263,7 +733,8 @@
 
 	mutex_lock(&priv->mcg_table.mutex);
 
-	err = find_mgm(dev, gid, protocol, mailbox, &hash, &prev, &index);
+	err = find_entry(dev, port, gid, prot, steer,
+			 mailbox, &hash, &prev, &index);
 	if (err)
 		goto out;
 
@@ -273,6 +744,11 @@
 		goto out;
 	}
 
+	/* if this pq is also a promisc qp, it shouldn't be removed */
+	if (prot == MLX4_PROT_ETH &&
+	    check_duplicate_entry(dev, 0, port, steer, index, qp->qpn))
+		goto out;
+
 	members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
 	for (loc = -1, i = 0; i < members_count; ++i)
 		if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn)
@@ -285,26 +761,31 @@
 	}
 
 
-	mgm->members_count = cpu_to_be32(--members_count | (u32) protocol << 30);
+	mgm->members_count = cpu_to_be32(--members_count | (u32) prot << 30);
 	mgm->qp[loc]       = mgm->qp[i - 1];
 	mgm->qp[i - 1]     = 0;
 
-	if (i != 1) {
-		err = mlx4_WRITE_MCG(dev, index, mailbox);
+	if (prot == MLX4_PROT_ETH)
+		removed_entry = can_remove_steering_entry(dev, 0, port, steer, index, qp->qpn);
+	if (i != 1 && (prot != MLX4_PROT_ETH || !removed_entry)) {
+		err = mlx4_WRITE_ENTRY(dev, index, mailbox);
 		goto out;
 	}
 
+	/* We are going to delete the entry, members count should be 0 */
+	mgm->members_count = cpu_to_be32((u32) prot << 30);
+
 	if (prev == -1) {
 		/* Remove entry from MGM */
 		int amgm_index = be32_to_cpu(mgm->next_gid_index) >> 6;
 		if (amgm_index) {
-			err = mlx4_READ_MCG(dev, amgm_index, mailbox);
+			err = mlx4_READ_ENTRY(dev, amgm_index, mailbox);
 			if (err)
 				goto out;
 		} else
 			memset(mgm->gid, 0, 16);
 
-		err = mlx4_WRITE_MCG(dev, index, mailbox);
+		err = mlx4_WRITE_ENTRY(dev, index, mailbox);
 		if (err)
 			goto out;
 
@@ -319,13 +800,13 @@
 	} else {
 		/* Remove entry from AMGM */
 		int cur_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
-		err = mlx4_READ_MCG(dev, prev, mailbox);
+		err = mlx4_READ_ENTRY(dev, prev, mailbox);
 		if (err)
 			goto out;
 
 		mgm->next_gid_index = cpu_to_be32(cur_next_index << 6);
 
-		err = mlx4_WRITE_MCG(dev, prev, mailbox);
+		err = mlx4_WRITE_ENTRY(dev, prev, mailbox);
 		if (err)
 			goto out;
 
@@ -343,8 +824,85 @@
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
 }
+
+
+int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			  int block_mcast_loopback, enum mlx4_protocol prot)
+{
+	enum mlx4_steer_type steer;
+
+	steer = (is_valid_ether_addr(&gid[10])) ? MLX4_UC_STEER : MLX4_MC_STEER;
+
+	if (prot == MLX4_PROT_ETH && !dev->caps.vep_mc_steering)
+		return 0;
+
+	if (prot == MLX4_PROT_ETH)
+		gid[7] |= (steer << 1);
+
+	return mlx4_qp_attach_common(dev, qp, gid,
+				     block_mcast_loopback, prot,
+				     steer);
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_attach);
+
+int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			  enum mlx4_protocol prot)
+{
+	enum mlx4_steer_type steer;
+
+	steer = (is_valid_ether_addr(&gid[10])) ? MLX4_UC_STEER : MLX4_MC_STEER;
+
+	if (prot == MLX4_PROT_ETH && !dev->caps.vep_mc_steering)
+		return 0;
+
+	if (prot == MLX4_PROT_ETH) {
+		gid[7] |= (steer << 1);
+	}
+
+	return mlx4_qp_detach_common(dev, qp, gid, prot, steer);
+}
 EXPORT_SYMBOL_GPL(mlx4_multicast_detach);
 
+
+int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+	if (!dev->caps.vep_mc_steering)
+		return 0;
+
+
+	return add_promisc_qp(dev, 0, port, MLX4_MC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_add);
+
+int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+	if (!dev->caps.vep_mc_steering)
+		return 0;
+
+
+	return remove_promisc_qp(dev, 0, port, MLX4_MC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_remove);
+
+int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+	if (!dev->caps.vep_mc_steering)
+		return 0;
+
+
+	return add_promisc_qp(dev, 0, port, MLX4_UC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_add);
+
+int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port)
+{
+	if (!dev->caps.vep_mc_steering)
+		return 0;
+
+	return remove_promisc_qp(dev, 0, port, MLX4_UC_STEER, qpn);
+}
+EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_remove);
+
 int mlx4_init_mcg_table(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);

diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 0da5bb72..c1e0e5f 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h

@@ -105,6 +105,7 @@
 	u32			max;
 	u32                     reserved_top;
 	u32			mask;
+	u32			avail;
 	spinlock_t		lock;
 	unsigned long	       *table;
 };
@@ -162,6 +163,27 @@
 	u8			catas_bar;
 };
 
+#define MGM_QPN_MASK       0x00FFFFFF
+#define MGM_BLCK_LB_BIT    30
+
+struct mlx4_promisc_qp {
+	struct list_head list;
+	u32 qpn;
+};
+
+struct mlx4_steer_index {
+	struct list_head list;
+	unsigned int index;
+	struct list_head duplicates;
+};
+
+struct mlx4_mgm {
+	__be32			next_gid_index;
+	__be32			members_count;
+	u32			reserved[2];
+	u8			gid[16];
+	__be32			qp[MLX4_QP_PER_MGM];
+};
 struct mlx4_cmd {
 	struct pci_pool	       *pool;
 	void __iomem	       *hcr;
@@ -265,6 +287,10 @@
 	int			max;
 };
 
+struct mlx4_mac_entry {
+	u64 mac;
+};
+
 struct mlx4_port_info {
 	struct mlx4_dev	       *dev;
 	int			port;
@@ -272,7 +298,9 @@
 	struct device_attribute port_attr;
 	enum mlx4_port_type	tmp_type;
 	struct mlx4_mac_table	mac_table;
+	struct radix_tree_root	mac_tree;
 	struct mlx4_vlan_table	vlan_table;
+	int			base_qpn;
 };
 
 struct mlx4_sense {
@@ -282,6 +310,17 @@
 	struct delayed_work	sense_poll;
 };
 
+struct mlx4_msix_ctl {
+	u64		pool_bm;
+	spinlock_t	pool_lock;
+};
+
+struct mlx4_steer {
+	struct list_head promisc_qps[MLX4_NUM_STEERS];
+	struct list_head steer_entries[MLX4_NUM_STEERS];
+	struct list_head high_prios;
+};
+
 struct mlx4_priv {
 	struct mlx4_dev		dev;
 
@@ -313,6 +352,11 @@
 	struct mlx4_port_info	port[MLX4_MAX_PORTS + 1];
 	struct mlx4_sense       sense;
 	struct mutex		port_mutex;
+	struct mlx4_msix_ctl	msix_ctl;
+	struct mlx4_steer	*steer;
+	struct list_head	bf_list;
+	struct mutex		bf_mutex;
+	struct io_mapping	*bf_mapping;
 };
 
 static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
@@ -328,6 +372,7 @@
 void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj);
 u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align);
 void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt);
+u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap);
 int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
 		     u32 reserved_bot, u32 resetrved_top);
 void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap);
@@ -403,4 +448,9 @@
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port);
 int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps);
 
+int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			  enum mlx4_protocol prot, enum mlx4_steer_type steer);
+int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			  int block_mcast_loopback, enum mlx4_protocol prot,
+			  enum mlx4_steer_type steer);
 #endif /* MLX4_H */

diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h
index dfed6a0..e30f609 100644
--- a/drivers/net/mlx4/mlx4_en.h
+++ b/drivers/net/mlx4/mlx4_en.h

@@ -49,8 +49,8 @@
 #include "en_port.h"
 
 #define DRV_NAME	"mlx4_en"
-#define DRV_VERSION	"1.5.1.6"
-#define DRV_RELDATE	"August 2010"
+#define DRV_VERSION	"1.5.4.1"
+#define DRV_RELDATE	"March 2011"
 
 #define MLX4_EN_MSG_LEVEL	(NETIF_MSG_LINK | NETIF_MSG_IFDOWN)
 
@@ -62,6 +62,7 @@
 #define MLX4_EN_PAGE_SHIFT	12
 #define MLX4_EN_PAGE_SIZE	(1 << MLX4_EN_PAGE_SHIFT)
 #define MAX_RX_RINGS		16
+#define MIN_RX_RINGS		4
 #define TXBB_SIZE		64
 #define HEADROOM		(2048 / TXBB_SIZE + 1)
 #define STAMP_STRIDE		64
@@ -124,6 +125,7 @@
 #define MLX4_EN_RX_SIZE_THRESH		1024
 #define MLX4_EN_RX_RATE_THRESH		(1000000 / MLX4_EN_RX_COAL_TIME_HIGH)
 #define MLX4_EN_SAMPLE_INTERVAL		0
+#define MLX4_EN_AVG_PKT_SMALL		256
 
 #define MLX4_EN_AUTO_CONF	0xffff
 
@@ -214,6 +216,9 @@
 
 #define MLX4_EN_USE_SRQ		0x01000000
 
+#define MLX4_EN_CX3_LOW_ID	0x1000
+#define MLX4_EN_CX3_HIGH_ID	0x1005
+
 struct mlx4_en_rx_alloc {
 	struct page *page;
 	u16 offset;
@@ -243,6 +248,8 @@
 	unsigned long bytes;
 	unsigned long packets;
 	spinlock_t comp_lock;
+	struct mlx4_bf bf;
+	bool bf_enabled;
 };
 
 struct mlx4_en_rx_desc {
@@ -453,6 +460,7 @@
 	struct mlx4_en_rss_map rss_map;
 	u32 flags;
 #define MLX4_EN_FLAG_PROMISC	0x1
+#define MLX4_EN_FLAG_MC_PROMISC	0x2
 	u32 tx_ring_num;
 	u32 rx_ring_num;
 	u32 rx_skb_size;
@@ -461,6 +469,7 @@
 	u16 log_rx_info;
 
 	struct mlx4_en_tx_ring tx_ring[MAX_TX_RINGS];
+	int tx_vector;
 	struct mlx4_en_rx_ring rx_ring[MAX_RX_RINGS];
 	struct mlx4_en_cq tx_cq[MAX_TX_RINGS];
 	struct mlx4_en_cq rx_cq[MAX_RX_RINGS];
@@ -476,6 +485,13 @@
 	int mc_addrs_cnt;
 	struct mlx4_en_stat_out_mbox hw_stats;
 	int vids[128];
+	bool wol;
+};
+
+enum mlx4_en_wol {
+	MLX4_EN_WOL_MAGIC = (1ULL << 61),
+	MLX4_EN_WOL_ENABLED = (1ULL << 62),
+	MLX4_EN_WOL_DO_MODIFY = (1ULL << 63),
 };
 
 
@@ -486,12 +502,13 @@
 int mlx4_en_start_port(struct net_device *dev);
 void mlx4_en_stop_port(struct net_device *dev);
 
-void mlx4_en_free_resources(struct mlx4_en_priv *priv);
+void mlx4_en_free_resources(struct mlx4_en_priv *priv, bool reserve_vectors);
 int mlx4_en_alloc_resources(struct mlx4_en_priv *priv);
 
 int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 		      int entries, int ring, enum cq_type mode);
-void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
+void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
+			bool reserve_vectors);
 int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
@@ -503,7 +520,7 @@
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring,
-			   u32 size, u16 stride);
+			   int qpn, u32 size, u16 stride);
 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring);
 int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 			     struct mlx4_en_tx_ring *ring,

diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c
index c4988d6..1286b88 100644
--- a/drivers/net/mlx4/pd.c
+++ b/drivers/net/mlx4/pd.c

@@ -32,12 +32,17 @@
  */
 
 #include <linux/errno.h>
+#include <linux/io-mapping.h>
 
 #include <asm/page.h>
 
 #include "mlx4.h"
 #include "icm.h"
 
+enum {
+	MLX4_NUM_RESERVED_UARS = 8
+};
+
 int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -77,6 +82,7 @@
 		return -ENOMEM;
 
 	uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
+	uar->map = NULL;
 
 	return 0;
 }
@@ -88,6 +94,102 @@
 }
 EXPORT_SYMBOL_GPL(mlx4_uar_free);
 
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_uar *uar;
+	int err = 0;
+	int idx;
+
+	if (!priv->bf_mapping)
+		return -ENOMEM;
+
+	mutex_lock(&priv->bf_mutex);
+	if (!list_empty(&priv->bf_list))
+		uar = list_entry(priv->bf_list.next, struct mlx4_uar, bf_list);
+	else {
+		if (mlx4_bitmap_avail(&priv->uar_table.bitmap) < MLX4_NUM_RESERVED_UARS) {
+			err = -ENOMEM;
+			goto out;
+		}
+		uar = kmalloc(sizeof *uar, GFP_KERNEL);
+		if (!uar) {
+			err = -ENOMEM;
+			goto out;
+		}
+		err = mlx4_uar_alloc(dev, uar);
+		if (err)
+			goto free_kmalloc;
+
+		uar->map = ioremap(uar->pfn << PAGE_SHIFT, PAGE_SIZE);
+		if (!uar->map) {
+			err = -ENOMEM;
+			goto free_uar;
+		}
+
+		uar->bf_map = io_mapping_map_wc(priv->bf_mapping, uar->index << PAGE_SHIFT);
+		if (!uar->bf_map) {
+			err = -ENOMEM;
+			goto unamp_uar;
+		}
+		uar->free_bf_bmap = 0;
+		list_add(&uar->bf_list, &priv->bf_list);
+	}
+
+	bf->uar = uar;
+	idx = ffz(uar->free_bf_bmap);
+	uar->free_bf_bmap |= 1 << idx;
+	bf->uar = uar;
+	bf->offset = 0;
+	bf->buf_size = dev->caps.bf_reg_size / 2;
+	bf->reg = uar->bf_map + idx * dev->caps.bf_reg_size;
+	if (uar->free_bf_bmap == (1 << dev->caps.bf_regs_per_page) - 1)
+		list_del_init(&uar->bf_list);
+
+	goto out;
+
+unamp_uar:
+	bf->uar = NULL;
+	iounmap(uar->map);
+
+free_uar:
+	mlx4_uar_free(dev, uar);
+
+free_kmalloc:
+	kfree(uar);
+
+out:
+	mutex_unlock(&priv->bf_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_bf_alloc);
+
+void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int idx;
+
+	if (!bf->uar || !bf->uar->bf_map)
+		return;
+
+	mutex_lock(&priv->bf_mutex);
+	idx = (bf->reg - bf->uar->bf_map) / dev->caps.bf_reg_size;
+	bf->uar->free_bf_bmap &= ~(1 << idx);
+	if (!bf->uar->free_bf_bmap) {
+		if (!list_empty(&bf->uar->bf_list))
+			list_del(&bf->uar->bf_list);
+
+		io_mapping_unmap(bf->uar->bf_map);
+		iounmap(bf->uar->map);
+		mlx4_uar_free(dev, bf->uar);
+		kfree(bf->uar);
+	} else if (list_empty(&bf->uar->bf_list))
+		list_add(&bf->uar->bf_list, &priv->bf_list);
+
+	mutex_unlock(&priv->bf_mutex);
+}
+EXPORT_SYMBOL_GPL(mlx4_bf_free);
+
 int mlx4_init_uar_table(struct mlx4_dev *dev)
 {
 	if (dev->caps.num_uars <= 128) {

diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c
index 4513395..eca7d85 100644
--- a/drivers/net/mlx4/port.c
+++ b/drivers/net/mlx4/port.c

@@ -90,12 +90,79 @@
 	return err;
 }
 
-int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index)
+static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port,
+			     u64 mac, int *qpn, u8 reserve)
 {
-	struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table;
+	struct mlx4_qp qp;
+	u8 gid[16] = {0};
+	int err;
+
+	if (reserve) {
+		err = mlx4_qp_reserve_range(dev, 1, 1, qpn);
+		if (err) {
+			mlx4_err(dev, "Failed to reserve qp for mac registration\n");
+			return err;
+		}
+	}
+	qp.qpn = *qpn;
+
+	mac &= 0xffffffffffffULL;
+	mac = cpu_to_be64(mac << 16);
+	memcpy(&gid[10], &mac, ETH_ALEN);
+	gid[5] = port;
+	gid[7] = MLX4_UC_STEER << 1;
+
+	err = mlx4_qp_attach_common(dev, &qp, gid, 0,
+				    MLX4_PROT_ETH, MLX4_UC_STEER);
+	if (err && reserve)
+		mlx4_qp_release_range(dev, *qpn, 1);
+
+	return err;
+}
+
+static void mlx4_uc_steer_release(struct mlx4_dev *dev, u8 port,
+				  u64 mac, int qpn, u8 free)
+{
+	struct mlx4_qp qp;
+	u8 gid[16] = {0};
+
+	qp.qpn = qpn;
+	mac &= 0xffffffffffffULL;
+	mac = cpu_to_be64(mac << 16);
+	memcpy(&gid[10], &mac, ETH_ALEN);
+	gid[5] = port;
+	gid[7] = MLX4_UC_STEER << 1;
+
+	mlx4_qp_detach_common(dev, &qp, gid, MLX4_PROT_ETH, MLX4_UC_STEER);
+	if (free)
+		mlx4_qp_release_range(dev, qpn, 1);
+}
+
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn, u8 wrap)
+{
+	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+	struct mlx4_mac_table *table = &info->mac_table;
+	struct mlx4_mac_entry *entry;
 	int i, err = 0;
 	int free = -1;
 
+	if (dev->caps.vep_uc_steering) {
+		err = mlx4_uc_steer_add(dev, port, mac, qpn, 1);
+		if (!err) {
+			entry = kmalloc(sizeof *entry, GFP_KERNEL);
+			if (!entry) {
+				mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
+				return -ENOMEM;
+			}
+			entry->mac = mac;
+			err = radix_tree_insert(&info->mac_tree, *qpn, entry);
+			if (err) {
+				mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
+				return err;
+			}
+		} else
+			return err;
+	}
 	mlx4_dbg(dev, "Registering MAC: 0x%llx\n", (unsigned long long) mac);
 	mutex_lock(&table->mutex);
 	for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) {
@@ -106,7 +173,6 @@
 
 		if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
 			/* MAC already registered, increase refernce count */
-			*index = i;
 			++table->refs[i];
 			goto out;
 		}
@@ -137,7 +203,8 @@
 		goto out;
 	}
 
-	*index = free;
+	if (!dev->caps.vep_uc_steering)
+		*qpn = info->base_qpn + free;
 	++table->total;
 out:
 	mutex_unlock(&table->mutex);
@@ -145,20 +212,52 @@
 }
 EXPORT_SYMBOL_GPL(mlx4_register_mac);
 
-void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index)
+static int validate_index(struct mlx4_dev *dev,
+			  struct mlx4_mac_table *table, int index)
 {
-	struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table;
+	int err = 0;
+
+	if (index < 0 || index >= table->max || !table->entries[index]) {
+		mlx4_warn(dev, "No valid Mac entry for the given index\n");
+		err = -EINVAL;
+	}
+	return err;
+}
+
+static int find_index(struct mlx4_dev *dev,
+		      struct mlx4_mac_table *table, u64 mac)
+{
+	int i;
+	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+		if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))
+			return i;
+	}
+	/* Mac not found */
+	return -EINVAL;
+}
+
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int qpn)
+{
+	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+	struct mlx4_mac_table *table = &info->mac_table;
+	int index = qpn - info->base_qpn;
+	struct mlx4_mac_entry *entry;
+
+	if (dev->caps.vep_uc_steering) {
+		entry = radix_tree_lookup(&info->mac_tree, qpn);
+		if (entry) {
+			mlx4_uc_steer_release(dev, port, entry->mac, qpn, 1);
+			radix_tree_delete(&info->mac_tree, qpn);
+			index = find_index(dev, table, entry->mac);
+			kfree(entry);
+		}
+	}
 
 	mutex_lock(&table->mutex);
-	if (!table->refs[index]) {
-		mlx4_warn(dev, "No MAC entry for index %d\n", index);
+
+	if (validate_index(dev, table, index))
 		goto out;
-	}
-	if (--table->refs[index]) {
-		mlx4_warn(dev, "Have more references for index %d,"
-			  "no need to modify MAC table\n", index);
-		goto out;
-	}
+
 	table->entries[index] = 0;
 	mlx4_set_port_mac_table(dev, port, table->entries);
 	--table->total;
@@ -167,6 +266,44 @@
 }
 EXPORT_SYMBOL_GPL(mlx4_unregister_mac);
 
+int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac, u8 wrap)
+{
+	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+	struct mlx4_mac_table *table = &info->mac_table;
+	int index = qpn - info->base_qpn;
+	struct mlx4_mac_entry *entry;
+	int err;
+
+	if (dev->caps.vep_uc_steering) {
+		entry = radix_tree_lookup(&info->mac_tree, qpn);
+		if (!entry)
+			return -EINVAL;
+		index = find_index(dev, table, entry->mac);
+		mlx4_uc_steer_release(dev, port, entry->mac, qpn, 0);
+		entry->mac = new_mac;
+		err = mlx4_uc_steer_add(dev, port, entry->mac, &qpn, 0);
+		if (err || index < 0)
+			return err;
+	}
+
+	mutex_lock(&table->mutex);
+
+	err = validate_index(dev, table, index);
+	if (err)
+		goto out;
+
+	table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID);
+
+	err = mlx4_set_port_mac_table(dev, port, table->entries);
+	if (unlikely(err)) {
+		mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) new_mac);
+		table->entries[index] = 0;
+	}
+out:
+	mutex_unlock(&table->mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_replace_mac);
 static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
 				    __be32 *entries)
 {

diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c
index e749f82..b967647 100644
--- a/drivers/net/mlx4/profile.c
+++ b/drivers/net/mlx4/profile.c

@@ -107,9 +107,7 @@
 	profile[MLX4_RES_AUXC].num    = request->num_qp;
 	profile[MLX4_RES_SRQ].num     = request->num_srq;
 	profile[MLX4_RES_CQ].num      = request->num_cq;
-	profile[MLX4_RES_EQ].num      = min_t(unsigned, dev_cap->max_eqs,
-					      dev_cap->reserved_eqs +
-					      num_possible_cpus() + 1);
+	profile[MLX4_RES_EQ].num      = min_t(unsigned, dev_cap->max_eqs, MAX_MSIX);
 	profile[MLX4_RES_DMPT].num    = request->num_mpt;
 	profile[MLX4_RES_CMPT].num    = MLX4_NUM_CMPTS;
 	profile[MLX4_RES_MTT].num     = request->num_mtt;

diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index a7f2eed..1f4e868 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c

@@ -3645,6 +3645,7 @@
 			dma_free_coherent(&pdev->dev, bytes,
 					  ss->fw_stats, ss->fw_stats_bus);
 			ss->fw_stats = NULL;
+			netif_napi_del(&ss->napi);
 		}
 	}
 	kfree(mgp->ss);

diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c
index 8c66e22..5098684 100644
--- a/drivers/net/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/pch_gbe/pch_gbe_main.c

@@ -2441,7 +2441,7 @@
 	.resume = pch_gbe_io_resume
 };
 
-static struct pci_driver pch_gbe_pcidev = {
+static struct pci_driver pch_gbe_driver = {
 	.name = KBUILD_MODNAME,
 	.id_table = pch_gbe_pcidev_id,
 	.probe = pch_gbe_probe,
@@ -2458,7 +2458,7 @@
 {
 	int ret;
 
-	ret = pci_register_driver(&pch_gbe_pcidev);
+	ret = pci_register_driver(&pch_gbe_driver);
 	if (copybreak != PCH_GBE_COPYBREAK_DEFAULT) {
 		if (copybreak == 0) {
 			pr_info("copybreak disabled\n");
@@ -2472,7 +2472,7 @@
 
 static void __exit pch_gbe_exit_module(void)
 {
-	pci_unregister_driver(&pch_gbe_pcidev);
+	pci_unregister_driver(&pch_gbe_driver);
 }
 
 module_init(pch_gbe_init_module);

diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index b8bd936..d890679 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c

@@ -1054,6 +1054,7 @@
 {
 	struct pci_dev *pci_dev = efx->pci_dev;
 	dma_addr_t dma_mask = efx->type->max_dma_mask;
+	bool use_wc;
 	int rc;
 
 	netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
@@ -1104,8 +1105,21 @@
 		rc = -EIO;
 		goto fail3;
 	}
-	efx->membase = ioremap_wc(efx->membase_phys,
-				  efx->type->mem_map_size);
+
+	/* bug22643: If SR-IOV is enabled then tx push over a write combined
+	 * mapping is unsafe. We need to disable write combining in this case.
+	 * MSI is unsupported when SR-IOV is enabled, and the firmware will
+	 * have removed the MSI capability. So write combining is safe if
+	 * there is an MSI capability.
+	 */
+	use_wc = (!EFX_WORKAROUND_22643(efx) ||
+		  pci_find_capability(pci_dev, PCI_CAP_ID_MSI));
+	if (use_wc)
+		efx->membase = ioremap_wc(efx->membase_phys,
+					  efx->type->mem_map_size);
+	else
+		efx->membase = ioremap_nocache(efx->membase_phys,
+					       efx->type->mem_map_size);
 	if (!efx->membase) {
 		netif_err(efx, probe, efx->net_dev,
 			  "could not map memory BAR at %llx+%x\n",

diff --git a/drivers/net/sfc/workarounds.h b/drivers/net/sfc/workarounds.h
index e4dd3a7..99ff114 100644
--- a/drivers/net/sfc/workarounds.h
+++ b/drivers/net/sfc/workarounds.h

@@ -38,6 +38,8 @@
 #define EFX_WORKAROUND_15783 EFX_WORKAROUND_ALWAYS
 /* Legacy interrupt storm when interrupt fifo fills */
 #define EFX_WORKAROUND_17213 EFX_WORKAROUND_SIENA
+/* Write combining and sriov=enabled are incompatible */
+#define EFX_WORKAROUND_22643 EFX_WORKAROUND_SIENA
 
 /* Spurious parity errors in TSORT buffers */
 #define EFX_WORKAROUND_5129 EFX_WORKAROUND_FALCON_A

diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index bc86f4b..727874d 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c

@@ -49,6 +49,8 @@
 
 struct smsc95xx_priv {
 	u32 mac_cr;
+	u32 hash_hi;
+	u32 hash_lo;
 	spinlock_t mac_cr_lock;
 	bool use_tx_csum;
 	bool use_rx_csum;
@@ -370,10 +372,11 @@
 {
 	struct usbnet *dev = netdev_priv(netdev);
 	struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
-	u32 hash_hi = 0;
-	u32 hash_lo = 0;
 	unsigned long flags;
 
+	pdata->hash_hi = 0;
+	pdata->hash_lo = 0;
+
 	spin_lock_irqsave(&pdata->mac_cr_lock, flags);
 
 	if (dev->net->flags & IFF_PROMISC) {
@@ -394,13 +397,13 @@
 			u32 bitnum = smsc95xx_hash(ha->addr);
 			u32 mask = 0x01 << (bitnum & 0x1F);
 			if (bitnum & 0x20)
-				hash_hi |= mask;
+				pdata->hash_hi |= mask;
 			else
-				hash_lo |= mask;
+				pdata->hash_lo |= mask;
 		}
 
 		netif_dbg(dev, drv, dev->net, "HASHH=0x%08X, HASHL=0x%08X\n",
-				   hash_hi, hash_lo);
+				   pdata->hash_hi, pdata->hash_lo);
 	} else {
 		netif_dbg(dev, drv, dev->net, "receive own packets only\n");
 		pdata->mac_cr &=
@@ -410,8 +413,8 @@
 	spin_unlock_irqrestore(&pdata->mac_cr_lock, flags);
 
 	/* Initiate async writes, as we can't wait for completion here */
-	smsc95xx_write_reg_async(dev, HASHH, &hash_hi);
-	smsc95xx_write_reg_async(dev, HASHL, &hash_lo);
+	smsc95xx_write_reg_async(dev, HASHH, &pdata->hash_hi);
+	smsc95xx_write_reg_async(dev, HASHL, &pdata->hash_lo);
 	smsc95xx_write_reg_async(dev, MAC_CR, &pdata->mac_cr);
 }
 

diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 115f162..5248257 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c

@@ -2160,6 +2160,8 @@
 	if (!ath_drain_all_txq(sc, false))
 		ath_reset(sc, false);
 
+	ieee80211_wake_queues(hw);
+
 out:
 	ieee80211_queue_delayed_work(hw, &sc->tx_complete_work, 0);
 	mutex_unlock(&sc->mutex);

diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c
index 960d717..a3241cd 100644
--- a/drivers/net/wireless/ath/ath9k/rc.c
+++ b/drivers/net/wireless/ath/ath9k/rc.c

@@ -1328,7 +1328,7 @@
 
 	hdr = (struct ieee80211_hdr *)skb->data;
 	fc = hdr->frame_control;
-	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
+	for (i = 0; i < sc->hw->max_rates; i++) {
 		struct ieee80211_tx_rate *rate = &tx_info->status.rates[i];
 		if (!rate->count)
 			break;

diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index ef22096..26734e5 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c

@@ -1725,8 +1725,8 @@
 	u8 tidno;
 
 	spin_lock_bh(&txctl->txq->axq_lock);
-
-	if (ieee80211_is_data_qos(hdr->frame_control) && txctl->an) {
+	if ((sc->sc_flags & SC_OP_TXAGGR) && txctl->an &&
+		ieee80211_is_data_qos(hdr->frame_control)) {
 		tidno = ieee80211_get_qos_ctl(hdr)[0] &
 			IEEE80211_QOS_CTL_TID_MASK;
 		tid = ATH_AN_2_TID(txctl->an, tidno);

diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-lib.c b/drivers/net/wireless/iwlwifi/iwl-agn-lib.c
index 2003c1d..08ccb94 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-lib.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-lib.c

@@ -2265,7 +2265,7 @@
 	int ret;
 
 	ret = wait_event_timeout(priv->_agn.notif_waitq,
-				 &wait_entry->triggered,
+				 wait_entry->triggered,
 				 timeout);
 
 	spin_lock_bh(&priv->_agn.notif_wait_lock);

diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index 581dc9f..321b18b 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c

@@ -3009,14 +3009,17 @@
 
 	mutex_lock(&priv->mutex);
 
-	if (!priv->_agn.offchan_tx_skb)
-		return -EINVAL;
+	if (!priv->_agn.offchan_tx_skb) {
+		ret = -EINVAL;
+		goto unlock;
+	}
 
 	priv->_agn.offchan_tx_skb = NULL;
 
 	ret = iwl_scan_cancel_timeout(priv, 200);
 	if (ret)
 		ret = -EIO;
+unlock:
 	mutex_unlock(&priv->mutex);
 
 	return ret;

diff --git a/drivers/net/wireless/orinoco/cfg.c b/drivers/net/wireless/orinoco/cfg.c
index 09fae2f..736bbb9 100644
--- a/drivers/net/wireless/orinoco/cfg.c
+++ b/drivers/net/wireless/orinoco/cfg.c

@@ -153,6 +153,9 @@
 	priv->scan_request = request;
 
 	err = orinoco_hw_trigger_scan(priv, request->ssids);
+	/* On error the we aren't processing the request */
+	if (err)
+		priv->scan_request = NULL;
 
 	return err;
 }

diff --git a/drivers/net/wireless/orinoco/main.c b/drivers/net/wireless/orinoco/main.c
index f3d396e..62c6b2b 100644
--- a/drivers/net/wireless/orinoco/main.c
+++ b/drivers/net/wireless/orinoco/main.c

@@ -1376,13 +1376,13 @@
 
 	spin_lock_irqsave(&priv->scan_lock, flags);
 	list_for_each_entry_safe(sd, temp, &priv->scan_list, list) {
-		spin_unlock_irqrestore(&priv->scan_lock, flags);
 
 		buf = sd->buf;
 		len = sd->len;
 		type = sd->type;
 
 		list_del(&sd->list);
+		spin_unlock_irqrestore(&priv->scan_lock, flags);
 		kfree(sd);
 
 		if (len > 0) {

diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index f1a9214..4e36865 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c

@@ -719,6 +719,7 @@
 	{ USB_DEVICE(0x0b05, 0x1732), USB_DEVICE_DATA(&rt2800usb_ops) },
 	{ USB_DEVICE(0x0b05, 0x1742), USB_DEVICE_DATA(&rt2800usb_ops) },
 	{ USB_DEVICE(0x0b05, 0x1784), USB_DEVICE_DATA(&rt2800usb_ops) },
+	{ USB_DEVICE(0x1761, 0x0b05), USB_DEVICE_DATA(&rt2800usb_ops) },
 	/* AzureWave */
 	{ USB_DEVICE(0x13d3, 0x3247), USB_DEVICE_DATA(&rt2800usb_ops) },
 	{ USB_DEVICE(0x13d3, 0x3273), USB_DEVICE_DATA(&rt2800usb_ops) },
@@ -913,7 +914,6 @@
 	{ USB_DEVICE(0x0b05, 0x1760), USB_DEVICE_DATA(&rt2800usb_ops) },
 	{ USB_DEVICE(0x0b05, 0x1761), USB_DEVICE_DATA(&rt2800usb_ops) },
 	{ USB_DEVICE(0x0b05, 0x1790), USB_DEVICE_DATA(&rt2800usb_ops) },
-	{ USB_DEVICE(0x1761, 0x0b05), USB_DEVICE_DATA(&rt2800usb_ops) },
 	/* AzureWave */
 	{ USB_DEVICE(0x13d3, 0x3262), USB_DEVICE_DATA(&rt2800usb_ops) },
 	{ USB_DEVICE(0x13d3, 0x3284), USB_DEVICE_DATA(&rt2800usb_ops) },
@@ -937,6 +937,8 @@
 	{ USB_DEVICE(0x07d1, 0x3c13), USB_DEVICE_DATA(&rt2800usb_ops) },
 	{ USB_DEVICE(0x07d1, 0x3c15), USB_DEVICE_DATA(&rt2800usb_ops) },
 	{ USB_DEVICE(0x07d1, 0x3c17), USB_DEVICE_DATA(&rt2800usb_ops) },
+	/* Edimax */
+	{ USB_DEVICE(0x7392, 0x4085), USB_DEVICE_DATA(&rt2800usb_ops) },
 	/* Encore */
 	{ USB_DEVICE(0x203d, 0x14a1), USB_DEVICE_DATA(&rt2800usb_ops) },
 	/* Gemtek */
@@ -961,6 +963,7 @@
 	{ USB_DEVICE(0x1d4d, 0x0010), USB_DEVICE_DATA(&rt2800usb_ops) },
 	{ USB_DEVICE(0x1d4d, 0x0011), USB_DEVICE_DATA(&rt2800usb_ops) },
 	/* Planex */
+	{ USB_DEVICE(0x2019, 0x5201), USB_DEVICE_DATA(&rt2800usb_ops) },
 	{ USB_DEVICE(0x2019, 0xab24), USB_DEVICE_DATA(&rt2800usb_ops) },
 	/* Qcom */
 	{ USB_DEVICE(0x18e8, 0x6259), USB_DEVICE_DATA(&rt2800usb_ops) },
@@ -972,6 +975,8 @@
 	/* Sweex */
 	{ USB_DEVICE(0x177f, 0x0153), USB_DEVICE_DATA(&rt2800usb_ops) },
 	{ USB_DEVICE(0x177f, 0x0313), USB_DEVICE_DATA(&rt2800usb_ops) },
+	/* Toshiba */
+	{ USB_DEVICE(0x0930, 0x0a07), USB_DEVICE_DATA(&rt2800usb_ops) },
 	/* Zyxel */
 	{ USB_DEVICE(0x0586, 0x341a), USB_DEVICE_DATA(&rt2800usb_ops) },
 #endif

diff --git a/drivers/net/wireless/rtlwifi/efuse.c b/drivers/net/wireless/rtlwifi/efuse.c
index 4f92cba..f74a870 100644
--- a/drivers/net/wireless/rtlwifi/efuse.c
+++ b/drivers/net/wireless/rtlwifi/efuse.c

@@ -410,8 +410,8 @@
 
 	if (!efuse_shadow_update_chk(hw)) {
 		efuse_read_all_map(hw, &rtlefuse->efuse_map[EFUSE_INIT_MAP][0]);
-		memcpy((void *)&rtlefuse->efuse_map[EFUSE_MODIFY_MAP][0],
-		       (void *)&rtlefuse->efuse_map[EFUSE_INIT_MAP][0],
+		memcpy(&rtlefuse->efuse_map[EFUSE_MODIFY_MAP][0],
+		       &rtlefuse->efuse_map[EFUSE_INIT_MAP][0],
 		       rtlpriv->cfg->maps[EFUSE_HWSET_MAX_SIZE]);
 
 		RT_TRACE(rtlpriv, COMP_EFUSE, DBG_LOUD,
@@ -446,9 +446,9 @@
 
 		if (word_en != 0x0F) {
 			u8 tmpdata[8];
-			memcpy((void *)tmpdata,
-			       (void *)(&rtlefuse->
-					efuse_map[EFUSE_MODIFY_MAP][base]), 8);
+			memcpy(tmpdata,
+			       &rtlefuse->efuse_map[EFUSE_MODIFY_MAP][base],
+			       8);
 			RT_PRINT_DATA(rtlpriv, COMP_INIT, DBG_LOUD,
 				      ("U-efuse\n"), tmpdata, 8);
 
@@ -465,8 +465,8 @@
 	efuse_power_switch(hw, true, false);
 	efuse_read_all_map(hw, &rtlefuse->efuse_map[EFUSE_INIT_MAP][0]);
 
-	memcpy((void *)&rtlefuse->efuse_map[EFUSE_MODIFY_MAP][0],
-	       (void *)&rtlefuse->efuse_map[EFUSE_INIT_MAP][0],
+	memcpy(&rtlefuse->efuse_map[EFUSE_MODIFY_MAP][0],
+	       &rtlefuse->efuse_map[EFUSE_INIT_MAP][0],
 	       rtlpriv->cfg->maps[EFUSE_HWSET_MAX_SIZE]);
 
 	RT_TRACE(rtlpriv, COMP_EFUSE, DBG_LOUD, ("<---\n"));
@@ -479,13 +479,12 @@
 	struct rtl_efuse *rtlefuse = rtl_efuse(rtl_priv(hw));
 
 	if (rtlefuse->autoload_failflag == true) {
-		memset((void *)(&rtlefuse->efuse_map[EFUSE_INIT_MAP][0]), 128,
-		       0xFF);
+		memset(&rtlefuse->efuse_map[EFUSE_INIT_MAP][0], 0xFF, 128);
 	} else
 		efuse_read_all_map(hw, &rtlefuse->efuse_map[EFUSE_INIT_MAP][0]);
 
-	memcpy((void *)&rtlefuse->efuse_map[EFUSE_MODIFY_MAP][0],
-	       (void *)&rtlefuse->efuse_map[EFUSE_INIT_MAP][0],
+	memcpy(&rtlefuse->efuse_map[EFUSE_MODIFY_MAP][0],
+	       &rtlefuse->efuse_map[EFUSE_INIT_MAP][0],
 	       rtlpriv->cfg->maps[EFUSE_HWSET_MAX_SIZE]);
 
 }
@@ -694,8 +693,8 @@
 	if (offset > 15)
 		return false;
 
-	memset((void *)data, PGPKT_DATA_SIZE * sizeof(u8), 0xff);
-	memset((void *)tmpdata, PGPKT_DATA_SIZE * sizeof(u8), 0xff);
+	memset(data, 0xff, PGPKT_DATA_SIZE * sizeof(u8));
+	memset(tmpdata, 0xff, PGPKT_DATA_SIZE * sizeof(u8));
 
 	while (bcontinual && (efuse_addr < EFUSE_MAX_SIZE)) {
 		if (readstate & PG_STATE_HEADER) {
@@ -862,7 +861,7 @@
 
 		tmp_word_cnts = efuse_calculate_word_cnts(tmp_pkt.word_en);
 
-		memset((void *)originaldata, 8 * sizeof(u8), 0xff);
+		memset(originaldata, 0xff, 8 * sizeof(u8));
 
 		if (efuse_pg_packet_read(hw, tmp_pkt.offset, originaldata)) {
 			badworden = efuse_word_enable_data_write(hw,
@@ -917,7 +916,7 @@
 	target_pkt.offset = offset;
 	target_pkt.word_en = word_en;
 
-	memset((void *)target_pkt.data, 8 * sizeof(u8), 0xFF);
+	memset(target_pkt.data, 0xFF, 8 * sizeof(u8));
 
 	efuse_word_enable_data_read(word_en, data, target_pkt.data);
 	target_word_cnts = efuse_calculate_word_cnts(target_pkt.word_en);
@@ -1022,7 +1021,7 @@
 	u8 badworden = 0x0F;
 	u8 tmpdata[8];
 
-	memset((void *)tmpdata, PGPKT_DATA_SIZE, 0xff);
+	memset(tmpdata, 0xff, PGPKT_DATA_SIZE);
 	RT_TRACE(rtlpriv, COMP_EFUSE, DBG_LOUD,
 		 ("word_en = %x efuse_addr=%x\n", word_en, efuse_addr));
 

diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c
index 81e8048..58236e6 100644
--- a/drivers/net/wireless/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zd1211rw/zd_usb.c

@@ -60,6 +60,7 @@
 	{ USB_DEVICE(0x157e, 0x300a), .driver_info = DEVICE_ZD1211 },
 	{ USB_DEVICE(0x157e, 0x300b), .driver_info = DEVICE_ZD1211 },
 	{ USB_DEVICE(0x157e, 0x3204), .driver_info = DEVICE_ZD1211 },
+	{ USB_DEVICE(0x157e, 0x3207), .driver_info = DEVICE_ZD1211 },
 	{ USB_DEVICE(0x1740, 0x2000), .driver_info = DEVICE_ZD1211 },
 	{ USB_DEVICE(0x6891, 0xa727), .driver_info = DEVICE_ZD1211 },
 	/* ZD1211B */

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 4789f8e..a4115f1 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c

@@ -36,7 +36,7 @@
 #include <linux/iova.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/tboot.h>
 #include <linux/dmi.h>
 #include <asm/cacheflush.h>
@@ -3135,7 +3135,7 @@
 	}
 }
 
-static int iommu_suspend(struct sys_device *dev, pm_message_t state)
+static int iommu_suspend(void)
 {
 	struct dmar_drhd_unit *drhd;
 	struct intel_iommu *iommu = NULL;
@@ -3175,7 +3175,7 @@
 	return -ENOMEM;
 }
 
-static int iommu_resume(struct sys_device *dev)
+static void iommu_resume(void)
 {
 	struct dmar_drhd_unit *drhd;
 	struct intel_iommu *iommu = NULL;
@@ -3183,7 +3183,7 @@
 
 	if (init_iommu_hw()) {
 		WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
-		return -EIO;
+		return;
 	}
 
 	for_each_active_iommu(iommu, drhd) {
@@ -3204,40 +3204,20 @@
 
 	for_each_active_iommu(iommu, drhd)
 		kfree(iommu->iommu_state);
-
-	return 0;
 }
 
-static struct sysdev_class iommu_sysclass = {
-	.name		= "iommu",
+static struct syscore_ops iommu_syscore_ops = {
 	.resume		= iommu_resume,
 	.suspend	= iommu_suspend,
 };
 
-static struct sys_device device_iommu = {
-	.cls	= &iommu_sysclass,
-};
-
-static int __init init_iommu_sysfs(void)
+static void __init init_iommu_pm_ops(void)
 {
-	int error;
-
-	error = sysdev_class_register(&iommu_sysclass);
-	if (error)
-		return error;
-
-	error = sysdev_register(&device_iommu);
-	if (error)
-		sysdev_class_unregister(&iommu_sysclass);
-
-	return error;
+	register_syscore_ops(&iommu_syscore_ops);
 }
 
 #else
-static int __init init_iommu_sysfs(void)
-{
-	return 0;
-}
+static inline int init_iommu_pm_ops(void) { }
 #endif	/* CONFIG_PM */
 
 /*
@@ -3320,7 +3300,7 @@
 #endif
 	dma_ops = &intel_dma_ops;
 
-	init_iommu_sysfs();
+	init_iommu_pm_ops();
 
 	register_iommu(&intel_iommu_ops);
 

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b714d78..2472e71 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c

@@ -740,6 +740,12 @@
 
 	if (!__pci_complete_power_transition(dev, state))
 		error = 0;
+	/*
+	 * When aspm_policy is "powersave" this call ensures
+	 * that ASPM is configured.
+	 */
+	if (!error && dev->bus->self)
+		pcie_aspm_powersave_config_link(dev->bus->self);
 
 	return error;
 }

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 3188cd9..eee09f7 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c

@@ -69,6 +69,7 @@
 };
 
 static int aspm_disabled, aspm_force, aspm_clear_state;
+static bool aspm_support_enabled = true;
 static DEFINE_MUTEX(aspm_lock);
 static LIST_HEAD(link_list);
 
@@ -707,6 +708,28 @@
 	up_read(&pci_bus_sem);
 }
 
+void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
+{
+	struct pcie_link_state *link = pdev->link_state;
+
+	if (aspm_disabled || !pci_is_pcie(pdev) || !link)
+		return;
+
+	if (aspm_policy != POLICY_POWERSAVE)
+		return;
+
+	if ((pdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
+	    (pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
+		return;
+
+	down_read(&pci_bus_sem);
+	mutex_lock(&aspm_lock);
+	pcie_config_aspm_path(link);
+	pcie_set_clkpm(link, policy_to_clkpm_state(link));
+	mutex_unlock(&aspm_lock);
+	up_read(&pci_bus_sem);
+}
+
 /*
  * pci_disable_link_state - disable pci device's link state, so the link will
  * never enter specific states
@@ -747,6 +770,8 @@
 	int i;
 	struct pcie_link_state *link;
 
+	if (aspm_disabled)
+		return -EPERM;
 	for (i = 0; i < ARRAY_SIZE(policy_str); i++)
 		if (!strncmp(val, policy_str[i], strlen(policy_str[i])))
 			break;
@@ -801,6 +826,8 @@
 	struct pcie_link_state *link, *root = pdev->link_state->root;
 	u32 val = buf[0] - '0', state = 0;
 
+	if (aspm_disabled)
+		return -EPERM;
 	if (n < 1 || val > 3)
 		return -EINVAL;
 
@@ -896,6 +923,7 @@
 {
 	if (!strcmp(str, "off")) {
 		aspm_disabled = 1;
+		aspm_support_enabled = false;
 		printk(KERN_INFO "PCIe ASPM is disabled\n");
 	} else if (!strcmp(str, "force")) {
 		aspm_force = 1;
@@ -930,3 +958,8 @@
 }
 EXPORT_SYMBOL(pcie_aspm_enabled);
 
+bool pcie_aspm_support_enabled(void)
+{
+	return aspm_support_enabled;
+}
+EXPORT_SYMBOL(pcie_aspm_support_enabled);

diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 5130d0d..595654a 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c

@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include <linux/pcieport_if.h>
 #include <linux/aer.h>
-#include <linux/pci-aspm.h>
 
 #include "../pci.h"
 #include "portdrv.h"
@@ -356,10 +355,8 @@
 
 	/* Get and check PCI Express port services */
 	capabilities = get_port_device_capability(dev);
-	if (!capabilities) {
-		pcie_no_aspm();
+	if (!capabilities)
 		return 0;
-	}
 
 	pci_set_master(dev);
 	/*

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 61bf5d7..52a462f 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig

@@ -117,10 +117,24 @@
 
 config BATTERY_BQ27x00
 	tristate "BQ27x00 battery driver"
+	help
+	  Say Y here to enable support for batteries with BQ27x00 (I2C/HDQ) chips.
+
+config BATTERY_BQ27X00_I2C
+	bool "BQ27200/BQ27500 support"
+	depends on BATTERY_BQ27x00
 	depends on I2C
+	default y
 	help
 	  Say Y here to enable support for batteries with BQ27x00 (I2C) chips.
 
+config BATTERY_BQ27X00_PLATFORM
+	bool "BQ27000 support"
+	depends on BATTERY_BQ27x00
+	default y
+	help
+	  Say Y here to enable support for batteries with BQ27000 (HDQ) chips.
+
 config BATTERY_DA9030
 	tristate "DA9030 battery driver"
 	depends on PMIC_DA903X

diff --git a/drivers/power/bq20z75.c b/drivers/power/bq20z75.c
index 492da27..506585e 100644
--- a/drivers/power/bq20z75.c
+++ b/drivers/power/bq20z75.c

@@ -25,6 +25,10 @@
 #include <linux/power_supply.h>
 #include <linux/i2c.h>
 #include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+
+#include <linux/power/bq20z75.h>
 
 enum {
 	REG_MANUFACTURER_DATA,
@@ -38,11 +42,22 @@
 	REG_CYCLE_COUNT,
 	REG_SERIAL_NUMBER,
 	REG_REMAINING_CAPACITY,
+	REG_REMAINING_CAPACITY_CHARGE,
 	REG_FULL_CHARGE_CAPACITY,
+	REG_FULL_CHARGE_CAPACITY_CHARGE,
 	REG_DESIGN_CAPACITY,
+	REG_DESIGN_CAPACITY_CHARGE,
 	REG_DESIGN_VOLTAGE,
 };
 
+/* Battery Mode defines */
+#define BATTERY_MODE_OFFSET		0x03
+#define BATTERY_MODE_MASK		0x8000
+enum bq20z75_battery_mode {
+	BATTERY_MODE_AMPS,
+	BATTERY_MODE_WATTS
+};
+
 /* manufacturer access defines */
 #define MANUFACTURER_ACCESS_STATUS	0x0006
 #define MANUFACTURER_ACCESS_SLEEP	0x0011
@@ -78,8 +93,12 @@
 		BQ20Z75_DATA(POWER_SUPPLY_PROP_CAPACITY, 0x0E, 0, 100),
 	[REG_REMAINING_CAPACITY] =
 		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_NOW, 0x0F, 0, 65535),
+	[REG_REMAINING_CAPACITY_CHARGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_NOW, 0x0F, 0, 65535),
 	[REG_FULL_CHARGE_CAPACITY] =
 		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL, 0x10, 0, 65535),
+	[REG_FULL_CHARGE_CAPACITY_CHARGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL, 0x10, 0, 65535),
 	[REG_TIME_TO_EMPTY] =
 		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, 0x12, 0,
 			65535),
@@ -93,6 +112,9 @@
 	[REG_DESIGN_CAPACITY] =
 		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, 0x18, 0,
 			65535),
+	[REG_DESIGN_CAPACITY_CHARGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, 0x18, 0,
+			65535),
 	[REG_DESIGN_VOLTAGE] =
 		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, 0x19, 0,
 			65535),
@@ -117,39 +139,72 @@
 	POWER_SUPPLY_PROP_ENERGY_NOW,
 	POWER_SUPPLY_PROP_ENERGY_FULL,
 	POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
 };
 
 struct bq20z75_info {
-	struct i2c_client	*client;
-	struct power_supply	power_supply;
+	struct i2c_client		*client;
+	struct power_supply		power_supply;
+	struct bq20z75_platform_data	*pdata;
+	bool				is_present;
+	bool				gpio_detect;
+	bool				enable_detection;
+	int				irq;
 };
 
 static int bq20z75_read_word_data(struct i2c_client *client, u8 address)
 {
-	s32 ret;
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	s32 ret = 0;
+	int retries = 1;
 
-	ret = i2c_smbus_read_word_data(client, address);
+	if (bq20z75_device->pdata)
+		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
+
+	while (retries > 0) {
+		ret = i2c_smbus_read_word_data(client, address);
+		if (ret >= 0)
+			break;
+		retries--;
+	}
+
 	if (ret < 0) {
-		dev_err(&client->dev,
+		dev_dbg(&client->dev,
 			"%s: i2c read at address 0x%x failed\n",
 			__func__, address);
 		return ret;
 	}
+
 	return le16_to_cpu(ret);
 }
 
 static int bq20z75_write_word_data(struct i2c_client *client, u8 address,
 	u16 value)
 {
-	s32 ret;
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	s32 ret = 0;
+	int retries = 1;
 
-	ret = i2c_smbus_write_word_data(client, address, le16_to_cpu(value));
+	if (bq20z75_device->pdata)
+		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
+
+	while (retries > 0) {
+		ret = i2c_smbus_write_word_data(client, address,
+			le16_to_cpu(value));
+		if (ret >= 0)
+			break;
+		retries--;
+	}
+
 	if (ret < 0) {
-		dev_err(&client->dev,
+		dev_dbg(&client->dev,
 			"%s: i2c write to address 0x%x failed\n",
 			__func__, address);
 		return ret;
 	}
+
 	return 0;
 }
 
@@ -158,6 +213,19 @@
 	union power_supply_propval *val)
 {
 	s32 ret;
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+
+	if (psp == POWER_SUPPLY_PROP_PRESENT &&
+		bq20z75_device->gpio_detect) {
+		ret = gpio_get_value(
+			bq20z75_device->pdata->battery_detect);
+		if (ret == bq20z75_device->pdata->battery_detect_present)
+			val->intval = 1;
+		else
+			val->intval = 0;
+		bq20z75_device->is_present = val->intval;
+		return ret;
+	}
 
 	/* Write to ManufacturerAccess with
 	 * ManufacturerAccess command and then
@@ -165,9 +233,11 @@
 	ret = bq20z75_write_word_data(client,
 		bq20z75_data[REG_MANUFACTURER_DATA].addr,
 		MANUFACTURER_ACCESS_STATUS);
-	if (ret < 0)
+	if (ret < 0) {
+		if (psp == POWER_SUPPLY_PROP_PRESENT)
+			val->intval = 0; /* battery removed */
 		return ret;
-
+	}
 
 	ret = bq20z75_read_word_data(client,
 		bq20z75_data[REG_MANUFACTURER_DATA].addr);
@@ -248,30 +318,39 @@
 {
 #define BASE_UNIT_CONVERSION		1000
 #define BATTERY_MODE_CAP_MULT_WATT	(10 * BASE_UNIT_CONVERSION)
-#define TIME_UNIT_CONVERSION		600
-#define TEMP_KELVIN_TO_CELCIUS		2731
+#define TIME_UNIT_CONVERSION		60
+#define TEMP_KELVIN_TO_CELSIUS		2731
 	switch (psp) {
 	case POWER_SUPPLY_PROP_ENERGY_NOW:
 	case POWER_SUPPLY_PROP_ENERGY_FULL:
 	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+		/* bq20z75 provides energy in units of 10mWh.
+		 * Convert to µWh
+		 */
 		val->intval *= BATTERY_MODE_CAP_MULT_WATT;
 		break;
 
 	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
 	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
 	case POWER_SUPPLY_PROP_CURRENT_NOW:
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
 		val->intval *= BASE_UNIT_CONVERSION;
 		break;
 
 	case POWER_SUPPLY_PROP_TEMP:
-		/* bq20z75 provides battery tempreture in 0.1°K
-		 * so convert it to 0.1°C */
-		val->intval -= TEMP_KELVIN_TO_CELCIUS;
-		val->intval *= 10;
+		/* bq20z75 provides battery temperature in 0.1K
+		 * so convert it to 0.1°C
+		 */
+		val->intval -= TEMP_KELVIN_TO_CELSIUS;
 		break;
 
 	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
 	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
+		/* bq20z75 provides time to empty and time to full in minutes.
+		 * Convert to seconds
+		 */
 		val->intval *= TIME_UNIT_CONVERSION;
 		break;
 
@@ -281,11 +360,44 @@
 	}
 }
 
+static enum bq20z75_battery_mode
+bq20z75_set_battery_mode(struct i2c_client *client,
+	enum bq20z75_battery_mode mode)
+{
+	int ret, original_val;
+
+	original_val = bq20z75_read_word_data(client, BATTERY_MODE_OFFSET);
+	if (original_val < 0)
+		return original_val;
+
+	if ((original_val & BATTERY_MODE_MASK) == mode)
+		return mode;
+
+	if (mode == BATTERY_MODE_AMPS)
+		ret = original_val & ~BATTERY_MODE_MASK;
+	else
+		ret = original_val | BATTERY_MODE_MASK;
+
+	ret = bq20z75_write_word_data(client, BATTERY_MODE_OFFSET, ret);
+	if (ret < 0)
+		return ret;
+
+	return original_val & BATTERY_MODE_MASK;
+}
+
 static int bq20z75_get_battery_capacity(struct i2c_client *client,
 	int reg_offset, enum power_supply_property psp,
 	union power_supply_propval *val)
 {
 	s32 ret;
+	enum bq20z75_battery_mode mode = BATTERY_MODE_WATTS;
+
+	if (power_supply_is_amp_property(psp))
+		mode = BATTERY_MODE_AMPS;
+
+	mode = bq20z75_set_battery_mode(client, mode);
+	if (mode < 0)
+		return mode;
 
 	ret = bq20z75_read_word_data(client, bq20z75_data[reg_offset].addr);
 	if (ret < 0)
@@ -298,6 +410,10 @@
 	} else
 		val->intval = ret;
 
+	ret = bq20z75_set_battery_mode(client, mode);
+	if (ret < 0)
+		return ret;
+
 	return 0;
 }
 
@@ -318,12 +434,25 @@
 	return 0;
 }
 
+static int bq20z75_get_property_index(struct i2c_client *client,
+	enum power_supply_property psp)
+{
+	int count;
+	for (count = 0; count < ARRAY_SIZE(bq20z75_data); count++)
+		if (psp == bq20z75_data[count].psp)
+			return count;
+
+	dev_warn(&client->dev,
+		"%s: Invalid Property - %d\n", __func__, psp);
+
+	return -EINVAL;
+}
+
 static int bq20z75_get_property(struct power_supply *psy,
 	enum power_supply_property psp,
 	union power_supply_propval *val)
 {
-	int count;
-	int ret;
+	int ret = 0;
 	struct bq20z75_info *bq20z75_device = container_of(psy,
 				struct bq20z75_info, power_supply);
 	struct i2c_client *client = bq20z75_device->client;
@@ -332,8 +461,8 @@
 	case POWER_SUPPLY_PROP_PRESENT:
 	case POWER_SUPPLY_PROP_HEALTH:
 		ret = bq20z75_get_battery_presence_and_health(client, psp, val);
-		if (ret)
-			return ret;
+		if (psp == POWER_SUPPLY_PROP_PRESENT)
+			return 0;
 		break;
 
 	case POWER_SUPPLY_PROP_TECHNOLOGY:
@@ -343,22 +472,19 @@
 	case POWER_SUPPLY_PROP_ENERGY_NOW:
 	case POWER_SUPPLY_PROP_ENERGY_FULL:
 	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
 	case POWER_SUPPLY_PROP_CAPACITY:
-		for (count = 0; count < ARRAY_SIZE(bq20z75_data); count++) {
-			if (psp == bq20z75_data[count].psp)
-				break;
-		}
+		ret = bq20z75_get_property_index(client, psp);
+		if (ret < 0)
+			break;
 
-		ret = bq20z75_get_battery_capacity(client, count, psp, val);
-		if (ret)
-			return ret;
-
+		ret = bq20z75_get_battery_capacity(client, ret, psp, val);
 		break;
 
 	case POWER_SUPPLY_PROP_SERIAL_NUMBER:
 		ret = bq20z75_get_battery_serial_number(client, val);
-		if (ret)
-			return ret;
 		break;
 
 	case POWER_SUPPLY_PROP_STATUS:
@@ -369,15 +495,11 @@
 	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
 	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
 	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
-		for (count = 0; count < ARRAY_SIZE(bq20z75_data); count++) {
-			if (psp == bq20z75_data[count].psp)
-				break;
-		}
+		ret = bq20z75_get_property_index(client, psp);
+		if (ret < 0)
+			break;
 
-		ret = bq20z75_get_battery_property(client, count, psp, val);
-		if (ret)
-			return ret;
-
+		ret = bq20z75_get_battery_property(client, ret, psp, val);
 		break;
 
 	default:
@@ -386,26 +508,58 @@
 		return -EINVAL;
 	}
 
-	/* Convert units to match requirements for power supply class */
-	bq20z75_unit_adjustment(client, psp, val);
+	if (!bq20z75_device->enable_detection)
+		goto done;
+
+	if (!bq20z75_device->gpio_detect &&
+		bq20z75_device->is_present != (ret >= 0)) {
+		bq20z75_device->is_present = (ret >= 0);
+		power_supply_changed(&bq20z75_device->power_supply);
+	}
+
+done:
+	if (!ret) {
+		/* Convert units to match requirements for power supply class */
+		bq20z75_unit_adjustment(client, psp, val);
+	}
 
 	dev_dbg(&client->dev,
-		"%s: property = %d, value = %d\n", __func__, psp, val->intval);
+		"%s: property = %d, value = %x\n", __func__, psp, val->intval);
+
+	if (ret && bq20z75_device->is_present)
+		return ret;
+
+	/* battery not present, so return NODATA for properties */
+	if (ret)
+		return -ENODATA;
 
 	return 0;
 }
 
-static int bq20z75_probe(struct i2c_client *client,
+static irqreturn_t bq20z75_irq(int irq, void *devid)
+{
+	struct power_supply *battery = devid;
+
+	power_supply_changed(battery);
+
+	return IRQ_HANDLED;
+}
+
+static int __devinit bq20z75_probe(struct i2c_client *client,
 	const struct i2c_device_id *id)
 {
 	struct bq20z75_info *bq20z75_device;
+	struct bq20z75_platform_data *pdata = client->dev.platform_data;
 	int rc;
+	int irq;
 
 	bq20z75_device = kzalloc(sizeof(struct bq20z75_info), GFP_KERNEL);
 	if (!bq20z75_device)
 		return -ENOMEM;
 
 	bq20z75_device->client = client;
+	bq20z75_device->enable_detection = false;
+	bq20z75_device->gpio_detect = false;
 	bq20z75_device->power_supply.name = "battery";
 	bq20z75_device->power_supply.type = POWER_SUPPLY_TYPE_BATTERY;
 	bq20z75_device->power_supply.properties = bq20z75_properties;
@@ -413,26 +567,86 @@
 		ARRAY_SIZE(bq20z75_properties);
 	bq20z75_device->power_supply.get_property = bq20z75_get_property;
 
+	if (pdata) {
+		bq20z75_device->gpio_detect =
+			gpio_is_valid(pdata->battery_detect);
+		bq20z75_device->pdata = pdata;
+	}
+
 	i2c_set_clientdata(client, bq20z75_device);
 
+	if (!bq20z75_device->gpio_detect)
+		goto skip_gpio;
+
+	rc = gpio_request(pdata->battery_detect, dev_name(&client->dev));
+	if (rc) {
+		dev_warn(&client->dev, "Failed to request gpio: %d\n", rc);
+		bq20z75_device->gpio_detect = false;
+		goto skip_gpio;
+	}
+
+	rc = gpio_direction_input(pdata->battery_detect);
+	if (rc) {
+		dev_warn(&client->dev, "Failed to get gpio as input: %d\n", rc);
+		gpio_free(pdata->battery_detect);
+		bq20z75_device->gpio_detect = false;
+		goto skip_gpio;
+	}
+
+	irq = gpio_to_irq(pdata->battery_detect);
+	if (irq <= 0) {
+		dev_warn(&client->dev, "Failed to get gpio as irq: %d\n", irq);
+		gpio_free(pdata->battery_detect);
+		bq20z75_device->gpio_detect = false;
+		goto skip_gpio;
+	}
+
+	rc = request_irq(irq, bq20z75_irq,
+		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+		dev_name(&client->dev), &bq20z75_device->power_supply);
+	if (rc) {
+		dev_warn(&client->dev, "Failed to request irq: %d\n", rc);
+		gpio_free(pdata->battery_detect);
+		bq20z75_device->gpio_detect = false;
+		goto skip_gpio;
+	}
+
+	bq20z75_device->irq = irq;
+
+skip_gpio:
+
 	rc = power_supply_register(&client->dev, &bq20z75_device->power_supply);
 	if (rc) {
 		dev_err(&client->dev,
 			"%s: Failed to register power supply\n", __func__);
-		kfree(bq20z75_device);
-		return rc;
+		goto exit_psupply;
 	}
 
 	dev_info(&client->dev,
 		"%s: battery gas gauge device registered\n", client->name);
 
 	return 0;
+
+exit_psupply:
+	if (bq20z75_device->irq)
+		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
+	if (bq20z75_device->gpio_detect)
+		gpio_free(pdata->battery_detect);
+
+	kfree(bq20z75_device);
+
+	return rc;
 }
 
-static int bq20z75_remove(struct i2c_client *client)
+static int __devexit bq20z75_remove(struct i2c_client *client)
 {
 	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
 
+	if (bq20z75_device->irq)
+		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
+	if (bq20z75_device->gpio_detect)
+		gpio_free(bq20z75_device->pdata->battery_detect);
+
 	power_supply_unregister(&bq20z75_device->power_supply);
 	kfree(bq20z75_device);
 	bq20z75_device = NULL;
@@ -444,13 +658,14 @@
 static int bq20z75_suspend(struct i2c_client *client,
 	pm_message_t state)
 {
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
 	s32 ret;
 
 	/* write to manufacturer access with sleep command */
 	ret = bq20z75_write_word_data(client,
 		bq20z75_data[REG_MANUFACTURER_DATA].addr,
 		MANUFACTURER_ACCESS_SLEEP);
-	if (ret < 0)
+	if (bq20z75_device->is_present && ret < 0)
 		return ret;
 
 	return 0;
@@ -465,10 +680,11 @@
 	{ "bq20z75", 0 },
 	{}
 };
+MODULE_DEVICE_TABLE(i2c, bq20z75_id);
 
 static struct i2c_driver bq20z75_battery_driver = {
 	.probe		= bq20z75_probe,
-	.remove		= bq20z75_remove,
+	.remove		= __devexit_p(bq20z75_remove),
 	.suspend	= bq20z75_suspend,
 	.resume		= bq20z75_resume,
 	.id_table	= bq20z75_id,

diff --git a/drivers/power/bq27x00_battery.c b/drivers/power/bq27x00_battery.c
index eff0273..59e68db 100644
--- a/drivers/power/bq27x00_battery.c
+++ b/drivers/power/bq27x00_battery.c

@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2008 Rodolfo Giometti <giometti@linux.it>
  * Copyright (C) 2008 Eurotech S.p.A. <info@eurotech.it>
+ * Copyright (C) 2010-2011 Lars-Peter Clausen <lars@metafoo.de>
  *
  * Based on a previous work by Copyright (C) 2008 Texas Instruments, Inc.
  *
@@ -15,6 +16,13 @@
  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  *
  */
+
+/*
+ * Datasheets:
+ * http://focus.ti.com/docs/prod/folders/print/bq27000.html
+ * http://focus.ti.com/docs/prod/folders/print/bq27500.html
+ */
+
 #include <linux/module.h>
 #include <linux/param.h>
 #include <linux/jiffies.h>
@@ -27,7 +35,9 @@
 #include <linux/slab.h>
 #include <asm/unaligned.h>
 
-#define DRIVER_VERSION			"1.1.0"
+#include <linux/power/bq27x00_battery.h>
+
+#define DRIVER_VERSION			"1.2.0"
 
 #define BQ27x00_REG_TEMP		0x06
 #define BQ27x00_REG_VOLT		0x08
@@ -36,36 +46,59 @@
 #define BQ27x00_REG_TTE			0x16
 #define BQ27x00_REG_TTF			0x18
 #define BQ27x00_REG_TTECP		0x26
+#define BQ27x00_REG_NAC			0x0C /* Nominal available capaciy */
+#define BQ27x00_REG_LMD			0x12 /* Last measured discharge */
+#define BQ27x00_REG_CYCT		0x2A /* Cycle count total */
+#define BQ27x00_REG_AE			0x22 /* Available enery */
 
 #define BQ27000_REG_RSOC		0x0B /* Relative State-of-Charge */
+#define BQ27000_REG_ILMD		0x76 /* Initial last measured discharge */
 #define BQ27000_FLAG_CHGS		BIT(7)
+#define BQ27000_FLAG_FC			BIT(5)
 
-#define BQ27500_REG_SOC			0x2c
+#define BQ27500_REG_SOC			0x2C
+#define BQ27500_REG_DCAP		0x3C /* Design capacity */
 #define BQ27500_FLAG_DSC		BIT(0)
 #define BQ27500_FLAG_FC			BIT(9)
 
-/* If the system has several batteries we need a different name for each
- * of them...
- */
-static DEFINE_IDR(battery_id);
-static DEFINE_MUTEX(battery_mutex);
+#define BQ27000_RS			20 /* Resistor sense */
 
 struct bq27x00_device_info;
 struct bq27x00_access_methods {
-	int (*read)(u8 reg, int *rt_value, int b_single,
-		struct bq27x00_device_info *di);
+	int (*read)(struct bq27x00_device_info *di, u8 reg, bool single);
 };
 
 enum bq27x00_chip { BQ27000, BQ27500 };
 
+struct bq27x00_reg_cache {
+	int temperature;
+	int time_to_empty;
+	int time_to_empty_avg;
+	int time_to_full;
+	int charge_full;
+	int charge_counter;
+	int capacity;
+	int flags;
+
+	int current_now;
+};
+
 struct bq27x00_device_info {
 	struct device 		*dev;
 	int			id;
-	struct bq27x00_access_methods	*bus;
-	struct power_supply	bat;
 	enum bq27x00_chip	chip;
 
-	struct i2c_client	*client;
+	struct bq27x00_reg_cache cache;
+	int charge_design_full;
+
+	unsigned long last_update;
+	struct delayed_work work;
+
+	struct power_supply	bat;
+
+	struct bq27x00_access_methods bus;
+
+	struct mutex lock;
 };
 
 static enum power_supply_property bq27x00_battery_props[] = {
@@ -78,164 +111,328 @@
 	POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
 	POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG,
 	POWER_SUPPLY_PROP_TIME_TO_FULL_NOW,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
+	POWER_SUPPLY_PROP_CHARGE_COUNTER,
+	POWER_SUPPLY_PROP_ENERGY_NOW,
 };
 
+static unsigned int poll_interval = 360;
+module_param(poll_interval, uint, 0644);
+MODULE_PARM_DESC(poll_interval, "battery poll interval in seconds - " \
+				"0 disables polling");
+
 /*
  * Common code for BQ27x00 devices
  */
 
-static int bq27x00_read(u8 reg, int *rt_value, int b_single,
-			struct bq27x00_device_info *di)
+static inline int bq27x00_read(struct bq27x00_device_info *di, u8 reg,
+		bool single)
 {
-	return di->bus->read(reg, rt_value, b_single, di);
-}
-
-/*
- * Return the battery temperature in tenths of degree Celsius
- * Or < 0 if something fails.
- */
-static int bq27x00_battery_temperature(struct bq27x00_device_info *di)
-{
-	int ret;
-	int temp = 0;
-
-	ret = bq27x00_read(BQ27x00_REG_TEMP, &temp, 0, di);
-	if (ret) {
-		dev_err(di->dev, "error reading temperature\n");
-		return ret;
-	}
-
-	if (di->chip == BQ27500)
-		return temp - 2731;
-	else
-		return ((temp >> 2) - 273) * 10;
-}
-
-/*
- * Return the battery Voltage in milivolts
- * Or < 0 if something fails.
- */
-static int bq27x00_battery_voltage(struct bq27x00_device_info *di)
-{
-	int ret;
-	int volt = 0;
-
-	ret = bq27x00_read(BQ27x00_REG_VOLT, &volt, 0, di);
-	if (ret) {
-		dev_err(di->dev, "error reading voltage\n");
-		return ret;
-	}
-
-	return volt * 1000;
-}
-
-/*
- * Return the battery average current
- * Note that current can be negative signed as well
- * Or 0 if something fails.
- */
-static int bq27x00_battery_current(struct bq27x00_device_info *di)
-{
-	int ret;
-	int curr = 0;
-	int flags = 0;
-
-	ret = bq27x00_read(BQ27x00_REG_AI, &curr, 0, di);
-	if (ret) {
-		dev_err(di->dev, "error reading current\n");
-		return 0;
-	}
-
-	if (di->chip == BQ27500) {
-		/* bq27500 returns signed value */
-		curr = (int)(s16)curr;
-	} else {
-		ret = bq27x00_read(BQ27x00_REG_FLAGS, &flags, 0, di);
-		if (ret < 0) {
-			dev_err(di->dev, "error reading flags\n");
-			return 0;
-		}
-		if (flags & BQ27000_FLAG_CHGS) {
-			dev_dbg(di->dev, "negative current!\n");
-			curr = -curr;
-		}
-	}
-
-	return curr * 1000;
+	return di->bus.read(di, reg, single);
 }
 
 /*
  * Return the battery Relative State-of-Charge
  * Or < 0 if something fails.
  */
-static int bq27x00_battery_rsoc(struct bq27x00_device_info *di)
+static int bq27x00_battery_read_rsoc(struct bq27x00_device_info *di)
 {
-	int ret;
-	int rsoc = 0;
+	int rsoc;
 
 	if (di->chip == BQ27500)
-		ret = bq27x00_read(BQ27500_REG_SOC, &rsoc, 0, di);
+		rsoc = bq27x00_read(di, BQ27500_REG_SOC, false);
 	else
-		ret = bq27x00_read(BQ27000_REG_RSOC, &rsoc, 1, di);
-	if (ret) {
+		rsoc = bq27x00_read(di, BQ27000_REG_RSOC, true);
+
+	if (rsoc < 0)
 		dev_err(di->dev, "error reading relative State-of-Charge\n");
-		return ret;
-	}
 
 	return rsoc;
 }
 
-static int bq27x00_battery_status(struct bq27x00_device_info *di,
-				  union power_supply_propval *val)
+/*
+ * Return a battery charge value in µAh
+ * Or < 0 if something fails.
+ */
+static int bq27x00_battery_read_charge(struct bq27x00_device_info *di, u8 reg)
 {
-	int flags = 0;
-	int status;
-	int ret;
+	int charge;
 
-	ret = bq27x00_read(BQ27x00_REG_FLAGS, &flags, 0, di);
-	if (ret < 0) {
-		dev_err(di->dev, "error reading flags\n");
-		return ret;
+	charge = bq27x00_read(di, reg, false);
+	if (charge < 0) {
+		dev_err(di->dev, "error reading nominal available capacity\n");
+		return charge;
 	}
 
-	if (di->chip == BQ27500) {
-		if (flags & BQ27500_FLAG_FC)
-			status = POWER_SUPPLY_STATUS_FULL;
-		else if (flags & BQ27500_FLAG_DSC)
-			status = POWER_SUPPLY_STATUS_DISCHARGING;
-		else
-			status = POWER_SUPPLY_STATUS_CHARGING;
-	} else {
-		if (flags & BQ27000_FLAG_CHGS)
-			status = POWER_SUPPLY_STATUS_CHARGING;
-		else
-			status = POWER_SUPPLY_STATUS_DISCHARGING;
+	if (di->chip == BQ27500)
+		charge *= 1000;
+	else
+		charge = charge * 3570 / BQ27000_RS;
+
+	return charge;
+}
+
+/*
+ * Return the battery Nominal available capaciy in µAh
+ * Or < 0 if something fails.
+ */
+static inline int bq27x00_battery_read_nac(struct bq27x00_device_info *di)
+{
+	return bq27x00_battery_read_charge(di, BQ27x00_REG_NAC);
+}
+
+/*
+ * Return the battery Last measured discharge in µAh
+ * Or < 0 if something fails.
+ */
+static inline int bq27x00_battery_read_lmd(struct bq27x00_device_info *di)
+{
+	return bq27x00_battery_read_charge(di, BQ27x00_REG_LMD);
+}
+
+/*
+ * Return the battery Initial last measured discharge in µAh
+ * Or < 0 if something fails.
+ */
+static int bq27x00_battery_read_ilmd(struct bq27x00_device_info *di)
+{
+	int ilmd;
+
+	if (di->chip == BQ27500)
+		ilmd = bq27x00_read(di, BQ27500_REG_DCAP, false);
+	else
+		ilmd = bq27x00_read(di, BQ27000_REG_ILMD, true);
+
+	if (ilmd < 0) {
+		dev_err(di->dev, "error reading initial last measured discharge\n");
+		return ilmd;
 	}
 
-	val->intval = status;
-	return 0;
+	if (di->chip == BQ27500)
+		ilmd *= 1000;
+	else
+		ilmd = ilmd * 256 * 3570 / BQ27000_RS;
+
+	return ilmd;
+}
+
+/*
+ * Return the battery Cycle count total
+ * Or < 0 if something fails.
+ */
+static int bq27x00_battery_read_cyct(struct bq27x00_device_info *di)
+{
+	int cyct;
+
+	cyct = bq27x00_read(di, BQ27x00_REG_CYCT, false);
+	if (cyct < 0)
+		dev_err(di->dev, "error reading cycle count total\n");
+
+	return cyct;
 }
 
 /*
  * Read a time register.
  * Return < 0 if something fails.
  */
-static int bq27x00_battery_time(struct bq27x00_device_info *di, int reg,
-				union power_supply_propval *val)
+static int bq27x00_battery_read_time(struct bq27x00_device_info *di, u8 reg)
 {
-	int tval = 0;
-	int ret;
+	int tval;
 
-	ret = bq27x00_read(reg, &tval, 0, di);
-	if (ret) {
-		dev_err(di->dev, "error reading register %02x\n", reg);
-		return ret;
+	tval = bq27x00_read(di, reg, false);
+	if (tval < 0) {
+		dev_err(di->dev, "error reading register %02x: %d\n", reg, tval);
+		return tval;
 	}
 
 	if (tval == 65535)
 		return -ENODATA;
 
-	val->intval = tval * 60;
+	return tval * 60;
+}
+
+static void bq27x00_update(struct bq27x00_device_info *di)
+{
+	struct bq27x00_reg_cache cache = {0, };
+	bool is_bq27500 = di->chip == BQ27500;
+
+	cache.flags = bq27x00_read(di, BQ27x00_REG_FLAGS, is_bq27500);
+	if (cache.flags >= 0) {
+		cache.capacity = bq27x00_battery_read_rsoc(di);
+		cache.temperature = bq27x00_read(di, BQ27x00_REG_TEMP, false);
+		cache.time_to_empty = bq27x00_battery_read_time(di, BQ27x00_REG_TTE);
+		cache.time_to_empty_avg = bq27x00_battery_read_time(di, BQ27x00_REG_TTECP);
+		cache.time_to_full = bq27x00_battery_read_time(di, BQ27x00_REG_TTF);
+		cache.charge_full = bq27x00_battery_read_lmd(di);
+		cache.charge_counter = bq27x00_battery_read_cyct(di);
+
+		if (!is_bq27500)
+			cache.current_now = bq27x00_read(di, BQ27x00_REG_AI, false);
+
+		/* We only have to read charge design full once */
+		if (di->charge_design_full <= 0)
+			di->charge_design_full = bq27x00_battery_read_ilmd(di);
+	}
+
+	/* Ignore current_now which is a snapshot of the current battery state
+	 * and is likely to be different even between two consecutive reads */
+	if (memcmp(&di->cache, &cache, sizeof(cache) - sizeof(int)) != 0) {
+		di->cache = cache;
+		power_supply_changed(&di->bat);
+	}
+
+	di->last_update = jiffies;
+}
+
+static void bq27x00_battery_poll(struct work_struct *work)
+{
+	struct bq27x00_device_info *di =
+		container_of(work, struct bq27x00_device_info, work.work);
+
+	bq27x00_update(di);
+
+	if (poll_interval > 0) {
+		/* The timer does not have to be accurate. */
+		set_timer_slack(&di->work.timer, poll_interval * HZ / 4);
+		schedule_delayed_work(&di->work, poll_interval * HZ);
+	}
+}
+
+
+/*
+ * Return the battery temperature in tenths of degree Celsius
+ * Or < 0 if something fails.
+ */
+static int bq27x00_battery_temperature(struct bq27x00_device_info *di,
+	union power_supply_propval *val)
+{
+	if (di->cache.temperature < 0)
+		return di->cache.temperature;
+
+	if (di->chip == BQ27500)
+		val->intval = di->cache.temperature - 2731;
+	else
+		val->intval = ((di->cache.temperature * 5) - 5463) / 2;
+
+	return 0;
+}
+
+/*
+ * Return the battery average current in µA
+ * Note that current can be negative signed as well
+ * Or 0 if something fails.
+ */
+static int bq27x00_battery_current(struct bq27x00_device_info *di,
+	union power_supply_propval *val)
+{
+	int curr;
+
+	if (di->chip == BQ27500)
+	    curr = bq27x00_read(di, BQ27x00_REG_AI, false);
+	else
+	    curr = di->cache.current_now;
+
+	if (curr < 0)
+		return curr;
+
+	if (di->chip == BQ27500) {
+		/* bq27500 returns signed value */
+		val->intval = (int)((s16)curr) * 1000;
+	} else {
+		if (di->cache.flags & BQ27000_FLAG_CHGS) {
+			dev_dbg(di->dev, "negative current!\n");
+			curr = -curr;
+		}
+
+		val->intval = curr * 3570 / BQ27000_RS;
+	}
+
+	return 0;
+}
+
+static int bq27x00_battery_status(struct bq27x00_device_info *di,
+	union power_supply_propval *val)
+{
+	int status;
+
+	if (di->chip == BQ27500) {
+		if (di->cache.flags & BQ27500_FLAG_FC)
+			status = POWER_SUPPLY_STATUS_FULL;
+		else if (di->cache.flags & BQ27500_FLAG_DSC)
+			status = POWER_SUPPLY_STATUS_DISCHARGING;
+		else
+			status = POWER_SUPPLY_STATUS_CHARGING;
+	} else {
+		if (di->cache.flags & BQ27000_FLAG_FC)
+			status = POWER_SUPPLY_STATUS_FULL;
+		else if (di->cache.flags & BQ27000_FLAG_CHGS)
+			status = POWER_SUPPLY_STATUS_CHARGING;
+		else if (power_supply_am_i_supplied(&di->bat))
+			status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		else
+			status = POWER_SUPPLY_STATUS_DISCHARGING;
+	}
+
+	val->intval = status;
+
+	return 0;
+}
+
+/*
+ * Return the battery Voltage in milivolts
+ * Or < 0 if something fails.
+ */
+static int bq27x00_battery_voltage(struct bq27x00_device_info *di,
+	union power_supply_propval *val)
+{
+	int volt;
+
+	volt = bq27x00_read(di, BQ27x00_REG_VOLT, false);
+	if (volt < 0)
+		return volt;
+
+	val->intval = volt * 1000;
+
+	return 0;
+}
+
+/*
+ * Return the battery Available energy in µWh
+ * Or < 0 if something fails.
+ */
+static int bq27x00_battery_energy(struct bq27x00_device_info *di,
+	union power_supply_propval *val)
+{
+	int ae;
+
+	ae = bq27x00_read(di, BQ27x00_REG_AE, false);
+	if (ae < 0) {
+		dev_err(di->dev, "error reading available energy\n");
+		return ae;
+	}
+
+	if (di->chip == BQ27500)
+		ae *= 1000;
+	else
+		ae = ae * 29200 / BQ27000_RS;
+
+	val->intval = ae;
+
+	return 0;
+}
+
+
+static int bq27x00_simple_value(int value,
+	union power_supply_propval *val)
+{
+	if (value < 0)
+		return value;
+
+	val->intval = value;
+
 	return 0;
 }
 
@@ -249,33 +446,61 @@
 	int ret = 0;
 	struct bq27x00_device_info *di = to_bq27x00_device_info(psy);
 
+	mutex_lock(&di->lock);
+	if (time_is_before_jiffies(di->last_update + 5 * HZ)) {
+		cancel_delayed_work_sync(&di->work);
+		bq27x00_battery_poll(&di->work.work);
+	}
+	mutex_unlock(&di->lock);
+
+	if (psp != POWER_SUPPLY_PROP_PRESENT && di->cache.flags < 0)
+		return -ENODEV;
+
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
 		ret = bq27x00_battery_status(di, val);
 		break;
 	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		ret = bq27x00_battery_voltage(di, val);
+		break;
 	case POWER_SUPPLY_PROP_PRESENT:
-		val->intval = bq27x00_battery_voltage(di);
-		if (psp == POWER_SUPPLY_PROP_PRESENT)
-			val->intval = val->intval <= 0 ? 0 : 1;
+		val->intval = di->cache.flags < 0 ? 0 : 1;
 		break;
 	case POWER_SUPPLY_PROP_CURRENT_NOW:
-		val->intval = bq27x00_battery_current(di);
+		ret = bq27x00_battery_current(di, val);
 		break;
 	case POWER_SUPPLY_PROP_CAPACITY:
-		val->intval = bq27x00_battery_rsoc(di);
+		ret = bq27x00_simple_value(di->cache.capacity, val);
 		break;
 	case POWER_SUPPLY_PROP_TEMP:
-		val->intval = bq27x00_battery_temperature(di);
+		ret = bq27x00_battery_temperature(di, val);
 		break;
 	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW:
-		ret = bq27x00_battery_time(di, BQ27x00_REG_TTE, val);
+		ret = bq27x00_simple_value(di->cache.time_to_empty, val);
 		break;
 	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
-		ret = bq27x00_battery_time(di, BQ27x00_REG_TTECP, val);
+		ret = bq27x00_simple_value(di->cache.time_to_empty_avg, val);
 		break;
 	case POWER_SUPPLY_PROP_TIME_TO_FULL_NOW:
-		ret = bq27x00_battery_time(di, BQ27x00_REG_TTF, val);
+		ret = bq27x00_simple_value(di->cache.time_to_full, val);
+		break;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+		ret = bq27x00_simple_value(bq27x00_battery_read_nac(di), val);
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+		ret = bq27x00_simple_value(di->cache.charge_full, val);
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+		ret = bq27x00_simple_value(di->charge_design_full, val);
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_COUNTER:
+		ret = bq27x00_simple_value(di->cache.charge_counter, val);
+		break;
+	case POWER_SUPPLY_PROP_ENERGY_NOW:
+		ret = bq27x00_battery_energy(di, val);
 		break;
 	default:
 		return -EINVAL;
@@ -284,56 +509,91 @@
 	return ret;
 }
 
-static void bq27x00_powersupply_init(struct bq27x00_device_info *di)
+static void bq27x00_external_power_changed(struct power_supply *psy)
 {
+	struct bq27x00_device_info *di = to_bq27x00_device_info(psy);
+
+	cancel_delayed_work_sync(&di->work);
+	schedule_delayed_work(&di->work, 0);
+}
+
+static int bq27x00_powersupply_init(struct bq27x00_device_info *di)
+{
+	int ret;
+
 	di->bat.type = POWER_SUPPLY_TYPE_BATTERY;
 	di->bat.properties = bq27x00_battery_props;
 	di->bat.num_properties = ARRAY_SIZE(bq27x00_battery_props);
 	di->bat.get_property = bq27x00_battery_get_property;
-	di->bat.external_power_changed = NULL;
+	di->bat.external_power_changed = bq27x00_external_power_changed;
+
+	INIT_DELAYED_WORK(&di->work, bq27x00_battery_poll);
+	mutex_init(&di->lock);
+
+	ret = power_supply_register(di->dev, &di->bat);
+	if (ret) {
+		dev_err(di->dev, "failed to register battery: %d\n", ret);
+		return ret;
+	}
+
+	dev_info(di->dev, "support ver. %s enabled\n", DRIVER_VERSION);
+
+	bq27x00_update(di);
+
+	return 0;
 }
 
-/*
- * i2c specific code
- */
-
-static int bq27x00_read_i2c(u8 reg, int *rt_value, int b_single,
-			struct bq27x00_device_info *di)
+static void bq27x00_powersupply_unregister(struct bq27x00_device_info *di)
 {
-	struct i2c_client *client = di->client;
-	struct i2c_msg msg[1];
+	cancel_delayed_work_sync(&di->work);
+
+	power_supply_unregister(&di->bat);
+
+	mutex_destroy(&di->lock);
+}
+
+
+/* i2c specific code */
+#ifdef CONFIG_BATTERY_BQ27X00_I2C
+
+/* If the system has several batteries we need a different name for each
+ * of them...
+ */
+static DEFINE_IDR(battery_id);
+static DEFINE_MUTEX(battery_mutex);
+
+static int bq27x00_read_i2c(struct bq27x00_device_info *di, u8 reg, bool single)
+{
+	struct i2c_client *client = to_i2c_client(di->dev);
+	struct i2c_msg msg[2];
 	unsigned char data[2];
-	int err;
+	int ret;
 
 	if (!client->adapter)
 		return -ENODEV;
 
-	msg->addr = client->addr;
-	msg->flags = 0;
-	msg->len = 1;
-	msg->buf = data;
+	msg[0].addr = client->addr;
+	msg[0].flags = 0;
+	msg[0].buf = &reg;
+	msg[0].len = sizeof(reg);
+	msg[1].addr = client->addr;
+	msg[1].flags = I2C_M_RD;
+	msg[1].buf = data;
+	if (single)
+		msg[1].len = 1;
+	else
+		msg[1].len = 2;
 
-	data[0] = reg;
-	err = i2c_transfer(client->adapter, msg, 1);
+	ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+	if (ret < 0)
+		return ret;
 
-	if (err >= 0) {
-		if (!b_single)
-			msg->len = 2;
-		else
-			msg->len = 1;
+	if (!single)
+		ret = get_unaligned_le16(data);
+	else
+		ret = data[0];
 
-		msg->flags = I2C_M_RD;
-		err = i2c_transfer(client->adapter, msg, 1);
-		if (err >= 0) {
-			if (!b_single)
-				*rt_value = get_unaligned_le16(data);
-			else
-				*rt_value = data[0];
-
-			return 0;
-		}
-	}
-	return err;
+	return ret;
 }
 
 static int bq27x00_battery_probe(struct i2c_client *client,
@@ -341,7 +601,6 @@
 {
 	char *name;
 	struct bq27x00_device_info *di;
-	struct bq27x00_access_methods *bus;
 	int num;
 	int retval = 0;
 
@@ -368,38 +627,20 @@
 		retval = -ENOMEM;
 		goto batt_failed_2;
 	}
-	di->id = num;
-	di->chip = id->driver_data;
 
-	bus = kzalloc(sizeof(*bus), GFP_KERNEL);
-	if (!bus) {
-		dev_err(&client->dev, "failed to allocate access method "
-					"data\n");
-		retval = -ENOMEM;
+	di->id = num;
+	di->dev = &client->dev;
+	di->chip = id->driver_data;
+	di->bat.name = name;
+	di->bus.read = &bq27x00_read_i2c;
+
+	if (bq27x00_powersupply_init(di))
 		goto batt_failed_3;
-	}
 
 	i2c_set_clientdata(client, di);
-	di->dev = &client->dev;
-	di->bat.name = name;
-	bus->read = &bq27x00_read_i2c;
-	di->bus = bus;
-	di->client = client;
-
-	bq27x00_powersupply_init(di);
-
-	retval = power_supply_register(&client->dev, &di->bat);
-	if (retval) {
-		dev_err(&client->dev, "failed to register battery\n");
-		goto batt_failed_4;
-	}
-
-	dev_info(&client->dev, "support ver. %s enabled\n", DRIVER_VERSION);
 
 	return 0;
 
-batt_failed_4:
-	kfree(bus);
 batt_failed_3:
 	kfree(di);
 batt_failed_2:
@@ -416,9 +657,8 @@
 {
 	struct bq27x00_device_info *di = i2c_get_clientdata(client);
 
-	power_supply_unregister(&di->bat);
+	bq27x00_powersupply_unregister(di);
 
-	kfree(di->bus);
 	kfree(di->bat.name);
 
 	mutex_lock(&battery_mutex);
@@ -430,15 +670,12 @@
 	return 0;
 }
 
-/*
- * Module stuff
- */
-
 static const struct i2c_device_id bq27x00_id[] = {
 	{ "bq27200", BQ27000 },	/* bq27200 is same as bq27000, but with i2c */
 	{ "bq27500", BQ27500 },
 	{},
 };
+MODULE_DEVICE_TABLE(i2c, bq27x00_id);
 
 static struct i2c_driver bq27x00_battery_driver = {
 	.driver = {
@@ -449,13 +686,164 @@
 	.id_table = bq27x00_id,
 };
 
+static inline int bq27x00_battery_i2c_init(void)
+{
+	int ret = i2c_add_driver(&bq27x00_battery_driver);
+	if (ret)
+		printk(KERN_ERR "Unable to register BQ27x00 i2c driver\n");
+
+	return ret;
+}
+
+static inline void bq27x00_battery_i2c_exit(void)
+{
+	i2c_del_driver(&bq27x00_battery_driver);
+}
+
+#else
+
+static inline int bq27x00_battery_i2c_init(void) { return 0; }
+static inline void bq27x00_battery_i2c_exit(void) {};
+
+#endif
+
+/* platform specific code */
+#ifdef CONFIG_BATTERY_BQ27X00_PLATFORM
+
+static int bq27000_read_platform(struct bq27x00_device_info *di, u8 reg,
+			bool single)
+{
+	struct device *dev = di->dev;
+	struct bq27000_platform_data *pdata = dev->platform_data;
+	unsigned int timeout = 3;
+	int upper, lower;
+	int temp;
+
+	if (!single) {
+		/* Make sure the value has not changed in between reading the
+		 * lower and the upper part */
+		upper = pdata->read(dev, reg + 1);
+		do {
+			temp = upper;
+			if (upper < 0)
+				return upper;
+
+			lower = pdata->read(dev, reg);
+			if (lower < 0)
+				return lower;
+
+			upper = pdata->read(dev, reg + 1);
+		} while (temp != upper && --timeout);
+
+		if (timeout == 0)
+			return -EIO;
+
+		return (upper << 8) | lower;
+	}
+
+	return pdata->read(dev, reg);
+}
+
+static int __devinit bq27000_battery_probe(struct platform_device *pdev)
+{
+	struct bq27x00_device_info *di;
+	struct bq27000_platform_data *pdata = pdev->dev.platform_data;
+	int ret;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "no platform_data supplied\n");
+		return -EINVAL;
+	}
+
+	if (!pdata->read) {
+		dev_err(&pdev->dev, "no hdq read callback supplied\n");
+		return -EINVAL;
+	}
+
+	di = kzalloc(sizeof(*di), GFP_KERNEL);
+	if (!di) {
+		dev_err(&pdev->dev, "failed to allocate device info data\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, di);
+
+	di->dev = &pdev->dev;
+	di->chip = BQ27000;
+
+	di->bat.name = pdata->name ?: dev_name(&pdev->dev);
+	di->bus.read = &bq27000_read_platform;
+
+	ret = bq27x00_powersupply_init(di);
+	if (ret)
+		goto err_free;
+
+	return 0;
+
+err_free:
+	platform_set_drvdata(pdev, NULL);
+	kfree(di);
+
+	return ret;
+}
+
+static int __devexit bq27000_battery_remove(struct platform_device *pdev)
+{
+	struct bq27x00_device_info *di = platform_get_drvdata(pdev);
+
+	bq27x00_powersupply_unregister(di);
+
+	platform_set_drvdata(pdev, NULL);
+	kfree(di);
+
+	return 0;
+}
+
+static struct platform_driver bq27000_battery_driver = {
+	.probe	= bq27000_battery_probe,
+	.remove = __devexit_p(bq27000_battery_remove),
+	.driver = {
+		.name = "bq27000-battery",
+		.owner = THIS_MODULE,
+	},
+};
+
+static inline int bq27x00_battery_platform_init(void)
+{
+	int ret = platform_driver_register(&bq27000_battery_driver);
+	if (ret)
+		printk(KERN_ERR "Unable to register BQ27000 platform driver\n");
+
+	return ret;
+}
+
+static inline void bq27x00_battery_platform_exit(void)
+{
+	platform_driver_unregister(&bq27000_battery_driver);
+}
+
+#else
+
+static inline int bq27x00_battery_platform_init(void) { return 0; }
+static inline void bq27x00_battery_platform_exit(void) {};
+
+#endif
+
+/*
+ * Module stuff
+ */
+
 static int __init bq27x00_battery_init(void)
 {
 	int ret;
 
-	ret = i2c_add_driver(&bq27x00_battery_driver);
+	ret = bq27x00_battery_i2c_init();
 	if (ret)
-		printk(KERN_ERR "Unable to register BQ27x00 driver\n");
+		return ret;
+
+	ret = bq27x00_battery_platform_init();
+	if (ret)
+		bq27x00_battery_i2c_exit();
 
 	return ret;
 }
@@ -463,7 +851,8 @@
 
 static void __exit bq27x00_battery_exit(void)
 {
-	i2c_del_driver(&bq27x00_battery_driver);
+	bq27x00_battery_platform_exit();
+	bq27x00_battery_i2c_exit();
 }
 module_exit(bq27x00_battery_exit);
 

diff --git a/drivers/power/ds2782_battery.c b/drivers/power/ds2782_battery.c
index 6957e8a..4d2dc4f 100644
--- a/drivers/power/ds2782_battery.c
+++ b/drivers/power/ds2782_battery.c

@@ -393,6 +393,7 @@
 	{"ds2786", DS2786},
 	{},
 };
+MODULE_DEVICE_TABLE(i2c, ds278x_id);
 
 static struct i2c_driver ds278x_battery_driver = {
 	.driver 	= {

diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 970f733..329b46b 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c

@@ -171,6 +171,8 @@
 	dev_set_drvdata(dev, psy);
 	psy->dev = dev;
 
+	INIT_WORK(&psy->changed_work, power_supply_changed_work);
+
 	rc = kobject_set_name(&dev->kobj, "%s", psy->name);
 	if (rc)
 		goto kobject_set_name_failed;
@@ -179,8 +181,6 @@
 	if (rc)
 		goto device_add_failed;
 
-	INIT_WORK(&psy->changed_work, power_supply_changed_work);
-
 	rc = power_supply_create_triggers(psy);
 	if (rc)
 		goto create_triggers_failed;

diff --git a/drivers/power/power_supply_leds.c b/drivers/power/power_supply_leds.c
index 031a554..da25eb9 100644
--- a/drivers/power/power_supply_leds.c
+++ b/drivers/power/power_supply_leds.c

@@ -21,6 +21,8 @@
 static void power_supply_update_bat_leds(struct power_supply *psy)
 {
 	union power_supply_propval status;
+	unsigned long delay_on = 0;
+	unsigned long delay_off = 0;
 
 	if (psy->get_property(psy, POWER_SUPPLY_PROP_STATUS, &status))
 		return;
@@ -32,16 +34,22 @@
 		led_trigger_event(psy->charging_full_trig, LED_FULL);
 		led_trigger_event(psy->charging_trig, LED_OFF);
 		led_trigger_event(psy->full_trig, LED_FULL);
+		led_trigger_event(psy->charging_blink_full_solid_trig,
+			LED_FULL);
 		break;
 	case POWER_SUPPLY_STATUS_CHARGING:
 		led_trigger_event(psy->charging_full_trig, LED_FULL);
 		led_trigger_event(psy->charging_trig, LED_FULL);
 		led_trigger_event(psy->full_trig, LED_OFF);
+		led_trigger_blink(psy->charging_blink_full_solid_trig,
+			&delay_on, &delay_off);
 		break;
 	default:
 		led_trigger_event(psy->charging_full_trig, LED_OFF);
 		led_trigger_event(psy->charging_trig, LED_OFF);
 		led_trigger_event(psy->full_trig, LED_OFF);
+		led_trigger_event(psy->charging_blink_full_solid_trig,
+			LED_OFF);
 		break;
 	}
 }
@@ -64,15 +72,24 @@
 	if (!psy->full_trig_name)
 		goto full_failed;
 
+	psy->charging_blink_full_solid_trig_name = kasprintf(GFP_KERNEL,
+		"%s-charging-blink-full-solid", psy->name);
+	if (!psy->charging_blink_full_solid_trig_name)
+		goto charging_blink_full_solid_failed;
+
 	led_trigger_register_simple(psy->charging_full_trig_name,
 				    &psy->charging_full_trig);
 	led_trigger_register_simple(psy->charging_trig_name,
 				    &psy->charging_trig);
 	led_trigger_register_simple(psy->full_trig_name,
 				    &psy->full_trig);
+	led_trigger_register_simple(psy->charging_blink_full_solid_trig_name,
+				    &psy->charging_blink_full_solid_trig);
 
 	goto success;
 
+charging_blink_full_solid_failed:
+	kfree(psy->full_trig_name);
 full_failed:
 	kfree(psy->charging_trig_name);
 charging_failed:
@@ -88,6 +105,8 @@
 	led_trigger_unregister_simple(psy->charging_full_trig);
 	led_trigger_unregister_simple(psy->charging_trig);
 	led_trigger_unregister_simple(psy->full_trig);
+	led_trigger_unregister_simple(psy->charging_blink_full_solid_trig);
+	kfree(psy->charging_blink_full_solid_trig_name);
 	kfree(psy->full_trig_name);
 	kfree(psy->charging_trig_name);
 	kfree(psy->charging_full_trig_name);

diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index cd1f907..605514a 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c

@@ -270,7 +270,7 @@
 		attr = &power_supply_attrs[psy->properties[j]];
 
 		ret = power_supply_show_property(dev, attr, prop_buf);
-		if (ret == -ENODEV) {
+		if (ret == -ENODEV || ret == -ENODATA) {
 			/* When a battery is absent, we expect -ENODEV. Don't abort;
 			   send the uevent with at least the the PRESENT=0 property */
 			ret = 0;

diff --git a/drivers/power/s3c_adc_battery.c b/drivers/power/s3c_adc_battery.c
index 4255f23..d36c289 100644
--- a/drivers/power/s3c_adc_battery.c
+++ b/drivers/power/s3c_adc_battery.c

@@ -406,8 +406,8 @@
 	return 0;
 }
 #else
-#define s3c_adc_battery_suspend NULL
-#define s3c_adc_battery_resume NULL
+#define s3c_adc_bat_suspend NULL
+#define s3c_adc_bat_resume NULL
 #endif
 
 static struct platform_driver s3c_adc_bat_driver = {

diff --git a/drivers/power/twl4030_charger.c b/drivers/power/twl4030_charger.c
index ff1f423..92c16e1 100644
--- a/drivers/power/twl4030_charger.c
+++ b/drivers/power/twl4030_charger.c

@@ -71,8 +71,11 @@
 	struct power_supply	usb;
 	struct otg_transceiver	*transceiver;
 	struct notifier_block	otg_nb;
+	struct work_struct	work;
 	int			irq_chg;
 	int			irq_bci;
+
+	unsigned long		event;
 };
 
 /*
@@ -258,14 +261,11 @@
 	return IRQ_HANDLED;
 }
 
-static int twl4030_bci_usb_ncb(struct notifier_block *nb, unsigned long val,
-			       void *priv)
+static void twl4030_bci_usb_work(struct work_struct *data)
 {
-	struct twl4030_bci *bci = container_of(nb, struct twl4030_bci, otg_nb);
+	struct twl4030_bci *bci = container_of(data, struct twl4030_bci, work);
 
-	dev_dbg(bci->dev, "OTG notify %lu\n", val);
-
-	switch (val) {
+	switch (bci->event) {
 	case USB_EVENT_VBUS:
 	case USB_EVENT_CHARGER:
 		twl4030_charger_enable_usb(bci, true);
@@ -274,6 +274,17 @@
 		twl4030_charger_enable_usb(bci, false);
 		break;
 	}
+}
+
+static int twl4030_bci_usb_ncb(struct notifier_block *nb, unsigned long val,
+			       void *priv)
+{
+	struct twl4030_bci *bci = container_of(nb, struct twl4030_bci, otg_nb);
+
+	dev_dbg(bci->dev, "OTG notify %lu\n", val);
+
+	bci->event = val;
+	schedule_work(&bci->work);
 
 	return NOTIFY_OK;
 }
@@ -466,6 +477,8 @@
 		goto fail_bci_irq;
 	}
 
+	INIT_WORK(&bci->work, twl4030_bci_usb_work);
+
 	bci->transceiver = otg_get_transceiver();
 	if (bci->transceiver != NULL) {
 		bci->otg_nb.notifier_call = twl4030_bci_usb_ncb;

diff --git a/drivers/power/z2_battery.c b/drivers/power/z2_battery.c
index e5ed52d..2a9ab89 100644
--- a/drivers/power/z2_battery.c
+++ b/drivers/power/z2_battery.c

@@ -134,6 +134,8 @@
 	enum power_supply_property *prop;
 	struct z2_battery_info *info = charger->info;
 
+	if (info->charge_gpio >= 0)
+		props++;	/* POWER_SUPPLY_PROP_STATUS */
 	if (info->batt_tech >= 0)
 		props++;	/* POWER_SUPPLY_PROP_TECHNOLOGY */
 	if (info->batt_I2C_reg >= 0)
@@ -293,6 +295,7 @@
 	{ "aer915", 0 },
 	{ }
 };
+MODULE_DEVICE_TABLE(i2c, z2_batt_id);
 
 static struct i2c_driver z2_batt_driver = {
 	.driver	= {

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index de75f67..b9f29e0 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig

@@ -126,7 +126,7 @@
 	  and S5PC1XX chips to control VCC_CORE and VCC_USIM voltages.
 
 config REGULATOR_TWL4030
-	bool "TI TWL4030/TWL5030/TWL6030/TPS695x0 PMIC"
+	bool "TI TWL4030/TWL5030/TWL6030/TPS659x0 PMIC"
 	depends on TWL4030_CORE
 	help
 	  This driver supports the voltage regulators provided by

diff --git a/drivers/regulator/ab3100.c b/drivers/regulator/ab3100.c
index 2dec589..b1d7794 100644
--- a/drivers/regulator/ab3100.c
+++ b/drivers/regulator/ab3100.c

@@ -206,29 +206,6 @@
 		return err;
 	}
 
-	/* Per-regulator power on delay from spec */
-	switch (abreg->regreg) {
-	case AB3100_LDO_A: /* Fallthrough */
-	case AB3100_LDO_C: /* Fallthrough */
-	case AB3100_LDO_D: /* Fallthrough */
-	case AB3100_LDO_E: /* Fallthrough */
-	case AB3100_LDO_H: /* Fallthrough */
-	case AB3100_LDO_K:
-		udelay(200);
-		break;
-	case AB3100_LDO_F:
-		udelay(600);
-		break;
-	case AB3100_LDO_G:
-		udelay(400);
-		break;
-	case AB3100_BUCK:
-		mdelay(1);
-		break;
-	default:
-		break;
-	}
-
 	return 0;
 }
 
@@ -450,11 +427,37 @@
 	return abreg->plfdata->external_voltage;
 }
 
+static int ab3100_enable_time_regulator(struct regulator_dev *reg)
+{
+	struct ab3100_regulator *abreg = reg->reg_data;
+
+	/* Per-regulator power on delay from spec */
+	switch (abreg->regreg) {
+	case AB3100_LDO_A: /* Fallthrough */
+	case AB3100_LDO_C: /* Fallthrough */
+	case AB3100_LDO_D: /* Fallthrough */
+	case AB3100_LDO_E: /* Fallthrough */
+	case AB3100_LDO_H: /* Fallthrough */
+	case AB3100_LDO_K:
+		return 200;
+	case AB3100_LDO_F:
+		return 600;
+	case AB3100_LDO_G:
+		return 400;
+	case AB3100_BUCK:
+		return 1000;
+	default:
+		break;
+	}
+	return 0;
+}
+
 static struct regulator_ops regulator_ops_fixed = {
 	.enable      = ab3100_enable_regulator,
 	.disable     = ab3100_disable_regulator,
 	.is_enabled  = ab3100_is_enabled_regulator,
 	.get_voltage = ab3100_get_voltage_regulator,
+	.enable_time = ab3100_enable_time_regulator,
 };
 
 static struct regulator_ops regulator_ops_variable = {
@@ -464,6 +467,7 @@
 	.get_voltage = ab3100_get_voltage_regulator,
 	.set_voltage = ab3100_set_voltage_regulator,
 	.list_voltage = ab3100_list_voltage_regulator,
+	.enable_time = ab3100_enable_time_regulator,
 };
 
 static struct regulator_ops regulator_ops_variable_sleepable = {
@@ -474,6 +478,7 @@
 	.set_voltage = ab3100_set_voltage_regulator,
 	.set_suspend_voltage = ab3100_set_suspend_voltage_regulator,
 	.list_voltage = ab3100_list_voltage_regulator,
+	.enable_time = ab3100_enable_time_regulator,
 };
 
 /*

diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c
index d9a052c..02f3c23 100644
--- a/drivers/regulator/ab8500.c
+++ b/drivers/regulator/ab8500.c

@@ -9,7 +9,7 @@
  * AB8500 peripheral regulators
  *
  * AB8500 supports the following regulators:
- *   VAUX1/2/3, VINTCORE, VTVOUT, VAUDIO, VAMIC1/2, VDMIC, VANA
+ *   VAUX1/2/3, VINTCORE, VTVOUT, VUSB, VAUDIO, VAMIC1/2, VDMIC, VANA
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -38,6 +38,7 @@
  * @voltage_mask: mask to control regulator voltage
  * @voltages: supported voltage table
  * @voltages_len: number of supported voltages for the regulator
+ * @delay: startup/set voltage delay in us
  */
 struct ab8500_regulator_info {
 	struct device		*dev;
@@ -55,6 +56,7 @@
 	u8 voltage_mask;
 	int const *voltages;
 	int voltages_len;
+	unsigned int delay;
 };
 
 /* voltage tables for the vauxn/vintcore supplies */
@@ -290,6 +292,29 @@
 	return ret;
 }
 
+static int ab8500_regulator_enable_time(struct regulator_dev *rdev)
+{
+	struct ab8500_regulator_info *info = rdev_get_drvdata(rdev);
+
+	return info->delay;
+}
+
+static int ab8500_regulator_set_voltage_time_sel(struct regulator_dev *rdev,
+					     unsigned int old_sel,
+					     unsigned int new_sel)
+{
+	struct ab8500_regulator_info *info = rdev_get_drvdata(rdev);
+	int ret;
+
+	/* If the regulator isn't on, it won't take time here */
+	ret = ab8500_regulator_is_enabled(rdev);
+	if (ret < 0)
+		return ret;
+	if (!ret)
+		return 0;
+	return info->delay;
+}
+
 static struct regulator_ops ab8500_regulator_ops = {
 	.enable		= ab8500_regulator_enable,
 	.disable	= ab8500_regulator_disable,
@@ -297,6 +322,8 @@
 	.get_voltage	= ab8500_regulator_get_voltage,
 	.set_voltage	= ab8500_regulator_set_voltage,
 	.list_voltage	= ab8500_list_voltage,
+	.enable_time	= ab8500_regulator_enable_time,
+	.set_voltage_time_sel = ab8500_regulator_set_voltage_time_sel,
 };
 
 static int ab8500_fixed_get_voltage(struct regulator_dev *rdev)
@@ -317,6 +344,8 @@
 	.is_enabled	= ab8500_regulator_is_enabled,
 	.get_voltage	= ab8500_fixed_get_voltage,
 	.list_voltage	= ab8500_list_voltage,
+	.enable_time	= ab8500_regulator_enable_time,
+	.set_voltage_time_sel = ab8500_regulator_set_voltage_time_sel,
 };
 
 static struct ab8500_regulator_info
@@ -426,12 +455,28 @@
 			.owner		= THIS_MODULE,
 			.n_voltages	= 1,
 		},
+		.delay			= 10000,
 		.fixed_uV		= 2000000,
 		.update_bank		= 0x03,
 		.update_reg		= 0x80,
 		.update_mask		= 0x82,
 		.update_val_enable	= 0x02,
 	},
+	[AB8500_LDO_USB] = {
+		.desc = {
+			.name           = "LDO-USB",
+			.ops            = &ab8500_regulator_fixed_ops,
+			.type           = REGULATOR_VOLTAGE,
+			.id             = AB8500_LDO_USB,
+			.owner          = THIS_MODULE,
+			.n_voltages     = 1,
+		},
+		.fixed_uV               = 3300000,
+		.update_bank            = 0x03,
+		.update_reg             = 0x82,
+		.update_mask            = 0x03,
+		.update_val_enable      = 0x01,
+	},
 	[AB8500_LDO_AUDIO] = {
 		.desc = {
 			.name		= "LDO-AUDIO",
@@ -511,6 +556,186 @@
 
 };
 
+struct ab8500_reg_init {
+	u8 bank;
+	u8 addr;
+	u8 mask;
+};
+
+#define REG_INIT(_id, _bank, _addr, _mask)	\
+	[_id] = {				\
+		.bank = _bank,			\
+		.addr = _addr,			\
+		.mask = _mask,			\
+	}
+
+static struct ab8500_reg_init ab8500_reg_init[] = {
+	/*
+	 * 0x30, VanaRequestCtrl
+	 * 0x0C, VpllRequestCtrl
+	 * 0xc0, VextSupply1RequestCtrl
+	 */
+	REG_INIT(AB8500_REGUREQUESTCTRL2,	0x03, 0x04, 0xfc),
+	/*
+	 * 0x03, VextSupply2RequestCtrl
+	 * 0x0c, VextSupply3RequestCtrl
+	 * 0x30, Vaux1RequestCtrl
+	 * 0xc0, Vaux2RequestCtrl
+	 */
+	REG_INIT(AB8500_REGUREQUESTCTRL3,	0x03, 0x05, 0xff),
+	/*
+	 * 0x03, Vaux3RequestCtrl
+	 * 0x04, SwHPReq
+	 */
+	REG_INIT(AB8500_REGUREQUESTCTRL4,	0x03, 0x06, 0x07),
+	/*
+	 * 0x08, VanaSysClkReq1HPValid
+	 * 0x20, Vaux1SysClkReq1HPValid
+	 * 0x40, Vaux2SysClkReq1HPValid
+	 * 0x80, Vaux3SysClkReq1HPValid
+	 */
+	REG_INIT(AB8500_REGUSYSCLKREQ1HPVALID1,	0x03, 0x07, 0xe8),
+	/*
+	 * 0x10, VextSupply1SysClkReq1HPValid
+	 * 0x20, VextSupply2SysClkReq1HPValid
+	 * 0x40, VextSupply3SysClkReq1HPValid
+	 */
+	REG_INIT(AB8500_REGUSYSCLKREQ1HPVALID2,	0x03, 0x08, 0x70),
+	/*
+	 * 0x08, VanaHwHPReq1Valid
+	 * 0x20, Vaux1HwHPReq1Valid
+	 * 0x40, Vaux2HwHPReq1Valid
+	 * 0x80, Vaux3HwHPReq1Valid
+	 */
+	REG_INIT(AB8500_REGUHWHPREQ1VALID1,	0x03, 0x09, 0xe8),
+	/*
+	 * 0x01, VextSupply1HwHPReq1Valid
+	 * 0x02, VextSupply2HwHPReq1Valid
+	 * 0x04, VextSupply3HwHPReq1Valid
+	 */
+	REG_INIT(AB8500_REGUHWHPREQ1VALID2,	0x03, 0x0a, 0x07),
+	/*
+	 * 0x08, VanaHwHPReq2Valid
+	 * 0x20, Vaux1HwHPReq2Valid
+	 * 0x40, Vaux2HwHPReq2Valid
+	 * 0x80, Vaux3HwHPReq2Valid
+	 */
+	REG_INIT(AB8500_REGUHWHPREQ2VALID1,	0x03, 0x0b, 0xe8),
+	/*
+	 * 0x01, VextSupply1HwHPReq2Valid
+	 * 0x02, VextSupply2HwHPReq2Valid
+	 * 0x04, VextSupply3HwHPReq2Valid
+	 */
+	REG_INIT(AB8500_REGUHWHPREQ2VALID2,	0x03, 0x0c, 0x07),
+	/*
+	 * 0x20, VanaSwHPReqValid
+	 * 0x80, Vaux1SwHPReqValid
+	 */
+	REG_INIT(AB8500_REGUSWHPREQVALID1,	0x03, 0x0d, 0xa0),
+	/*
+	 * 0x01, Vaux2SwHPReqValid
+	 * 0x02, Vaux3SwHPReqValid
+	 * 0x04, VextSupply1SwHPReqValid
+	 * 0x08, VextSupply2SwHPReqValid
+	 * 0x10, VextSupply3SwHPReqValid
+	 */
+	REG_INIT(AB8500_REGUSWHPREQVALID2,	0x03, 0x0e, 0x1f),
+	/*
+	 * 0x02, SysClkReq2Valid1
+	 * ...
+	 * 0x80, SysClkReq8Valid1
+	 */
+	REG_INIT(AB8500_REGUSYSCLKREQVALID1,	0x03, 0x0f, 0xfe),
+	/*
+	 * 0x02, SysClkReq2Valid2
+	 * ...
+	 * 0x80, SysClkReq8Valid2
+	 */
+	REG_INIT(AB8500_REGUSYSCLKREQVALID2,	0x03, 0x10, 0xfe),
+	/*
+	 * 0x02, VTVoutEna
+	 * 0x04, Vintcore12Ena
+	 * 0x38, Vintcore12Sel
+	 * 0x40, Vintcore12LP
+	 * 0x80, VTVoutLP
+	 */
+	REG_INIT(AB8500_REGUMISC1,		0x03, 0x80, 0xfe),
+	/*
+	 * 0x02, VaudioEna
+	 * 0x04, VdmicEna
+	 * 0x08, Vamic1Ena
+	 * 0x10, Vamic2Ena
+	 */
+	REG_INIT(AB8500_VAUDIOSUPPLY,		0x03, 0x83, 0x1e),
+	/*
+	 * 0x01, Vamic1_dzout
+	 * 0x02, Vamic2_dzout
+	 */
+	REG_INIT(AB8500_REGUCTRL1VAMIC,		0x03, 0x84, 0x03),
+	/*
+	 * 0x0c, VanaRegu
+	 * 0x03, VpllRegu
+	 */
+	REG_INIT(AB8500_VPLLVANAREGU,		0x04, 0x06, 0x0f),
+	/*
+	 * 0x01, VrefDDREna
+	 * 0x02, VrefDDRSleepMode
+	 */
+	REG_INIT(AB8500_VREFDDR,		0x04, 0x07, 0x03),
+	/*
+	 * 0x03, VextSupply1Regu
+	 * 0x0c, VextSupply2Regu
+	 * 0x30, VextSupply3Regu
+	 * 0x40, ExtSupply2Bypass
+	 * 0x80, ExtSupply3Bypass
+	 */
+	REG_INIT(AB8500_EXTSUPPLYREGU,		0x04, 0x08, 0xff),
+	/*
+	 * 0x03, Vaux1Regu
+	 * 0x0c, Vaux2Regu
+	 */
+	REG_INIT(AB8500_VAUX12REGU,		0x04, 0x09, 0x0f),
+	/*
+	 * 0x03, Vaux3Regu
+	 */
+	REG_INIT(AB8500_VRF1VAUX3REGU,		0x04, 0x0a, 0x03),
+	/*
+	 * 0x3f, Vsmps1Sel1
+	 */
+	REG_INIT(AB8500_VSMPS1SEL1,		0x04, 0x13, 0x3f),
+	/*
+	 * 0x0f, Vaux1Sel
+	 */
+	REG_INIT(AB8500_VAUX1SEL,		0x04, 0x1f, 0x0f),
+	/*
+	 * 0x0f, Vaux2Sel
+	 */
+	REG_INIT(AB8500_VAUX2SEL,		0x04, 0x20, 0x0f),
+	/*
+	 * 0x07, Vaux3Sel
+	 */
+	REG_INIT(AB8500_VRF1VAUX3SEL,		0x04, 0x21, 0x07),
+	/*
+	 * 0x01, VextSupply12LP
+	 */
+	REG_INIT(AB8500_REGUCTRL2SPARE,		0x04, 0x22, 0x01),
+	/*
+	 * 0x04, Vaux1Disch
+	 * 0x08, Vaux2Disch
+	 * 0x10, Vaux3Disch
+	 * 0x20, Vintcore12Disch
+	 * 0x40, VTVoutDisch
+	 * 0x80, VaudioDisch
+	 */
+	REG_INIT(AB8500_REGUCTRLDISCH,		0x04, 0x43, 0xfc),
+	/*
+	 * 0x02, VanaDisch
+	 * 0x04, VdmicPullDownEna
+	 * 0x10, VdmicDisch
+	 */
+	REG_INIT(AB8500_REGUCTRLDISCH2,		0x04, 0x44, 0x16),
+};
+
 static __devinit int ab8500_regulator_probe(struct platform_device *pdev)
 {
 	struct ab8500 *ab8500 = dev_get_drvdata(pdev->dev.parent);
@@ -529,10 +754,51 @@
 
 	/* make sure the platform data has the correct size */
 	if (pdata->num_regulator != ARRAY_SIZE(ab8500_regulator_info)) {
-		dev_err(&pdev->dev, "platform configuration error\n");
+		dev_err(&pdev->dev, "Configuration error: size mismatch.\n");
 		return -EINVAL;
 	}
 
+	/* initialize registers */
+	for (i = 0; i < pdata->num_regulator_reg_init; i++) {
+		int id;
+		u8 value;
+
+		id = pdata->regulator_reg_init[i].id;
+		value = pdata->regulator_reg_init[i].value;
+
+		/* check for configuration errors */
+		if (id >= AB8500_NUM_REGULATOR_REGISTERS) {
+			dev_err(&pdev->dev,
+				"Configuration error: id outside range.\n");
+			return -EINVAL;
+		}
+		if (value & ~ab8500_reg_init[id].mask) {
+			dev_err(&pdev->dev,
+				"Configuration error: value outside mask.\n");
+			return -EINVAL;
+		}
+
+		/* initialize register */
+		err = abx500_mask_and_set_register_interruptible(&pdev->dev,
+			ab8500_reg_init[id].bank,
+			ab8500_reg_init[id].addr,
+			ab8500_reg_init[id].mask,
+			value);
+		if (err < 0) {
+			dev_err(&pdev->dev,
+				"Failed to initialize 0x%02x, 0x%02x.\n",
+				ab8500_reg_init[id].bank,
+				ab8500_reg_init[id].addr);
+			return err;
+		}
+		dev_vdbg(&pdev->dev,
+			"  init: 0x%02x, 0x%02x, 0x%02x, 0x%02x\n",
+			ab8500_reg_init[id].bank,
+			ab8500_reg_init[id].addr,
+			ab8500_reg_init[id].mask,
+			value);
+	}
+
 	/* register all regulators */
 	for (i = 0; i < ARRAY_SIZE(ab8500_regulator_info); i++) {
 		struct ab8500_regulator_info *info = NULL;

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 9fa2095..3ffc697 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c

@@ -1629,6 +1629,7 @@
 				     int min_uV, int max_uV)
 {
 	int ret;
+	int delay = 0;
 	unsigned int selector;
 
 	trace_regulator_set_voltage(rdev_get_name(rdev), min_uV, max_uV);
@@ -1662,6 +1663,22 @@
 			}
 		}
 
+		/*
+		 * If we can't obtain the old selector there is not enough
+		 * info to call set_voltage_time_sel().
+		 */
+		if (rdev->desc->ops->set_voltage_time_sel &&
+		    rdev->desc->ops->get_voltage_sel) {
+			unsigned int old_selector = 0;
+
+			ret = rdev->desc->ops->get_voltage_sel(rdev);
+			if (ret < 0)
+				return ret;
+			old_selector = ret;
+			delay = rdev->desc->ops->set_voltage_time_sel(rdev,
+						old_selector, selector);
+		}
+
 		if (best_val != INT_MAX) {
 			ret = rdev->desc->ops->set_voltage_sel(rdev, selector);
 			selector = best_val;
@@ -1672,6 +1689,14 @@
 		ret = -EINVAL;
 	}
 
+	/* Insert any necessary delays */
+	if (delay >= 1000) {
+		mdelay(delay / 1000);
+		udelay(delay % 1000);
+	} else if (delay) {
+		udelay(delay);
+	}
+
 	if (ret == 0)
 		_notifier_call_chain(rdev, REGULATOR_EVENT_VOLTAGE_CHANGE,
 				     NULL);
@@ -1740,6 +1765,51 @@
 EXPORT_SYMBOL_GPL(regulator_set_voltage);
 
 /**
+ * regulator_set_voltage_time - get raise/fall time
+ * @regulator: regulator source
+ * @old_uV: starting voltage in microvolts
+ * @new_uV: target voltage in microvolts
+ *
+ * Provided with the starting and ending voltage, this function attempts to
+ * calculate the time in microseconds required to rise or fall to this new
+ * voltage.
+ */
+int regulator_set_voltage_time(struct regulator *regulator,
+			       int old_uV, int new_uV)
+{
+	struct regulator_dev	*rdev = regulator->rdev;
+	struct regulator_ops	*ops = rdev->desc->ops;
+	int old_sel = -1;
+	int new_sel = -1;
+	int voltage;
+	int i;
+
+	/* Currently requires operations to do this */
+	if (!ops->list_voltage || !ops->set_voltage_time_sel
+	    || !rdev->desc->n_voltages)
+		return -EINVAL;
+
+	for (i = 0; i < rdev->desc->n_voltages; i++) {
+		/* We only look for exact voltage matches here */
+		voltage = regulator_list_voltage(regulator, i);
+		if (voltage < 0)
+			return -EINVAL;
+		if (voltage == 0)
+			continue;
+		if (voltage == old_uV)
+			old_sel = i;
+		if (voltage == new_uV)
+			new_sel = i;
+	}
+
+	if (old_sel < 0 || new_sel < 0)
+		return -EINVAL;
+
+	return ops->set_voltage_time_sel(rdev, old_sel, new_sel);
+}
+EXPORT_SYMBOL_GPL(regulator_set_voltage_time);
+
+/**
  * regulator_sync_voltage - re-apply last regulator output voltage
  * @regulator: regulator source
  *
@@ -2565,8 +2635,11 @@
 			init_data->consumer_supplies[i].dev,
 			init_data->consumer_supplies[i].dev_name,
 			init_data->consumer_supplies[i].supply);
-		if (ret < 0)
+		if (ret < 0) {
+			dev_err(dev, "Failed to set supply %s\n",
+				init_data->consumer_supplies[i].supply);
 			goto unset_supplies;
+		}
 	}
 
 	list_add(&rdev->list, &regulator_list);
@@ -2653,6 +2726,47 @@
 EXPORT_SYMBOL_GPL(regulator_suspend_prepare);
 
 /**
+ * regulator_suspend_finish - resume regulators from system wide suspend
+ *
+ * Turn on regulators that might be turned off by regulator_suspend_prepare
+ * and that should be turned on according to the regulators properties.
+ */
+int regulator_suspend_finish(void)
+{
+	struct regulator_dev *rdev;
+	int ret = 0, error;
+
+	mutex_lock(&regulator_list_mutex);
+	list_for_each_entry(rdev, &regulator_list, list) {
+		struct regulator_ops *ops = rdev->desc->ops;
+
+		mutex_lock(&rdev->mutex);
+		if ((rdev->use_count > 0  || rdev->constraints->always_on) &&
+				ops->enable) {
+			error = ops->enable(rdev);
+			if (error)
+				ret = error;
+		} else {
+			if (!has_full_constraints)
+				goto unlock;
+			if (!ops->disable)
+				goto unlock;
+			if (ops->is_enabled && !ops->is_enabled(rdev))
+				goto unlock;
+
+			error = ops->disable(rdev);
+			if (error)
+				ret = error;
+		}
+unlock:
+		mutex_unlock(&rdev->mutex);
+	}
+	mutex_unlock(&regulator_list_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_suspend_finish);
+
+/**
  * regulator_has_full_constraints - the system has fully specified constraints
  *
  * Calling this function will cause the regulator API to disable all

diff --git a/drivers/regulator/max8997.c b/drivers/regulator/max8997.c
index 01ef7e9..77e0cfb 100644
--- a/drivers/regulator/max8997.c
+++ b/drivers/regulator/max8997.c

@@ -1185,6 +1185,7 @@
 	{ "max8997-pmic", 0},
 	{ },
 };
+MODULE_DEVICE_TABLE(platform, max8997_pmic_id);
 
 static struct platform_driver max8997_pmic_driver = {
 	.driver = {

diff --git a/drivers/regulator/max8998.c b/drivers/regulator/max8998.c
index 0ec49ca..4341026 100644
--- a/drivers/regulator/max8998.c
+++ b/drivers/regulator/max8998.c

@@ -887,6 +887,7 @@
 	{ "lp3974-pmic", TYPE_LP3974 },
 	{ }
 };
+MODULE_DEVICE_TABLE(platform, max8998_pmic_id);
 
 static struct platform_driver max8998_pmic_driver = {
 	.driver = {

diff --git a/drivers/regulator/tps6524x-regulator.c b/drivers/regulator/tps6524x-regulator.c
index 176a6be..9166aa0 100644
--- a/drivers/regulator/tps6524x-regulator.c
+++ b/drivers/regulator/tps6524x-regulator.c

@@ -596,7 +596,7 @@
 	.get_current_limit	= get_current_limit,
 };
 
-static int __devexit pmic_remove(struct spi_device *spi)
+static int pmic_remove(struct spi_device *spi)
 {
 	struct tps6524x *hw = spi_get_drvdata(spi);
 	int i;

diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index 06df898..e93453b 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c

@@ -565,9 +565,8 @@
 	}
 
 	irq = platform_get_irq_byname(pdev, "UV");
-	ret = wm831x_request_irq(wm831x, irq, wm831x_dcdc_uv_irq,
-				 IRQF_TRIGGER_RISING, dcdc->name,
-				 dcdc);
+	ret = request_threaded_irq(irq, NULL, wm831x_dcdc_uv_irq,
+				   IRQF_TRIGGER_RISING, dcdc->name, dcdc);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Failed to request UV IRQ %d: %d\n",
 			irq, ret);
@@ -575,9 +574,8 @@
 	}
 
 	irq = platform_get_irq_byname(pdev, "HC");
-	ret = wm831x_request_irq(wm831x, irq, wm831x_dcdc_oc_irq,
-				 IRQF_TRIGGER_RISING, dcdc->name,
-				 dcdc);
+	ret = request_threaded_irq(irq, NULL, wm831x_dcdc_oc_irq,
+				   IRQF_TRIGGER_RISING, dcdc->name, dcdc);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Failed to request HC IRQ %d: %d\n",
 			irq, ret);
@@ -589,7 +587,7 @@
 	return 0;
 
 err_uv:
-	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), dcdc);
+	free_irq(platform_get_irq_byname(pdev, "UV"), dcdc);
 err_regulator:
 	regulator_unregister(dcdc->regulator);
 err:
@@ -606,8 +604,8 @@
 
 	platform_set_drvdata(pdev, NULL);
 
-	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "HC"), dcdc);
-	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), dcdc);
+	free_irq(platform_get_irq_byname(pdev, "HC"), dcdc);
+	free_irq(platform_get_irq_byname(pdev, "UV"), dcdc);
 	regulator_unregister(dcdc->regulator);
 	if (dcdc->dvs_gpio)
 		gpio_free(dcdc->dvs_gpio);
@@ -756,9 +754,8 @@
 	}
 
 	irq = platform_get_irq_byname(pdev, "UV");
-	ret = wm831x_request_irq(wm831x, irq, wm831x_dcdc_uv_irq,
-				 IRQF_TRIGGER_RISING, dcdc->name,
-				 dcdc);
+	ret = request_threaded_irq(irq, NULL, wm831x_dcdc_uv_irq,
+				   IRQF_TRIGGER_RISING,	dcdc->name, dcdc);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Failed to request UV IRQ %d: %d\n",
 			irq, ret);
@@ -783,7 +780,7 @@
 
 	platform_set_drvdata(pdev, NULL);
 
-	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), dcdc);
+	free_irq(platform_get_irq_byname(pdev, "UV"), dcdc);
 	regulator_unregister(dcdc->regulator);
 	kfree(dcdc);
 
@@ -885,9 +882,9 @@
 	}
 
 	irq = platform_get_irq_byname(pdev, "UV");
-	ret = wm831x_request_irq(wm831x, irq, wm831x_dcdc_uv_irq,
-				 IRQF_TRIGGER_RISING, dcdc->name,
-				 dcdc);
+	ret = request_threaded_irq(irq, NULL, wm831x_dcdc_uv_irq,
+				   IRQF_TRIGGER_RISING, dcdc->name,
+				   dcdc);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Failed to request UV IRQ %d: %d\n",
 			irq, ret);
@@ -908,11 +905,10 @@
 static __devexit int wm831x_boostp_remove(struct platform_device *pdev)
 {
 	struct wm831x_dcdc *dcdc = platform_get_drvdata(pdev);
-	struct wm831x *wm831x = dcdc->wm831x;
 
 	platform_set_drvdata(pdev, NULL);
 
-	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), dcdc);
+	free_irq(platform_get_irq_byname(pdev, "UV"), dcdc);
 	regulator_unregister(dcdc->regulator);
 	kfree(dcdc);
 

diff --git a/drivers/regulator/wm831x-isink.c b/drivers/regulator/wm831x-isink.c
index 6c446cd..01f27c7 100644
--- a/drivers/regulator/wm831x-isink.c
+++ b/drivers/regulator/wm831x-isink.c

@@ -198,9 +198,8 @@
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	ret = wm831x_request_irq(wm831x, irq, wm831x_isink_irq,
-				 IRQF_TRIGGER_RISING, isink->name,
-				 isink);
+	ret = request_threaded_irq(irq, NULL, wm831x_isink_irq,
+				   IRQF_TRIGGER_RISING, isink->name, isink);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Failed to request ISINK IRQ %d: %d\n",
 			irq, ret);
@@ -221,11 +220,10 @@
 static __devexit int wm831x_isink_remove(struct platform_device *pdev)
 {
 	struct wm831x_isink *isink = platform_get_drvdata(pdev);
-	struct wm831x *wm831x = isink->wm831x;
 
 	platform_set_drvdata(pdev, NULL);
 
-	wm831x_free_irq(wm831x, platform_get_irq(pdev, 0), isink);
+	free_irq(platform_get_irq(pdev, 0), isink);
 
 	regulator_unregister(isink->regulator);
 	kfree(isink);

diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c
index c94fc5b..2220cf8 100644
--- a/drivers/regulator/wm831x-ldo.c
+++ b/drivers/regulator/wm831x-ldo.c

@@ -354,9 +354,9 @@
 	}
 
 	irq = platform_get_irq_byname(pdev, "UV");
-	ret = wm831x_request_irq(wm831x, irq, wm831x_ldo_uv_irq,
-				 IRQF_TRIGGER_RISING, ldo->name,
-				 ldo);
+	ret = request_threaded_irq(irq, NULL, wm831x_ldo_uv_irq,
+				   IRQF_TRIGGER_RISING, ldo->name,
+				   ldo);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Failed to request UV IRQ %d: %d\n",
 			irq, ret);
@@ -377,11 +377,10 @@
 static __devexit int wm831x_gp_ldo_remove(struct platform_device *pdev)
 {
 	struct wm831x_ldo *ldo = platform_get_drvdata(pdev);
-	struct wm831x *wm831x = ldo->wm831x;
 
 	platform_set_drvdata(pdev, NULL);
 
-	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), ldo);
+	free_irq(platform_get_irq_byname(pdev, "UV"), ldo);
 	regulator_unregister(ldo->regulator);
 	kfree(ldo);
 
@@ -619,9 +618,8 @@
 	}
 
 	irq = platform_get_irq_byname(pdev, "UV");
-	ret = wm831x_request_irq(wm831x, irq, wm831x_ldo_uv_irq,
-				 IRQF_TRIGGER_RISING, ldo->name,
-				 ldo);
+	ret = request_threaded_irq(irq, NULL, wm831x_ldo_uv_irq,
+				   IRQF_TRIGGER_RISING, ldo->name, ldo);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Failed to request UV IRQ %d: %d\n",
 			irq, ret);
@@ -642,9 +640,8 @@
 static __devexit int wm831x_aldo_remove(struct platform_device *pdev)
 {
 	struct wm831x_ldo *ldo = platform_get_drvdata(pdev);
-	struct wm831x *wm831x = ldo->wm831x;
 
-	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), ldo);
+	free_irq(platform_get_irq_byname(pdev, "UV"), ldo);
 	regulator_unregister(ldo->regulator);
 	kfree(ldo);
 

diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 379d859..459f2cb 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c

@@ -3982,8 +3982,10 @@
 }
 
 static struct ccw_driver dasd_eckd_driver = {
-	.name	     = "dasd-eckd",
-	.owner	     = THIS_MODULE,
+	.driver = {
+		.name	= "dasd-eckd",
+		.owner	= THIS_MODULE,
+	},
 	.ids	     = dasd_eckd_ids,
 	.probe	     = dasd_eckd_probe,
 	.remove      = dasd_generic_remove,

diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index be89b3a..4b71b11 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c

@@ -65,8 +65,10 @@
 }
 
 static struct ccw_driver dasd_fba_driver = {
-	.name        = "dasd-fba",
-	.owner       = THIS_MODULE,
+	.driver = {
+		.name	= "dasd-fba",
+		.owner	= THIS_MODULE,
+	},
 	.ids         = dasd_fba_ids,
 	.probe       = dasd_fba_probe,
 	.remove      = dasd_generic_remove,

diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c
index 3fb4335..694464c 100644
--- a/drivers/s390/char/con3215.c
+++ b/drivers/s390/char/con3215.c

@@ -764,8 +764,10 @@
 };
 
 static struct ccw_driver raw3215_ccw_driver = {
-	.name		= "3215",
-	.owner		= THIS_MODULE,
+	.driver = {
+		.name	= "3215",
+		.owner	= THIS_MODULE,
+	},
 	.ids		= raw3215_id,
 	.probe		= &raw3215_probe,
 	.remove		= &raw3215_remove,

diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c
index 96ba2fd..4c02376 100644
--- a/drivers/s390/char/raw3270.c
+++ b/drivers/s390/char/raw3270.c

@@ -1388,8 +1388,10 @@
 };
 
 static struct ccw_driver raw3270_ccw_driver = {
-	.name		= "3270",
-	.owner		= THIS_MODULE,
+	.driver = {
+		.name	= "3270",
+		.owner	= THIS_MODULE,
+	},
 	.ids		= raw3270_id,
 	.probe		= &raw3270_probe,
 	.remove		= &raw3270_remove,

diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c
index c265111..9eff2df 100644
--- a/drivers/s390/char/tape_34xx.c
+++ b/drivers/s390/char/tape_34xx.c

@@ -1320,8 +1320,10 @@
 }
 
 static struct ccw_driver tape_34xx_driver = {
-	.name = "tape_34xx",
-	.owner = THIS_MODULE,
+	.driver = {
+		.name = "tape_34xx",
+		.owner = THIS_MODULE,
+	},
 	.ids = tape_34xx_ids,
 	.probe = tape_generic_probe,
 	.remove = tape_generic_remove,

diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index de2e99e..b98dcbd 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c

@@ -1761,8 +1761,10 @@
 }
 
 static struct ccw_driver tape_3590_driver = {
-	.name = "tape_3590",
-	.owner = THIS_MODULE,
+	.driver = {
+		.name = "tape_3590",
+		.owner = THIS_MODULE,
+	},
 	.ids = tape_3590_ids,
 	.probe = tape_generic_probe,
 	.remove = tape_generic_remove,

diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index caef175..f6b00c3 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c

@@ -64,8 +64,10 @@
 static int ur_pm_suspend(struct ccw_device *cdev);
 
 static struct ccw_driver ur_driver = {
-	.name		= "vmur",
-	.owner		= THIS_MODULE,
+	.driver = {
+		.name	= "vmur",
+		.owner	= THIS_MODULE,
+	},
 	.ids		= ur_ids,
 	.probe		= ur_probe,
 	.remove		= ur_remove,

diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index 2864581..5c56741 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c

@@ -428,7 +428,7 @@
 	gdev = to_ccwgroupdev(dev);
 	gdrv = to_ccwgroupdrv(dev->driver);
 
-	if (!try_module_get(gdrv->owner))
+	if (!try_module_get(gdrv->driver.owner))
 		return -EINVAL;
 
 	ret = strict_strtoul(buf, 0, &value);
@@ -442,7 +442,7 @@
 	else
 		ret = -EINVAL;
 out:
-	module_put(gdrv->owner);
+	module_put(gdrv->driver.owner);
 	return (ret == 0) ? count : ret;
 }
 
@@ -616,8 +616,6 @@
 {
 	/* register our new driver with the core */
 	cdriver->driver.bus = &ccwgroup_bus_type;
-	cdriver->driver.name = cdriver->name;
-	cdriver->driver.owner = cdriver->owner;
 
 	return driver_register(&cdriver->driver);
 }

diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index e50b121..df14c51 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c

@@ -127,7 +127,7 @@
 	return ret;
 }
 
-struct bus_type ccw_bus_type;
+static struct bus_type ccw_bus_type;
 
 static void io_subchannel_irq(struct subchannel *);
 static int io_subchannel_probe(struct subchannel *);
@@ -547,7 +547,7 @@
 	if (atomic_cmpxchg(&cdev->private->onoff, 0, 1) != 0)
 		return -EAGAIN;
 
-	if (cdev->drv && !try_module_get(cdev->drv->owner)) {
+	if (cdev->drv && !try_module_get(cdev->drv->driver.owner)) {
 		atomic_set(&cdev->private->onoff, 0);
 		return -EINVAL;
 	}
@@ -573,7 +573,7 @@
 	}
 out:
 	if (cdev->drv)
-		module_put(cdev->drv->owner);
+		module_put(cdev->drv->driver.owner);
 	atomic_set(&cdev->private->onoff, 0);
 	return (ret < 0) ? ret : count;
 }
@@ -1970,7 +1970,7 @@
 	.restore = ccw_device_pm_restore,
 };
 
-struct bus_type ccw_bus_type = {
+static struct bus_type ccw_bus_type = {
 	.name   = "ccw",
 	.match  = ccw_bus_match,
 	.uevent = ccw_uevent,
@@ -1993,8 +1993,6 @@
 	struct device_driver *drv = &cdriver->driver;
 
 	drv->bus = &ccw_bus_type;
-	drv->name = cdriver->name;
-	drv->owner = cdriver->owner;
 
 	return driver_register(drv);
 }
@@ -2112,5 +2110,4 @@
 EXPORT_SYMBOL(ccw_driver_register);
 EXPORT_SYMBOL(ccw_driver_unregister);
 EXPORT_SYMBOL(get_ccwdev_by_busid);
-EXPORT_SYMBOL(ccw_bus_type);
 EXPORT_SYMBOL_GPL(ccw_device_get_subchannel_id);

diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h
index 379de2d..7e297c7 100644
--- a/drivers/s390/cio/device.h
+++ b/drivers/s390/cio/device.h

@@ -133,7 +133,6 @@
 /* qdio needs this. */
 void ccw_device_set_timeout(struct ccw_device *, int);
 extern struct subchannel_id ccw_device_get_subchannel_id(struct ccw_device *);
-extern struct bus_type ccw_bus_type;
 
 /* Channel measurement facility related */
 void retry_set_schib(struct ccw_device *cdev);

diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 5640c89..479c665 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c

@@ -1508,7 +1508,8 @@
 
 	if (irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)
 		return -EBUSY;
-
+	if (!count)
+		return 0;
 	if (callflags & QDIO_FLAG_SYNC_INPUT)
 		return handle_inbound(irq_ptr->input_qs[q_nr],
 				      callflags, bufnr, count);

diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index ce3a5c1..9feb62f 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c

@@ -264,8 +264,10 @@
 /* ccwgroup table  */
 
 static struct ccwgroup_driver claw_group_driver = {
-        .owner       = THIS_MODULE,
-        .name        = "claw",
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "claw",
+	},
         .max_slaves  = 2,
         .driver_id   = 0xC3D3C1E6,
         .probe       = claw_probe,
@@ -282,8 +284,10 @@
 MODULE_DEVICE_TABLE(ccw, claw_ids);
 
 static struct ccw_driver claw_ccw_driver = {
-	.owner	= THIS_MODULE,
-	.name	= "claw",
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "claw",
+	},
 	.ids	= claw_ids,
 	.probe	= ccwgroup_probe_ccwdev,
 	.remove	= ccwgroup_remove_ccwdev,

diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 4c28459..c189296 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c

@@ -1764,16 +1764,20 @@
 MODULE_DEVICE_TABLE(ccw, ctcm_ids);
 
 static struct ccw_driver ctcm_ccw_driver = {
-	.owner	= THIS_MODULE,
-	.name	= "ctcm",
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "ctcm",
+	},
 	.ids	= ctcm_ids,
 	.probe	= ccwgroup_probe_ccwdev,
 	.remove	= ccwgroup_remove_ccwdev,
 };
 
 static struct ccwgroup_driver ctcm_group_driver = {
-	.owner       = THIS_MODULE,
-	.name        = CTC_DRIVER_NAME,
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= CTC_DRIVER_NAME,
+	},
 	.max_slaves  = 2,
 	.driver_id   = 0xC3E3C3D4,	/* CTCM */
 	.probe       = ctcm_probe_device,

diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index 30b2a82..7fbc4ad 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c

@@ -2396,8 +2396,10 @@
 MODULE_DEVICE_TABLE(ccw, lcs_ids);
 
 static struct ccw_driver lcs_ccw_driver = {
-	.owner	= THIS_MODULE,
-	.name	= "lcs",
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "lcs",
+	},
 	.ids	= lcs_ids,
 	.probe	= ccwgroup_probe_ccwdev,
 	.remove	= ccwgroup_remove_ccwdev,
@@ -2407,8 +2409,10 @@
  * LCS ccwgroup driver registration
  */
 static struct ccwgroup_driver lcs_group_driver = {
-	.owner       = THIS_MODULE,
-	.name        = "lcs",
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "lcs",
+	},
 	.max_slaves  = 2,
 	.driver_id   = 0xD3C3E2,
 	.probe       = lcs_probe_device,

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 25eef304..10a3a3b 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c

@@ -3902,7 +3902,9 @@
 MODULE_DEVICE_TABLE(ccw, qeth_ids);
 
 static struct ccw_driver qeth_ccw_driver = {
-	.name = "qeth",
+	.driver = {
+		.name = "qeth",
+	},
 	.ids = qeth_ids,
 	.probe = ccwgroup_probe_ccwdev,
 	.remove = ccwgroup_remove_ccwdev,
@@ -4428,8 +4430,10 @@
 }
 
 static struct ccwgroup_driver qeth_core_ccwgroup_driver = {
-	.owner = THIS_MODULE,
-	.name = "qeth",
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "qeth",
+	},
 	.driver_id = 0xD8C5E3C8,
 	.probe = qeth_core_probe_device,
 	.remove = qeth_core_remove_device,

diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c
index 4f7852d..e8b7cee 100644
--- a/drivers/s390/scsi/zfcp_ccw.c
+++ b/drivers/s390/scsi/zfcp_ccw.c

@@ -251,8 +251,10 @@
 }
 
 struct ccw_driver zfcp_ccw_driver = {
-	.owner       = THIS_MODULE,
-	.name        = "zfcp",
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "zfcp",
+	},
 	.ids         = zfcp_ccw_device_id,
 	.probe       = zfcp_ccw_probe,
 	.remove      = zfcp_ccw_remove,

diff --git a/drivers/scsi/aacraid/Makefile b/drivers/scsi/aacraid/Makefile
index 92df4d6..1bd9fd1 100644
--- a/drivers/scsi/aacraid/Makefile
+++ b/drivers/scsi/aacraid/Makefile

@@ -3,6 +3,6 @@
 obj-$(CONFIG_SCSI_AACRAID) := aacraid.o
 
 aacraid-objs	:= linit.o aachba.o commctrl.o comminit.o commsup.o \
-		   dpcsup.o rx.o sa.o rkt.o nark.o
+		   dpcsup.o rx.o sa.o rkt.o nark.o src.o
 
 ccflags-y	:= -Idrivers/scsi

diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 7df2dd1..118ce83 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c

@@ -5,7 +5,8 @@
  * based on the old aacraid driver that is..
  * Adaptec aacraid device driver for Linux.
  *
- * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
+ * Copyright (c) 2000-2010 Adaptec, Inc.
+ *               2010 PMC-Sierra, Inc. (aacraid@pmc-sierra.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -1486,7 +1487,9 @@
 			dev->a_ops.adapter_write = aac_write_block;
 		}
 		dev->scsi_host_ptr->max_sectors = AAC_MAX_32BIT_SGBCOUNT;
-		if(!(dev->adapter_info.options & AAC_OPT_NEW_COMM)) {
+		if (dev->adapter_info.options & AAC_OPT_NEW_COMM_TYPE1)
+			dev->adapter_info.options |= AAC_OPT_NEW_COMM;
+		if (!(dev->adapter_info.options & AAC_OPT_NEW_COMM)) {
 			/*
 			 * Worst case size that could cause sg overflow when
 			 * we break up SG elements that are larger than 64KB.

diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 4dbcc05..29ab000 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h

@@ -12,7 +12,7 @@
  *----------------------------------------------------------------------------*/
 
 #ifndef AAC_DRIVER_BUILD
-# define AAC_DRIVER_BUILD 26400
+# define AAC_DRIVER_BUILD 28000
 # define AAC_DRIVER_BRANCH "-ms"
 #endif
 #define MAXIMUM_NUM_CONTAINERS	32
@@ -277,6 +277,16 @@
 
 #define		FsaNormal	1
 
+/* transport FIB header (PMC) */
+struct aac_fib_xporthdr {
+	u64	HostAddress;	/* FIB host address w/o xport header */
+	u32	Size;		/* FIB size excluding xport header */
+	u32	Handle;		/* driver handle to reference the FIB */
+	u64	Reserved[2];
+};
+
+#define		ALIGN32		32
+
 /*
  * Define the FIB. The FIB is the where all the requested data and
  * command information are put to the application on the FSA adapter.
@@ -394,7 +404,9 @@
 	AdapterMicroFib			= (1<<17),
 	BIOSFibPath			= (1<<18),
 	FastResponseCapable		= (1<<19),
-	ApiFib				= (1<<20)	// Its an API Fib.
+	ApiFib				= (1<<20),	/* Its an API Fib */
+	/* PMC NEW COMM: There is no more AIF data pending */
+	NoMoreAifDataAvailable		= (1<<21)
 };
 
 /*
@@ -404,6 +416,7 @@
 
 #define ADAPTER_INIT_STRUCT_REVISION		3
 #define ADAPTER_INIT_STRUCT_REVISION_4		4 // rocket science
+#define ADAPTER_INIT_STRUCT_REVISION_6		6 /* PMC src */
 
 struct aac_init
 {
@@ -428,9 +441,15 @@
 #define INITFLAGS_NEW_COMM_SUPPORTED	0x00000001
 #define INITFLAGS_DRIVER_USES_UTC_TIME	0x00000010
 #define INITFLAGS_DRIVER_SUPPORTS_PM	0x00000020
+#define INITFLAGS_NEW_COMM_TYPE1_SUPPORTED	0x00000041
 	__le32	MaxIoCommands;	/* max outstanding commands */
 	__le32	MaxIoSize;	/* largest I/O command */
 	__le32	MaxFibSize;	/* largest FIB to adapter */
+	/* ADAPTER_INIT_STRUCT_REVISION_5 begins here */
+	__le32	MaxNumAif;	/* max number of aif */
+	/* ADAPTER_INIT_STRUCT_REVISION_6 begins here */
+	__le32	HostRRQ_AddrLow;
+	__le32	HostRRQ_AddrHigh;	/* Host RRQ (response queue) for SRC */
 };
 
 enum aac_log_level {
@@ -685,7 +704,7 @@
 #define OutboundDoorbellReg	MUnit.ODR
 
 struct rx_registers {
-	struct rx_mu_registers		MUnit;		/* 1300h - 1344h */
+	struct rx_mu_registers		MUnit;		/* 1300h - 1347h */
 	__le32				reserved1[2];	/* 1348h - 134ch */
 	struct rx_inbound		IndexRegs;
 };
@@ -703,7 +722,7 @@
 #define rkt_inbound rx_inbound
 
 struct rkt_registers {
-	struct rkt_mu_registers		MUnit;		 /* 1300h - 1344h */
+	struct rkt_mu_registers		MUnit;		 /* 1300h - 1347h */
 	__le32				reserved1[1006]; /* 1348h - 22fch */
 	struct rkt_inbound		IndexRegs;	 /* 2300h - */
 };
@@ -713,6 +732,44 @@
 #define rkt_writeb(AEP, CSR, value)	writeb(value, &((AEP)->regs.rkt->CSR))
 #define rkt_writel(AEP, CSR, value)	writel(value, &((AEP)->regs.rkt->CSR))
 
+/*
+ * PMC SRC message unit registers
+ */
+
+#define src_inbound rx_inbound
+
+struct src_mu_registers {
+				/*	PCI*| Name */
+	__le32	reserved0[8];	/*	00h | Reserved */
+	__le32	IDR;		/*	20h | Inbound Doorbell Register */
+	__le32	IISR;		/*	24h | Inbound Int. Status Register */
+	__le32	reserved1[3];	/*	28h | Reserved */
+	__le32	OIMR;		/*	34h | Outbound Int. Mask Register */
+	__le32	reserved2[25];	/*	38h | Reserved */
+	__le32	ODR_R;		/*	9ch | Outbound Doorbell Read */
+	__le32	ODR_C;		/*	a0h | Outbound Doorbell Clear */
+	__le32	reserved3[6];	/*	a4h | Reserved */
+	__le32	OMR;		/*	bch | Outbound Message Register */
+	__le32	IQ_L;		/*  c0h | Inbound Queue (Low address) */
+	__le32	IQ_H;		/*  c4h | Inbound Queue (High address) */
+};
+
+struct src_registers {
+	struct src_mu_registers MUnit;	/* 00h - c7h */
+	__le32 reserved1[130790];	/* c8h - 7fc5fh */
+	struct src_inbound IndexRegs;	/* 7fc60h */
+};
+
+#define src_readb(AEP, CSR)		readb(&((AEP)->regs.src.bar0->CSR))
+#define src_readl(AEP, CSR)		readl(&((AEP)->regs.src.bar0->CSR))
+#define src_writeb(AEP, CSR, value)	writeb(value, \
+						&((AEP)->regs.src.bar0->CSR))
+#define src_writel(AEP, CSR, value)	writel(value, \
+						&((AEP)->regs.src.bar0->CSR))
+
+#define SRC_ODR_SHIFT		12
+#define SRC_IDR_SHIFT		9
+
 typedef void (*fib_callback)(void *ctxt, struct fib *fibctx);
 
 struct aac_fib_context {
@@ -879,6 +936,7 @@
 #define AAC_OPTION_MU_RESET		cpu_to_le32(0x00000001)
 #define AAC_OPTION_IGNORE_RESET		cpu_to_le32(0x00000002)
 #define AAC_OPTION_POWER_MANAGEMENT	cpu_to_le32(0x00000004)
+#define AAC_OPTION_DOORBELL_RESET	cpu_to_le32(0x00004000)
 #define AAC_SIS_VERSION_V3	3
 #define AAC_SIS_SLOT_UNKNOWN	0xFF
 
@@ -940,6 +998,7 @@
 #define AAC_OPT_SUPPLEMENT_ADAPTER_INFO	cpu_to_le32(1<<16)
 #define AAC_OPT_NEW_COMM		cpu_to_le32(1<<17)
 #define AAC_OPT_NEW_COMM_64		cpu_to_le32(1<<18)
+#define AAC_OPT_NEW_COMM_TYPE1		cpu_to_le32(1<<28)
 
 struct aac_dev
 {
@@ -952,6 +1011,7 @@
 	 */
 	unsigned		max_fib_size;
 	unsigned		sg_tablesize;
+	unsigned		max_num_aif;
 
 	/*
 	 *	Map for 128 fib objects (64k)
@@ -980,10 +1040,21 @@
 	struct adapter_ops	a_ops;
 	unsigned long		fsrev;		/* Main driver's revision number */
 
-	unsigned		base_size;	/* Size of mapped in region */
+	unsigned long		dbg_base;	/* address of UART
+						 * debug buffer */
+
+	unsigned		base_size, dbg_size;	/* Size of
+							 *  mapped in region */
+
 	struct aac_init		*init;		/* Holds initialization info to communicate with adapter */
 	dma_addr_t		init_pa;	/* Holds physical address of the init struct */
 
+	u32			*host_rrq;	/* response queue
+						 * if AAC_COMM_MESSAGE_TYPE1 */
+
+	dma_addr_t		host_rrq_pa;	/* phys. address */
+	u32			host_rrq_idx;	/* index into rrq buffer */
+
 	struct pci_dev		*pdev;		/* Our PCI interface */
 	void *			printfbuf;	/* pointer to buffer used for printf's from the adapter */
 	void *			comm_addr;	/* Base address of Comm area */
@@ -1003,14 +1074,20 @@
 	 */
 #ifndef AAC_MIN_FOOTPRINT_SIZE
 #	define AAC_MIN_FOOTPRINT_SIZE 8192
+#	define AAC_MIN_SRC_BAR0_SIZE 0x400000
+#	define AAC_MIN_SRC_BAR1_SIZE 0x800
 #endif
 	union
 	{
 		struct sa_registers __iomem *sa;
 		struct rx_registers __iomem *rx;
 		struct rkt_registers __iomem *rkt;
+		struct {
+			struct src_registers __iomem *bar0;
+			char __iomem *bar1;
+		} src;
 	} regs;
-	volatile void __iomem *base;
+	volatile void __iomem *base, *dbg_base_mapped;
 	volatile struct rx_inbound __iomem *IndexRegs;
 	u32			OIMR; /* Mask Register Cache */
 	/*
@@ -1031,9 +1108,8 @@
 	u8			comm_interface;
 #	define AAC_COMM_PRODUCER 0
 #	define AAC_COMM_MESSAGE  1
-	/* macro side-effects BEWARE */
-#	define			raw_io_interface \
-	  init->InitStructRevision==cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION_4)
+#	define AAC_COMM_MESSAGE_TYPE1	3
+	u8			raw_io_interface;
 	u8			raw_io_64;
 	u8			printf_enabled;
 	u8			in_reset;
@@ -1789,6 +1865,10 @@
 #define DoorBellAdapterNormCmdNotFull	(1<<3)	/* Adapter -> Host */
 #define DoorBellAdapterNormRespNotFull	(1<<4)	/* Adapter -> Host */
 #define DoorBellPrintfReady		(1<<5)	/* Adapter -> Host */
+#define DoorBellAifPending		(1<<6)	/* Adapter -> Host */
+
+/* PMC specific outbound doorbell bits */
+#define PmDoorBellResponseSent		(1<<1)	/* Adapter -> Host */
 
 /*
  *	For FIB communication, we need all of the following things
@@ -1831,6 +1911,9 @@
 #define		AifReqAPIJobUpdate	109	/* Update a job report from the API */
 #define		AifReqAPIJobFinish	110	/* Finish a job from the API */
 
+/* PMC NEW COMM: Request the event data */
+#define		AifReqEvent		200
+
 /*
  *	Adapter Initiated FIB command structures. Start with the adapter
  *	initiated FIBs that really come from the adapter, and get responded
@@ -1886,10 +1969,13 @@
 int aac_rkt_init(struct aac_dev *dev);
 int aac_nark_init(struct aac_dev *dev);
 int aac_sa_init(struct aac_dev *dev);
+int aac_src_init(struct aac_dev *dev);
 int aac_queue_get(struct aac_dev * dev, u32 * index, u32 qid, struct hw_fib * hw_fib, int wait, struct fib * fibptr, unsigned long *nonotify);
 unsigned int aac_response_normal(struct aac_queue * q);
 unsigned int aac_command_normal(struct aac_queue * q);
-unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index);
+unsigned int aac_intr_normal(struct aac_dev *dev, u32 Index,
+			int isAif, int isFastResponse,
+			struct hw_fib *aif_fib);
 int aac_reset_adapter(struct aac_dev * dev, int forced);
 int aac_check_health(struct aac_dev * dev);
 int aac_command_thread(void *data);

diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c
index 645ddd9..8a0b330 100644
--- a/drivers/scsi/aacraid/commctrl.c
+++ b/drivers/scsi/aacraid/commctrl.c

@@ -5,7 +5,8 @@
  * based on the old aacraid driver that is..
  * Adaptec aacraid device driver for Linux.
  *
- * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
+ * Copyright (c) 2000-2010 Adaptec, Inc.
+ *               2010 PMC-Sierra, Inc. (aacraid@pmc-sierra.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by

diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
index a726148..7ac8fdb 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c

@@ -5,7 +5,8 @@
  * based on the old aacraid driver that is..
  * Adaptec aacraid device driver for Linux.
  *
- * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
+ * Copyright (c) 2000-2010 Adaptec, Inc.
+ *               2010 PMC-Sierra, Inc. (aacraid@pmc-sierra.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -52,12 +53,16 @@
 	unsigned long size, align;
 	const unsigned long fibsize = 4096;
 	const unsigned long printfbufsiz = 256;
+	unsigned long host_rrq_size = 0;
 	struct aac_init *init;
 	dma_addr_t phys;
 	unsigned long aac_max_hostphysmempages;
 
-	size = fibsize + sizeof(struct aac_init) + commsize + commalign + printfbufsiz;
-
+	if (dev->comm_interface == AAC_COMM_MESSAGE_TYPE1)
+		host_rrq_size = (dev->scsi_host_ptr->can_queue
+			+ AAC_NUM_MGT_FIB) * sizeof(u32);
+	size = fibsize + sizeof(struct aac_init) + commsize +
+			commalign + printfbufsiz + host_rrq_size;
  
 	base = pci_alloc_consistent(dev->pdev, size, &phys);
 
@@ -70,8 +75,14 @@
 	dev->comm_phys = phys;
 	dev->comm_size = size;
 	
-	dev->init = (struct aac_init *)(base + fibsize);
-	dev->init_pa = phys + fibsize;
+	if (dev->comm_interface == AAC_COMM_MESSAGE_TYPE1) {
+		dev->host_rrq = (u32 *)(base + fibsize);
+		dev->host_rrq_pa = phys + fibsize;
+		memset(dev->host_rrq, 0, host_rrq_size);
+	}
+
+	dev->init = (struct aac_init *)(base + fibsize + host_rrq_size);
+	dev->init_pa = phys + fibsize + host_rrq_size;
 
 	init = dev->init;
 
@@ -106,8 +117,13 @@
 
 	init->InitFlags = 0;
 	if (dev->comm_interface == AAC_COMM_MESSAGE) {
-		init->InitFlags = cpu_to_le32(INITFLAGS_NEW_COMM_SUPPORTED);
+		init->InitFlags |= cpu_to_le32(INITFLAGS_NEW_COMM_SUPPORTED);
 		dprintk((KERN_WARNING"aacraid: New Comm Interface enabled\n"));
+	} else if (dev->comm_interface == AAC_COMM_MESSAGE_TYPE1) {
+		init->InitStructRevision = cpu_to_le32(ADAPTER_INIT_STRUCT_REVISION_6);
+		init->InitFlags |= cpu_to_le32(INITFLAGS_NEW_COMM_TYPE1_SUPPORTED);
+		dprintk((KERN_WARNING
+			"aacraid: New Comm Interface type1 enabled\n"));
 	}
 	init->InitFlags |= cpu_to_le32(INITFLAGS_DRIVER_USES_UTC_TIME |
 				       INITFLAGS_DRIVER_SUPPORTS_PM);
@@ -115,11 +131,18 @@
 	init->MaxIoSize = cpu_to_le32(dev->scsi_host_ptr->max_sectors << 9);
 	init->MaxFibSize = cpu_to_le32(dev->max_fib_size);
 
+	init->MaxNumAif = cpu_to_le32(dev->max_num_aif);
+	init->HostRRQ_AddrHigh = (u32)((u64)dev->host_rrq_pa >> 32);
+	init->HostRRQ_AddrLow = (u32)(dev->host_rrq_pa & 0xffffffff);
+
+
 	/*
 	 * Increment the base address by the amount already used
 	 */
-	base = base + fibsize + sizeof(struct aac_init);
-	phys = (dma_addr_t)((ulong)phys + fibsize + sizeof(struct aac_init));
+	base = base + fibsize + host_rrq_size + sizeof(struct aac_init);
+	phys = (dma_addr_t)((ulong)phys + fibsize + host_rrq_size +
+		sizeof(struct aac_init));
+
 	/*
 	 *	Align the beginning of Headers to commalign
 	 */
@@ -314,15 +337,22 @@
 		- sizeof(struct aac_write) + sizeof(struct sgentry))
 			/ sizeof(struct sgentry);
 	dev->comm_interface = AAC_COMM_PRODUCER;
-	dev->raw_io_64 = 0;
+	dev->raw_io_interface = dev->raw_io_64 = 0;
+
 	if ((!aac_adapter_sync_cmd(dev, GET_ADAPTER_PROPERTIES,
 		0, 0, 0, 0, 0, 0, status+0, status+1, status+2, NULL, NULL)) &&
 	 		(status[0] == 0x00000001)) {
 		if (status[1] & le32_to_cpu(AAC_OPT_NEW_COMM_64))
 			dev->raw_io_64 = 1;
-		if (dev->a_ops.adapter_comm &&
-		    (status[1] & le32_to_cpu(AAC_OPT_NEW_COMM)))
-			dev->comm_interface = AAC_COMM_MESSAGE;
+		if (dev->a_ops.adapter_comm) {
+			if (status[1] & le32_to_cpu(AAC_OPT_NEW_COMM_TYPE1)) {
+				dev->comm_interface = AAC_COMM_MESSAGE_TYPE1;
+				dev->raw_io_interface = 1;
+			} else if (status[1] & le32_to_cpu(AAC_OPT_NEW_COMM)) {
+				dev->comm_interface = AAC_COMM_MESSAGE;
+				dev->raw_io_interface = 1;
+			}
+		}
 		if ((dev->comm_interface == AAC_COMM_MESSAGE) &&
 		    (status[2] > dev->base_size)) {
 			aac_adapter_ioremap(dev, 0);
@@ -350,10 +380,12 @@
 		 *	status[3] & 0xFFFF	maximum number FIBs outstanding
 		 */
 		host->max_sectors = (status[1] >> 16) << 1;
-		dev->max_fib_size = status[1] & 0xFFFF;
+		/* Multiple of 32 for PMC */
+		dev->max_fib_size = status[1] & 0xFFE0;
 		host->sg_tablesize = status[2] >> 16;
 		dev->sg_tablesize = status[2] & 0xFFFF;
 		host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB;
+		dev->max_num_aif = status[4] & 0xFFFF;
 		/*
 		 *	NOTE:
 		 *	All these overrides are based on a fixed internal

diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 060ac4b..dd7ad3b 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c

@@ -5,7 +5,8 @@
  * based on the old aacraid driver that is..
  * Adaptec aacraid device driver for Linux.
  *
- * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
+ * Copyright (c) 2000-2010 Adaptec, Inc.
+ *               2010 PMC-Sierra, Inc. (aacraid@pmc-sierra.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -63,9 +64,11 @@
 	  "allocate hardware fibs pci_alloc_consistent(%p, %d * (%d + %d), %p)\n",
 	  dev->pdev, dev->max_fib_size, dev->scsi_host_ptr->can_queue,
 	  AAC_NUM_MGT_FIB, &dev->hw_fib_pa));
-	if((dev->hw_fib_va = pci_alloc_consistent(dev->pdev, dev->max_fib_size
-	  * (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB),
-	  &dev->hw_fib_pa))==NULL)
+	dev->hw_fib_va = pci_alloc_consistent(dev->pdev,
+		(dev->max_fib_size + sizeof(struct aac_fib_xporthdr))
+		* (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB) + (ALIGN32 - 1),
+		&dev->hw_fib_pa);
+	if (dev->hw_fib_va == NULL)
 		return -ENOMEM;
 	return 0;
 }
@@ -110,9 +113,22 @@
 	if (i<0)
 		return -ENOMEM;
 
+	/* 32 byte alignment for PMC */
+	hw_fib_pa = (dev->hw_fib_pa + (ALIGN32 - 1)) & ~(ALIGN32 - 1);
+	dev->hw_fib_va = (struct hw_fib *)((unsigned char *)dev->hw_fib_va +
+		(hw_fib_pa - dev->hw_fib_pa));
+	dev->hw_fib_pa = hw_fib_pa;
+	memset(dev->hw_fib_va, 0,
+		(dev->max_fib_size + sizeof(struct aac_fib_xporthdr)) *
+		(dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB));
+
+	/* add Xport header */
+	dev->hw_fib_va = (struct hw_fib *)((unsigned char *)dev->hw_fib_va +
+		sizeof(struct aac_fib_xporthdr));
+	dev->hw_fib_pa += sizeof(struct aac_fib_xporthdr);
+
 	hw_fib = dev->hw_fib_va;
 	hw_fib_pa = dev->hw_fib_pa;
-	memset(hw_fib, 0, dev->max_fib_size * (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB));
 	/*
 	 *	Initialise the fibs
 	 */
@@ -129,8 +145,10 @@
 		hw_fib->header.XferState = cpu_to_le32(0xffffffff);
 		hw_fib->header.SenderSize = cpu_to_le16(dev->max_fib_size);
 		fibptr->hw_fib_pa = hw_fib_pa;
-		hw_fib = (struct hw_fib *)((unsigned char *)hw_fib + dev->max_fib_size);
-		hw_fib_pa = hw_fib_pa + dev->max_fib_size;
+		hw_fib = (struct hw_fib *)((unsigned char *)hw_fib +
+			dev->max_fib_size + sizeof(struct aac_fib_xporthdr));
+		hw_fib_pa = hw_fib_pa +
+			dev->max_fib_size + sizeof(struct aac_fib_xporthdr);
 	}
 	/*
 	 *	Add the fib chain to the free list
@@ -664,9 +682,14 @@
 	unsigned long nointr = 0;
 	unsigned long qflags;
 
+	if (dev->comm_interface == AAC_COMM_MESSAGE_TYPE1) {
+		kfree(hw_fib);
+		return 0;
+	}
+
 	if (hw_fib->header.XferState == 0) {
 		if (dev->comm_interface == AAC_COMM_MESSAGE)
-			kfree (hw_fib);
+			kfree(hw_fib);
 		return 0;
 	}
 	/*
@@ -674,7 +697,7 @@
 	 */
 	if (hw_fib->header.StructType != FIB_MAGIC) {
 		if (dev->comm_interface == AAC_COMM_MESSAGE)
-			kfree (hw_fib);
+			kfree(hw_fib);
 		return -EINVAL;
 	}
 	/*

diff --git a/drivers/scsi/aacraid/dpcsup.c b/drivers/scsi/aacraid/dpcsup.c
index 9c7408fe..f0c66a8 100644
--- a/drivers/scsi/aacraid/dpcsup.c
+++ b/drivers/scsi/aacraid/dpcsup.c

@@ -5,7 +5,8 @@
  * based on the old aacraid driver that is..
  * Adaptec aacraid device driver for Linux.
  *
- * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
+ * Copyright (c) 2000-2010 Adaptec, Inc.
+ *               2010 PMC-Sierra, Inc. (aacraid@pmc-sierra.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -228,6 +229,48 @@
 	return 0;
 }
 
+/*
+ *
+ * aac_aif_callback
+ * @context: the context set in the fib - here it is scsi cmd
+ * @fibptr: pointer to the fib
+ *
+ * Handles the AIFs - new method (SRC)
+ *
+ */
+
+static void aac_aif_callback(void *context, struct fib * fibptr)
+{
+	struct fib *fibctx;
+	struct aac_dev *dev;
+	struct aac_aifcmd *cmd;
+	int status;
+
+	fibctx = (struct fib *)context;
+	BUG_ON(fibptr == NULL);
+	dev = fibptr->dev;
+
+	if (fibptr->hw_fib_va->header.XferState &
+	    cpu_to_le32(NoMoreAifDataAvailable)) {
+		aac_fib_complete(fibptr);
+		aac_fib_free(fibptr);
+		return;
+	}
+
+	aac_intr_normal(dev, 0, 1, 0, fibptr->hw_fib_va);
+
+	aac_fib_init(fibctx);
+	cmd = (struct aac_aifcmd *) fib_data(fibctx);
+	cmd->command = cpu_to_le32(AifReqEvent);
+
+	status = aac_fib_send(AifRequest,
+		fibctx,
+		sizeof(struct hw_fib)-sizeof(struct aac_fibhdr),
+		FsaNormal,
+		0, 1,
+		(fib_callback)aac_aif_callback, fibctx);
+}
+
 
 /**
  *	aac_intr_normal	-	Handle command replies
@@ -238,19 +281,17 @@
  *	know there is a response on our normal priority queue. We will pull off
  *	all QE there are and wake up all the waiters before exiting.
  */
-
-unsigned int aac_intr_normal(struct aac_dev * dev, u32 index)
+unsigned int aac_intr_normal(struct aac_dev *dev, u32 index,
+			int isAif, int isFastResponse, struct hw_fib *aif_fib)
 {
 	unsigned long mflags;
 	dprintk((KERN_INFO "aac_intr_normal(%p,%x)\n", dev, index));
-	if ((index & 0x00000002L)) {
+	if (isAif == 1) {	/* AIF - common */
 		struct hw_fib * hw_fib;
 		struct fib * fib;
 		struct aac_queue *q = &dev->queues->queue[HostNormCmdQueue];
 		unsigned long flags;
 
-		if (index == 0xFFFFFFFEL) /* Special Case */
-			return 0;	  /* Do nothing */
 		/*
 		 *	Allocate a FIB. For non queued stuff we can just use
 		 * the stack so we are happy. We need a fib object in order to
@@ -263,8 +304,13 @@
 			kfree (fib);
 			return 1;
 		}
-		memcpy(hw_fib, (struct hw_fib *)(((uintptr_t)(dev->regs.sa)) +
-		  (index & ~0x00000002L)), sizeof(struct hw_fib));
+		if (aif_fib != NULL) {
+			memcpy(hw_fib, aif_fib, sizeof(struct hw_fib));
+		} else {
+			memcpy(hw_fib,
+				(struct hw_fib *)(((uintptr_t)(dev->regs.sa)) +
+				index), sizeof(struct hw_fib));
+		}
 		INIT_LIST_HEAD(&fib->fiblink);
 		fib->type = FSAFS_NTC_FIB_CONTEXT;
 		fib->size = sizeof(struct fib);
@@ -277,9 +323,26 @@
 	        wake_up_interruptible(&q->cmdready);
 		spin_unlock_irqrestore(q->lock, flags);
 		return 1;
+	} else if (isAif == 2) {	/* AIF - new (SRC) */
+		struct fib *fibctx;
+		struct aac_aifcmd *cmd;
+
+		fibctx = aac_fib_alloc(dev);
+		if (!fibctx)
+			return 1;
+		aac_fib_init(fibctx);
+
+		cmd = (struct aac_aifcmd *) fib_data(fibctx);
+		cmd->command = cpu_to_le32(AifReqEvent);
+
+		return aac_fib_send(AifRequest,
+			fibctx,
+			sizeof(struct hw_fib)-sizeof(struct aac_fibhdr),
+			FsaNormal,
+			0, 1,
+			(fib_callback)aac_aif_callback, fibctx);
 	} else {
-		int fast = index & 0x01;
-		struct fib * fib = &dev->fibs[index >> 2];
+		struct fib *fib = &dev->fibs[index];
 		struct hw_fib * hwfib = fib->hw_fib_va;
 
 		/*
@@ -298,7 +361,7 @@
 			return 0;
 		}
 
-		if (fast) {
+		if (isFastResponse) {
 			/*
 			 *	Doctor the fib
 			 */

diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 2c93d94..4ff2652 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c

@@ -5,7 +5,8 @@
  * based on the old aacraid driver that is..
  * Adaptec aacraid device driver for Linux.
  *
- * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
+ * Copyright (c) 2000-2010 Adaptec, Inc.
+ *               2010 PMC-Sierra, Inc. (aacraid@pmc-sierra.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -54,7 +55,7 @@
 
 #include "aacraid.h"
 
-#define AAC_DRIVER_VERSION		"1.1-5"
+#define AAC_DRIVER_VERSION		"1.1-7"
 #ifndef AAC_DRIVER_BRANCH
 #define AAC_DRIVER_BRANCH		""
 #endif
@@ -161,6 +162,7 @@
 	{ 0x9005, 0x0285, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 59 }, /* Adaptec Catch All */
 	{ 0x9005, 0x0286, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 60 }, /* Adaptec Rocket Catch All */
 	{ 0x9005, 0x0288, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 61 }, /* Adaptec NEMER/ARK Catch All */
+	{ 0x9005, 0x028b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 62 }, /* Adaptec PMC Catch All */
 	{ 0,}
 };
 MODULE_DEVICE_TABLE(pci, aac_pci_tbl);
@@ -235,7 +237,8 @@
 	{ aac_rx_init, "aacraid",  "Legend  ", "RAID            ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG | AAC_QUIRK_SCSI_32 }, /* Legend Catchall */
 	{ aac_rx_init, "aacraid",  "ADAPTEC ", "RAID            ", 2 }, /* Adaptec Catch All */
 	{ aac_rkt_init, "aacraid", "ADAPTEC ", "RAID            ", 2 }, /* Adaptec Rocket Catch All */
-	{ aac_nark_init, "aacraid", "ADAPTEC ", "RAID            ", 2 } /* Adaptec NEMER/ARK Catch All */
+	{ aac_nark_init, "aacraid", "ADAPTEC ", "RAID           ", 2 }, /* Adaptec NEMER/ARK Catch All */
+	{ aac_src_init, "aacraid", "ADAPTEC ", "RAID            ", 2 } /* Adaptec PMC Catch All */
 };
 
 /**
@@ -653,8 +656,10 @@
 	 * This adapter needs a blind reset, only do so for Adapters that
 	 * support a register, instead of a commanded, reset.
 	 */
-	if ((aac->supplement_adapter_info.SupportedOptions2 &
-	   AAC_OPTION_MU_RESET) &&
+	if (((aac->supplement_adapter_info.SupportedOptions2 &
+	  AAC_OPTION_MU_RESET) ||
+	  (aac->supplement_adapter_info.SupportedOptions2 &
+	  AAC_OPTION_DOORBELL_RESET)) &&
 	  aac_check_reset &&
 	  ((aac_check_reset != 1) ||
 	   !(aac->supplement_adapter_info.SupportedOptions2 &

diff --git a/drivers/scsi/aacraid/nark.c b/drivers/scsi/aacraid/nark.c
index c55f7c8..f397d21 100644
--- a/drivers/scsi/aacraid/nark.c
+++ b/drivers/scsi/aacraid/nark.c

@@ -4,7 +4,8 @@
  * based on the old aacraid driver that is..
  * Adaptec aacraid device driver for Linux.
  *
- * Copyright (c) 2006-2007 Adaptec, Inc. (aacraid@adaptec.com)
+ * Copyright (c) 2000-2010 Adaptec, Inc.
+ *               2010 PMC-Sierra, Inc. (aacraid@pmc-sierra.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by

diff --git a/drivers/scsi/aacraid/rkt.c b/drivers/scsi/aacraid/rkt.c
index 16d8db5..be44de9 100644
--- a/drivers/scsi/aacraid/rkt.c
+++ b/drivers/scsi/aacraid/rkt.c

@@ -5,7 +5,8 @@
  * based on the old aacraid driver that is..
  * Adaptec aacraid device driver for Linux.
  *
- * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
+ * Copyright (c) 2000-2010 Adaptec, Inc.
+ *               2010 PMC-Sierra, Inc. (aacraid@pmc-sierra.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by

diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c
index 84d77fd..ce530f1 100644
--- a/drivers/scsi/aacraid/rx.c
+++ b/drivers/scsi/aacraid/rx.c

@@ -5,7 +5,8 @@
  * based on the old aacraid driver that is..
  * Adaptec aacraid device driver for Linux.
  *
- * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
+ * Copyright (c) 2000-2010 Adaptec, Inc.
+ *               2010 PMC-Sierra, Inc. (aacraid@pmc-sierra.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -84,15 +85,35 @@
 
 static irqreturn_t aac_rx_intr_message(int irq, void *dev_id)
 {
+	int isAif, isFastResponse, isSpecial;
 	struct aac_dev *dev = dev_id;
 	u32 Index = rx_readl(dev, MUnit.OutboundQueue);
 	if (unlikely(Index == 0xFFFFFFFFL))
 		Index = rx_readl(dev, MUnit.OutboundQueue);
 	if (likely(Index != 0xFFFFFFFFL)) {
 		do {
-			if (unlikely(aac_intr_normal(dev, Index))) {
-				rx_writel(dev, MUnit.OutboundQueue, Index);
-				rx_writel(dev, MUnit.ODR, DoorBellAdapterNormRespReady);
+			isAif = isFastResponse = isSpecial = 0;
+			if (Index & 0x00000002L) {
+				isAif = 1;
+				if (Index == 0xFFFFFFFEL)
+					isSpecial = 1;
+				Index &= ~0x00000002L;
+			} else {
+				if (Index & 0x00000001L)
+					isFastResponse = 1;
+				Index >>= 2;
+			}
+			if (!isSpecial) {
+				if (unlikely(aac_intr_normal(dev,
+						Index, isAif,
+						isFastResponse, NULL))) {
+					rx_writel(dev,
+						MUnit.OutboundQueue,
+						Index);
+					rx_writel(dev,
+						MUnit.ODR,
+						DoorBellAdapterNormRespReady);
+				}
 			}
 			Index = rx_readl(dev, MUnit.OutboundQueue);
 		} while (Index != 0xFFFFFFFFL);
@@ -631,6 +652,10 @@
 			name, instance);
 		goto error_iounmap;
 	}
+	dev->dbg_base = dev->scsi_host_ptr->base;
+	dev->dbg_base_mapped = dev->base;
+	dev->dbg_size = dev->base_size;
+
 	aac_adapter_enable_int(dev);
 	/*
 	 *	Tell the adapter that all is configured, and it can

diff --git a/drivers/scsi/aacraid/sa.c b/drivers/scsi/aacraid/sa.c
index 622c21c..e5d4457 100644
--- a/drivers/scsi/aacraid/sa.c
+++ b/drivers/scsi/aacraid/sa.c

@@ -5,7 +5,8 @@
  * based on the old aacraid driver that is..
  * Adaptec aacraid device driver for Linux.
  *
- * Copyright (c) 2000-2007 Adaptec, Inc. (aacraid@adaptec.com)
+ * Copyright (c) 2000-2010 Adaptec, Inc.
+ *               2010 PMC-Sierra, Inc. (aacraid@pmc-sierra.com)
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -391,6 +392,10 @@
 			name, instance);
 		goto error_iounmap;
 	}
+	dev->dbg_base = dev->scsi_host_ptr->base;
+	dev->dbg_base_mapped = dev->base;
+	dev->dbg_size = dev->base_size;
+
 	aac_adapter_enable_int(dev);
 
 	/*

diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
new file mode 100644
index 0000000..c204946
--- /dev/null
+++ b/drivers/scsi/aacraid/src.c

@@ -0,0 +1,594 @@
+/*
+ *	Adaptec AAC series RAID controller driver
+ *	(c) Copyright 2001 Red Hat Inc.
+ *
+ * based on the old aacraid driver that is..
+ * Adaptec aacraid device driver for Linux.
+ *
+ * Copyright (c) 2000-2010 Adaptec, Inc.
+ *               2010 PMC-Sierra, Inc. (aacraid@pmc-sierra.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Module Name:
+ *  src.c
+ *
+ * Abstract: Hardware Device Interface for PMC SRC based controllers
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/version.h>
+#include <linux/completion.h>
+#include <linux/time.h>
+#include <linux/interrupt.h>
+#include <scsi/scsi_host.h>
+
+#include "aacraid.h"
+
+static irqreturn_t aac_src_intr_message(int irq, void *dev_id)
+{
+	struct aac_dev *dev = dev_id;
+	unsigned long bellbits, bellbits_shifted;
+	int our_interrupt = 0;
+	int isFastResponse;
+	u32 index, handle;
+
+	bellbits = src_readl(dev, MUnit.ODR_R);
+	if (bellbits & PmDoorBellResponseSent) {
+		bellbits = PmDoorBellResponseSent;
+		/* handle async. status */
+		our_interrupt = 1;
+		index = dev->host_rrq_idx;
+		if (dev->host_rrq[index] == 0) {
+			u32 old_index = index;
+			/* adjust index */
+			do {
+				index++;
+				if (index == dev->scsi_host_ptr->can_queue +
+							AAC_NUM_MGT_FIB)
+					index = 0;
+				if (dev->host_rrq[index] != 0)
+					break;
+			} while (index != old_index);
+			dev->host_rrq_idx = index;
+		}
+		for (;;) {
+			isFastResponse = 0;
+			/* remove toggle bit (31) */
+			handle = (dev->host_rrq[index] & 0x7fffffff);
+			/* check fast response bit (30) */
+			if (handle & 0x40000000)
+				isFastResponse = 1;
+			handle &= 0x0000ffff;
+			if (handle == 0)
+				break;
+
+			aac_intr_normal(dev, handle-1, 0, isFastResponse, NULL);
+
+			dev->host_rrq[index++] = 0;
+			if (index == dev->scsi_host_ptr->can_queue +
+						AAC_NUM_MGT_FIB)
+				index = 0;
+			dev->host_rrq_idx = index;
+		}
+	} else {
+		bellbits_shifted = (bellbits >> SRC_ODR_SHIFT);
+		if (bellbits_shifted & DoorBellAifPending) {
+			our_interrupt = 1;
+			/* handle AIF */
+			aac_intr_normal(dev, 0, 2, 0, NULL);
+		}
+	}
+
+	if (our_interrupt) {
+		src_writel(dev, MUnit.ODR_C, bellbits);
+		return IRQ_HANDLED;
+	}
+	return IRQ_NONE;
+}
+
+/**
+ *	aac_src_disable_interrupt	-	Disable interrupts
+ *	@dev: Adapter
+ */
+
+static void aac_src_disable_interrupt(struct aac_dev *dev)
+{
+	src_writel(dev, MUnit.OIMR, dev->OIMR = 0xffffffff);
+}
+
+/**
+ *	aac_src_enable_interrupt_message	-	Enable interrupts
+ *	@dev: Adapter
+ */
+
+static void aac_src_enable_interrupt_message(struct aac_dev *dev)
+{
+	src_writel(dev, MUnit.OIMR, dev->OIMR = 0xfffffff8);
+}
+
+/**
+ *	src_sync_cmd	-	send a command and wait
+ *	@dev: Adapter
+ *	@command: Command to execute
+ *	@p1: first parameter
+ *	@ret: adapter status
+ *
+ *	This routine will send a synchronous command to the adapter and wait
+ *	for its	completion.
+ */
+
+static int src_sync_cmd(struct aac_dev *dev, u32 command,
+	u32 p1, u32 p2, u32 p3, u32 p4, u32 p5, u32 p6,
+	u32 *status, u32 * r1, u32 * r2, u32 * r3, u32 * r4)
+{
+	unsigned long start;
+	int ok;
+
+	/*
+	 *	Write the command into Mailbox 0
+	 */
+	writel(command, &dev->IndexRegs->Mailbox[0]);
+	/*
+	 *	Write the parameters into Mailboxes 1 - 6
+	 */
+	writel(p1, &dev->IndexRegs->Mailbox[1]);
+	writel(p2, &dev->IndexRegs->Mailbox[2]);
+	writel(p3, &dev->IndexRegs->Mailbox[3]);
+	writel(p4, &dev->IndexRegs->Mailbox[4]);
+
+	/*
+	 *	Clear the synch command doorbell to start on a clean slate.
+	 */
+	src_writel(dev, MUnit.ODR_C, OUTBOUNDDOORBELL_0 << SRC_ODR_SHIFT);
+
+	/*
+	 *	Disable doorbell interrupts
+	 */
+	src_writel(dev, MUnit.OIMR, dev->OIMR = 0xffffffff);
+
+	/*
+	 *	Force the completion of the mask register write before issuing
+	 *	the interrupt.
+	 */
+	src_readl(dev, MUnit.OIMR);
+
+	/*
+	 *	Signal that there is a new synch command
+	 */
+	src_writel(dev, MUnit.IDR, INBOUNDDOORBELL_0 << SRC_IDR_SHIFT);
+
+	ok = 0;
+	start = jiffies;
+
+	/*
+	 *	Wait up to 30 seconds
+	 */
+	while (time_before(jiffies, start+30*HZ)) {
+		/* Delay 5 microseconds to let Mon960 get info. */
+		udelay(5);
+
+		/* Mon960 will set doorbell0 bit
+		 * when it has completed the command
+		 */
+		if ((src_readl(dev, MUnit.ODR_R) >> SRC_ODR_SHIFT) & OUTBOUNDDOORBELL_0) {
+			/* Clear the doorbell */
+			src_writel(dev,
+				MUnit.ODR_C,
+				OUTBOUNDDOORBELL_0 << SRC_ODR_SHIFT);
+			ok = 1;
+			break;
+		}
+
+		 /* Yield the processor in case we are slow */
+		msleep(1);
+	}
+	if (unlikely(ok != 1)) {
+		 /* Restore interrupt mask even though we timed out */
+		aac_adapter_enable_int(dev);
+		return -ETIMEDOUT;
+	}
+
+	 /* Pull the synch status from Mailbox 0 */
+	if (status)
+		*status = readl(&dev->IndexRegs->Mailbox[0]);
+	if (r1)
+		*r1 = readl(&dev->IndexRegs->Mailbox[1]);
+	if (r2)
+		*r2 = readl(&dev->IndexRegs->Mailbox[2]);
+	if (r3)
+		*r3 = readl(&dev->IndexRegs->Mailbox[3]);
+	if (r4)
+		*r4 = readl(&dev->IndexRegs->Mailbox[4]);
+
+	 /* Clear the synch command doorbell */
+	src_writel(dev, MUnit.ODR_C, OUTBOUNDDOORBELL_0 << SRC_ODR_SHIFT);
+
+	 /* Restore interrupt mask */
+	aac_adapter_enable_int(dev);
+	return 0;
+
+}
+
+/**
+ *	aac_src_interrupt_adapter	-	interrupt adapter
+ *	@dev: Adapter
+ *
+ *	Send an interrupt to the i960 and breakpoint it.
+ */
+
+static void aac_src_interrupt_adapter(struct aac_dev *dev)
+{
+	src_sync_cmd(dev, BREAKPOINT_REQUEST,
+		0, 0, 0, 0, 0, 0,
+		NULL, NULL, NULL, NULL, NULL);
+}
+
+/**
+ *	aac_src_notify_adapter		-	send an event to the adapter
+ *	@dev: Adapter
+ *	@event: Event to send
+ *
+ *	Notify the i960 that something it probably cares about has
+ *	happened.
+ */
+
+static void aac_src_notify_adapter(struct aac_dev *dev, u32 event)
+{
+	switch (event) {
+
+	case AdapNormCmdQue:
+		src_writel(dev, MUnit.ODR_C,
+			INBOUNDDOORBELL_1 << SRC_ODR_SHIFT);
+		break;
+	case HostNormRespNotFull:
+		src_writel(dev, MUnit.ODR_C,
+			INBOUNDDOORBELL_4 << SRC_ODR_SHIFT);
+		break;
+	case AdapNormRespQue:
+		src_writel(dev, MUnit.ODR_C,
+			INBOUNDDOORBELL_2 << SRC_ODR_SHIFT);
+		break;
+	case HostNormCmdNotFull:
+		src_writel(dev, MUnit.ODR_C,
+			INBOUNDDOORBELL_3 << SRC_ODR_SHIFT);
+		break;
+	case FastIo:
+		src_writel(dev, MUnit.ODR_C,
+			INBOUNDDOORBELL_6 << SRC_ODR_SHIFT);
+		break;
+	case AdapPrintfDone:
+		src_writel(dev, MUnit.ODR_C,
+			INBOUNDDOORBELL_5 << SRC_ODR_SHIFT);
+		break;
+	default:
+		BUG();
+		break;
+	}
+}
+
+/**
+ *	aac_src_start_adapter		-	activate adapter
+ *	@dev:	Adapter
+ *
+ *	Start up processing on an i960 based AAC adapter
+ */
+
+static void aac_src_start_adapter(struct aac_dev *dev)
+{
+	struct aac_init *init;
+
+	init = dev->init;
+	init->HostElapsedSeconds = cpu_to_le32(get_seconds());
+
+	/* We can only use a 32 bit address here */
+	src_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa,
+	  0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL);
+}
+
+/**
+ *	aac_src_check_health
+ *	@dev: device to check if healthy
+ *
+ *	Will attempt to determine if the specified adapter is alive and
+ *	capable of handling requests, returning 0 if alive.
+ */
+static int aac_src_check_health(struct aac_dev *dev)
+{
+	u32 status = src_readl(dev, MUnit.OMR);
+
+	/*
+	 *	Check to see if the board failed any self tests.
+	 */
+	if (unlikely(status & SELF_TEST_FAILED))
+		return -1;
+
+	/*
+	 *	Check to see if the board panic'd.
+	 */
+	if (unlikely(status & KERNEL_PANIC))
+		return (status >> 16) & 0xFF;
+	/*
+	 *	Wait for the adapter to be up and running.
+	 */
+	if (unlikely(!(status & KERNEL_UP_AND_RUNNING)))
+		return -3;
+	/*
+	 *	Everything is OK
+	 */
+	return 0;
+}
+
+/**
+ *	aac_src_deliver_message
+ *	@fib: fib to issue
+ *
+ *	Will send a fib, returning 0 if successful.
+ */
+static int aac_src_deliver_message(struct fib *fib)
+{
+	struct aac_dev *dev = fib->dev;
+	struct aac_queue *q = &dev->queues->queue[AdapNormCmdQueue];
+	unsigned long qflags;
+	u32 fibsize;
+	u64 address;
+	struct aac_fib_xporthdr *pFibX;
+
+	spin_lock_irqsave(q->lock, qflags);
+	q->numpending++;
+	spin_unlock_irqrestore(q->lock, qflags);
+
+	/* Calculate the amount to the fibsize bits */
+	fibsize = (sizeof(struct aac_fib_xporthdr) +
+		fib->hw_fib_va->header.Size + 127) / 128 - 1;
+	if (fibsize > (ALIGN32 - 1))
+		fibsize = ALIGN32 - 1;
+
+    /* Fill XPORT header */
+	pFibX = (struct aac_fib_xporthdr *)
+		((unsigned char *)fib->hw_fib_va -
+		sizeof(struct aac_fib_xporthdr));
+	pFibX->Handle = fib->hw_fib_va->header.SenderData + 1;
+	pFibX->HostAddress = fib->hw_fib_pa;
+	pFibX->Size = fib->hw_fib_va->header.Size;
+	address = fib->hw_fib_pa - (u64)sizeof(struct aac_fib_xporthdr);
+
+	src_writel(dev, MUnit.IQ_H, (u32)(address >> 32));
+	src_writel(dev, MUnit.IQ_L, (u32)(address & 0xffffffff) + fibsize);
+	return 0;
+}
+
+/**
+ *	aac_src_ioremap
+ *	@size: mapping resize request
+ *
+ */
+static int aac_src_ioremap(struct aac_dev *dev, u32 size)
+{
+	if (!size) {
+		iounmap(dev->regs.src.bar0);
+		dev->regs.src.bar0 = NULL;
+		iounmap(dev->base);
+		dev->base = NULL;
+		return 0;
+	}
+	dev->regs.src.bar1 = ioremap(pci_resource_start(dev->pdev, 2),
+		AAC_MIN_SRC_BAR1_SIZE);
+	dev->base = NULL;
+	if (dev->regs.src.bar1 == NULL)
+		return -1;
+	dev->base = dev->regs.src.bar0 = ioremap(dev->scsi_host_ptr->base,
+				size);
+	if (dev->base == NULL) {
+		iounmap(dev->regs.src.bar1);
+		dev->regs.src.bar1 = NULL;
+		return -1;
+	}
+	dev->IndexRegs = &((struct src_registers __iomem *)
+		dev->base)->IndexRegs;
+	return 0;
+}
+
+static int aac_src_restart_adapter(struct aac_dev *dev, int bled)
+{
+	u32 var, reset_mask;
+
+	if (bled >= 0) {
+		if (bled)
+			printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n",
+				dev->name, dev->id, bled);
+		bled = aac_adapter_sync_cmd(dev, IOP_RESET_ALWAYS,
+			0, 0, 0, 0, 0, 0, &var, &reset_mask, NULL, NULL, NULL);
+			if (bled || (var != 0x00000001))
+				bled = -EINVAL;
+		if (dev->supplement_adapter_info.SupportedOptions2 &
+			AAC_OPTION_DOORBELL_RESET) {
+			src_writel(dev, MUnit.IDR, reset_mask);
+			msleep(5000); /* Delay 5 seconds */
+		}
+	}
+
+	if (src_readl(dev, MUnit.OMR) & KERNEL_PANIC)
+		return -ENODEV;
+
+	if (startup_timeout < 300)
+		startup_timeout = 300;
+
+	return 0;
+}
+
+/**
+ *	aac_src_select_comm	-	Select communications method
+ *	@dev: Adapter
+ *	@comm: communications method
+ */
+int aac_src_select_comm(struct aac_dev *dev, int comm)
+{
+	switch (comm) {
+	case AAC_COMM_MESSAGE:
+		dev->a_ops.adapter_enable_int = aac_src_enable_interrupt_message;
+		dev->a_ops.adapter_intr = aac_src_intr_message;
+		dev->a_ops.adapter_deliver = aac_src_deliver_message;
+		break;
+	default:
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ *  aac_src_init	-	initialize an Cardinal Frey Bar card
+ *  @dev: device to configure
+ *
+ */
+
+int aac_src_init(struct aac_dev *dev)
+{
+	unsigned long start;
+	unsigned long status;
+	int restart = 0;
+	int instance = dev->id;
+	const char *name = dev->name;
+
+	dev->a_ops.adapter_ioremap = aac_src_ioremap;
+	dev->a_ops.adapter_comm = aac_src_select_comm;
+
+	dev->base_size = AAC_MIN_SRC_BAR0_SIZE;
+	if (aac_adapter_ioremap(dev, dev->base_size)) {
+		printk(KERN_WARNING "%s: unable to map adapter.\n", name);
+		goto error_iounmap;
+	}
+
+	/* Failure to reset here is an option ... */
+	dev->a_ops.adapter_sync_cmd = src_sync_cmd;
+	dev->a_ops.adapter_enable_int = aac_src_disable_interrupt;
+	if ((aac_reset_devices || reset_devices) &&
+		!aac_src_restart_adapter(dev, 0))
+		++restart;
+	/*
+	 *	Check to see if the board panic'd while booting.
+	 */
+	status = src_readl(dev, MUnit.OMR);
+	if (status & KERNEL_PANIC) {
+		if (aac_src_restart_adapter(dev, aac_src_check_health(dev)))
+			goto error_iounmap;
+		++restart;
+	}
+	/*
+	 *	Check to see if the board failed any self tests.
+	 */
+	status = src_readl(dev, MUnit.OMR);
+	if (status & SELF_TEST_FAILED) {
+		printk(KERN_ERR "%s%d: adapter self-test failed.\n",
+			dev->name, instance);
+		goto error_iounmap;
+	}
+	/*
+	 *	Check to see if the monitor panic'd while booting.
+	 */
+	if (status & MONITOR_PANIC) {
+		printk(KERN_ERR "%s%d: adapter monitor panic.\n",
+			dev->name, instance);
+		goto error_iounmap;
+	}
+	start = jiffies;
+	/*
+	 *	Wait for the adapter to be up and running. Wait up to 3 minutes
+	 */
+	while (!((status = src_readl(dev, MUnit.OMR)) &
+		KERNEL_UP_AND_RUNNING)) {
+		if ((restart &&
+		  (status & (KERNEL_PANIC|SELF_TEST_FAILED|MONITOR_PANIC))) ||
+		  time_after(jiffies, start+HZ*startup_timeout)) {
+			printk(KERN_ERR "%s%d: adapter kernel failed to start, init status = %lx.\n",
+					dev->name, instance, status);
+			goto error_iounmap;
+		}
+		if (!restart &&
+		  ((status & (KERNEL_PANIC|SELF_TEST_FAILED|MONITOR_PANIC)) ||
+		  time_after(jiffies, start + HZ *
+		  ((startup_timeout > 60)
+		    ? (startup_timeout - 60)
+		    : (startup_timeout / 2))))) {
+			if (likely(!aac_src_restart_adapter(dev,
+			    aac_src_check_health(dev))))
+				start = jiffies;
+			++restart;
+		}
+		msleep(1);
+	}
+	if (restart && aac_commit)
+		aac_commit = 1;
+	/*
+	 *	Fill in the common function dispatch table.
+	 */
+	dev->a_ops.adapter_interrupt = aac_src_interrupt_adapter;
+	dev->a_ops.adapter_disable_int = aac_src_disable_interrupt;
+	dev->a_ops.adapter_notify = aac_src_notify_adapter;
+	dev->a_ops.adapter_sync_cmd = src_sync_cmd;
+	dev->a_ops.adapter_check_health = aac_src_check_health;
+	dev->a_ops.adapter_restart = aac_src_restart_adapter;
+
+	/*
+	 *	First clear out all interrupts.  Then enable the one's that we
+	 *	can handle.
+	 */
+	aac_adapter_comm(dev, AAC_COMM_MESSAGE);
+	aac_adapter_disable_int(dev);
+	src_writel(dev, MUnit.ODR_C, 0xffffffff);
+	aac_adapter_enable_int(dev);
+
+	if (aac_init_adapter(dev) == NULL)
+		goto error_iounmap;
+	if (dev->comm_interface != AAC_COMM_MESSAGE_TYPE1)
+		goto error_iounmap;
+
+	dev->msi = aac_msi && !pci_enable_msi(dev->pdev);
+
+	if (request_irq(dev->pdev->irq, dev->a_ops.adapter_intr,
+			IRQF_SHARED|IRQF_DISABLED, "aacraid", dev) < 0) {
+
+		if (dev->msi)
+			pci_disable_msi(dev->pdev);
+
+		printk(KERN_ERR "%s%d: Interrupt unavailable.\n",
+			name, instance);
+		goto error_iounmap;
+	}
+	dev->dbg_base = pci_resource_start(dev->pdev, 2);
+	dev->dbg_base_mapped = dev->regs.src.bar1;
+	dev->dbg_size = AAC_MIN_SRC_BAR1_SIZE;
+
+	aac_adapter_enable_int(dev);
+	/*
+	 *	Tell the adapter that all is configured, and it can
+	 * start accepting requests
+	 */
+	aac_src_start_adapter(dev);
+
+	return 0;
+
+error_iounmap:
+
+	return -1;
+}

diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h
index df2fc09..b6d350a 100644
--- a/drivers/scsi/bnx2fc/bnx2fc.h
+++ b/drivers/scsi/bnx2fc/bnx2fc.h

@@ -62,7 +62,7 @@
 #include "bnx2fc_constants.h"
 
 #define BNX2FC_NAME		"bnx2fc"
-#define BNX2FC_VERSION		"1.0.0"
+#define BNX2FC_VERSION		"1.0.1"
 
 #define PFX			"bnx2fc: "
 
@@ -84,9 +84,15 @@
 #define BNX2FC_NUM_MAX_SESS	128
 #define BNX2FC_NUM_MAX_SESS_LOG	(ilog2(BNX2FC_NUM_MAX_SESS))
 
-#define BNX2FC_MAX_OUTSTANDING_CMNDS	4096
+#define BNX2FC_MAX_OUTSTANDING_CMNDS	2048
+#define BNX2FC_CAN_QUEUE		BNX2FC_MAX_OUTSTANDING_CMNDS
+#define BNX2FC_ELSTM_XIDS		BNX2FC_CAN_QUEUE
 #define BNX2FC_MIN_PAYLOAD		256
 #define BNX2FC_MAX_PAYLOAD		2048
+#define BNX2FC_MFS			\
+			(BNX2FC_MAX_PAYLOAD + sizeof(struct fc_frame_header))
+#define BNX2FC_MINI_JUMBO_MTU		2500
+
 
 #define BNX2FC_RQ_BUF_SZ		256
 #define BNX2FC_RQ_BUF_LOG_SZ		(ilog2(BNX2FC_RQ_BUF_SZ))
@@ -98,7 +104,8 @@
 #define BNX2FC_CONFQ_WQE_SIZE		(sizeof(struct fcoe_confqe))
 #define BNX2FC_5771X_DB_PAGE_SIZE	128
 
-#define BNX2FC_MAX_TASKS		BNX2FC_MAX_OUTSTANDING_CMNDS
+#define BNX2FC_MAX_TASKS		\
+			     (BNX2FC_MAX_OUTSTANDING_CMNDS + BNX2FC_ELSTM_XIDS)
 #define BNX2FC_TASK_SIZE		128
 #define	BNX2FC_TASKS_PER_PAGE		(PAGE_SIZE/BNX2FC_TASK_SIZE)
 #define BNX2FC_TASK_CTX_ARR_SZ		(BNX2FC_MAX_TASKS/BNX2FC_TASKS_PER_PAGE)
@@ -112,10 +119,10 @@
 #define BNX2FC_WRITE			(1 << 0)
 
 #define BNX2FC_MIN_XID			0
-#define BNX2FC_MAX_XID			(BNX2FC_MAX_OUTSTANDING_CMNDS - 1)
-#define FCOE_MIN_XID			(BNX2FC_MAX_OUTSTANDING_CMNDS)
-#define FCOE_MAX_XID		\
-			(BNX2FC_MAX_OUTSTANDING_CMNDS + (nr_cpu_ids * 256))
+#define BNX2FC_MAX_XID			\
+			(BNX2FC_MAX_OUTSTANDING_CMNDS + BNX2FC_ELSTM_XIDS - 1)
+#define FCOE_MIN_XID			(BNX2FC_MAX_XID + 1)
+#define FCOE_MAX_XID			(FCOE_MIN_XID + 4095)
 #define BNX2FC_MAX_LUN			0xFFFF
 #define BNX2FC_MAX_FCP_TGT		256
 #define BNX2FC_MAX_CMD_LEN		16
@@ -125,7 +132,6 @@
 
 #define BNX2FC_WAIT_CNT			120
 #define BNX2FC_FW_TIMEOUT		(3 * HZ)
-
 #define PORT_MAX			2
 
 #define CMD_SCSI_STATUS(Cmnd)		((Cmnd)->SCp.Status)

diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index e476e87..e2e6475 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c

@@ -21,7 +21,7 @@
 
 #define DRV_MODULE_NAME		"bnx2fc"
 #define DRV_MODULE_VERSION	BNX2FC_VERSION
-#define DRV_MODULE_RELDATE	"Jan 25, 2011"
+#define DRV_MODULE_RELDATE	"Mar 17, 2011"
 
 
 static char version[] __devinitdata =
@@ -437,17 +437,16 @@
 	set_current_state(TASK_INTERRUPTIBLE);
 	while (!kthread_should_stop()) {
 		schedule();
-		set_current_state(TASK_RUNNING);
 		spin_lock_bh(&bg->fcoe_rx_list.lock);
 		while ((skb = __skb_dequeue(&bg->fcoe_rx_list)) != NULL) {
 			spin_unlock_bh(&bg->fcoe_rx_list.lock);
 			bnx2fc_recv_frame(skb);
 			spin_lock_bh(&bg->fcoe_rx_list.lock);
 		}
+		__set_current_state(TASK_INTERRUPTIBLE);
 		spin_unlock_bh(&bg->fcoe_rx_list.lock);
-		set_current_state(TASK_INTERRUPTIBLE);
 	}
-	set_current_state(TASK_RUNNING);
+	__set_current_state(TASK_RUNNING);
 	return 0;
 }
 
@@ -569,7 +568,6 @@
 	set_current_state(TASK_INTERRUPTIBLE);
 	while (!kthread_should_stop()) {
 		schedule();
-		set_current_state(TASK_RUNNING);
 		spin_lock_bh(&p->fp_work_lock);
 		while (!list_empty(&p->work_list)) {
 			list_splice_init(&p->work_list, &work_list);
@@ -583,10 +581,10 @@
 
 			spin_lock_bh(&p->fp_work_lock);
 		}
+		__set_current_state(TASK_INTERRUPTIBLE);
 		spin_unlock_bh(&p->fp_work_lock);
-		set_current_state(TASK_INTERRUPTIBLE);
 	}
-	set_current_state(TASK_RUNNING);
+	__set_current_state(TASK_RUNNING);
 
 	return 0;
 }
@@ -661,31 +659,6 @@
 	return 0;
 }
 
-static int  bnx2fc_mfs_update(struct fc_lport *lport)
-{
-	struct fcoe_port *port = lport_priv(lport);
-	struct bnx2fc_hba *hba = port->priv;
-	struct net_device *netdev = hba->netdev;
-	u32 mfs;
-	u32 max_mfs;
-
-	mfs = netdev->mtu - (sizeof(struct fcoe_hdr) +
-			     sizeof(struct fcoe_crc_eof));
-	max_mfs = BNX2FC_MAX_PAYLOAD + sizeof(struct fc_frame_header);
-	BNX2FC_HBA_DBG(lport, "mfs = %d, max_mfs = %d\n", mfs, max_mfs);
-	if (mfs > max_mfs)
-		mfs = max_mfs;
-
-	/* Adjust mfs to be a multiple of 256 bytes */
-	mfs = (((mfs - sizeof(struct fc_frame_header)) / BNX2FC_MIN_PAYLOAD) *
-			BNX2FC_MIN_PAYLOAD);
-	mfs = mfs + sizeof(struct fc_frame_header);
-
-	BNX2FC_HBA_DBG(lport, "Set MFS = %d\n", mfs);
-	if (fc_set_mfs(lport, mfs))
-		return -EINVAL;
-	return 0;
-}
 static void bnx2fc_link_speed_update(struct fc_lport *lport)
 {
 	struct fcoe_port *port = lport_priv(lport);
@@ -754,7 +727,7 @@
 	    !hba->phys_dev->ethtool_ops->get_pauseparam)
 		return -EOPNOTSUPP;
 
-	if (bnx2fc_mfs_update(lport))
+	if (fc_set_mfs(lport, BNX2FC_MFS))
 		return -EINVAL;
 
 	skb_queue_head_init(&port->fcoe_pending_queue);
@@ -825,14 +798,6 @@
 		if (!test_bit(ADAPTER_STATE_UP, &hba->adapter_state))
 			printk(KERN_ERR "indicate_netevent: "\
 					"adapter is not UP!!\n");
-		/* fall thru to update mfs if MTU has changed */
-	case NETDEV_CHANGEMTU:
-		BNX2FC_HBA_DBG(lport, "NETDEV_CHANGEMTU event\n");
-		bnx2fc_mfs_update(lport);
-		mutex_lock(&lport->lp_mutex);
-		list_for_each_entry(vport, &lport->vports, list)
-			bnx2fc_mfs_update(vport);
-		mutex_unlock(&lport->lp_mutex);
 		break;
 
 	case NETDEV_DOWN:
@@ -1095,13 +1060,6 @@
 	struct netdev_hw_addr *ha;
 	int sel_san_mac = 0;
 
-	/* Do not support for bonding device */
-	if ((netdev->priv_flags & IFF_MASTER_ALB) ||
-			(netdev->priv_flags & IFF_SLAVE_INACTIVE) ||
-			(netdev->priv_flags & IFF_MASTER_8023AD)) {
-		return -EOPNOTSUPP;
-	}
-
 	/* setup Source MAC Address */
 	rcu_read_lock();
 	for_each_dev_addr(physdev, ha) {
@@ -1432,16 +1390,9 @@
 	struct net_device *phys_dev;
 	int rc = 0;
 
-	if (!rtnl_trylock())
-		return restart_syscall();
+	rtnl_lock();
 
 	mutex_lock(&bnx2fc_dev_lock);
-#ifdef CONFIG_SCSI_BNX2X_FCOE_MODULE
-	if (THIS_MODULE->state != MODULE_STATE_LIVE) {
-		rc = -ENODEV;
-		goto netdev_err;
-	}
-#endif
 	/* obtain physical netdev */
 	if (netdev->priv_flags & IFF_802_1Q_VLAN)
 		phys_dev = vlan_dev_real_dev(netdev);
@@ -1805,18 +1756,10 @@
 	struct ethtool_drvinfo drvinfo;
 	int rc = 0;
 
-	if (!rtnl_trylock()) {
-		printk(KERN_ERR PFX "retrying for rtnl_lock\n");
-		return -EIO;
-	}
+	rtnl_lock();
 
 	mutex_lock(&bnx2fc_dev_lock);
 
-	if (THIS_MODULE->state != MODULE_STATE_LIVE) {
-		rc = -ENODEV;
-		goto nodev;
-	}
-
 	/* obtain physical netdev */
 	if (netdev->priv_flags & IFF_802_1Q_VLAN)
 		phys_dev = vlan_dev_real_dev(netdev);
@@ -1867,19 +1810,11 @@
 	struct ethtool_drvinfo drvinfo;
 	int rc = 0;
 
-	if (!rtnl_trylock()) {
-		printk(KERN_ERR PFX "retrying for rtnl_lock\n");
-		return -EIO;
-	}
+	rtnl_lock();
 
 	BNX2FC_MISC_DBG("Entered %s\n", __func__);
 	mutex_lock(&bnx2fc_dev_lock);
 
-	if (THIS_MODULE->state != MODULE_STATE_LIVE) {
-		rc = -ENODEV;
-		goto nodev;
-	}
-
 	/* obtain physical netdev */
 	if (netdev->priv_flags & IFF_802_1Q_VLAN)
 		phys_dev = vlan_dev_real_dev(netdev);
@@ -1942,18 +1877,9 @@
 		return -EIO;
 	}
 
-	if (!rtnl_trylock()) {
-		printk(KERN_ERR "trying for rtnl_lock\n");
-		return -EIO;
-	}
-	mutex_lock(&bnx2fc_dev_lock);
+	rtnl_lock();
 
-#ifdef CONFIG_SCSI_BNX2X_FCOE_MODULE
-	if (THIS_MODULE->state != MODULE_STATE_LIVE) {
-		rc = -ENODEV;
-		goto mod_err;
-	}
-#endif
+	mutex_lock(&bnx2fc_dev_lock);
 
 	if (!try_module_get(THIS_MODULE)) {
 		rc = -EINVAL;
@@ -2506,7 +2432,7 @@
 	.change_queue_type	= fc_change_queue_type,
 	.this_id		= -1,
 	.cmd_per_lun		= 3,
-	.can_queue		= (BNX2FC_MAX_OUTSTANDING_CMNDS/2),
+	.can_queue		= BNX2FC_CAN_QUEUE,
 	.use_clustering		= ENABLE_CLUSTERING,
 	.sg_tablesize		= BNX2FC_MAX_BDS_PER_CMD,
 	.max_sectors		= 512,

diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c
index 4f40968..1b680e2 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c

@@ -87,7 +87,7 @@
 	fcoe_init1.task_list_pbl_addr_lo = (u32) hba->task_ctx_bd_dma;
 	fcoe_init1.task_list_pbl_addr_hi =
 				(u32) ((u64) hba->task_ctx_bd_dma >> 32);
-	fcoe_init1.mtu = hba->netdev->mtu;
+	fcoe_init1.mtu = BNX2FC_MINI_JUMBO_MTU;
 
 	fcoe_init1.flags = (PAGE_SHIFT <<
 				FCOE_KWQE_INIT1_LOG_PAGE_SIZE_SHIFT);
@@ -590,7 +590,10 @@
 
 		num_rq = (frame_len + BNX2FC_RQ_BUF_SZ - 1) / BNX2FC_RQ_BUF_SZ;
 
+		spin_lock_bh(&tgt->tgt_lock);
 		rq_data = (unsigned char *)bnx2fc_get_next_rqe(tgt, num_rq);
+		spin_unlock_bh(&tgt->tgt_lock);
+
 		if (rq_data) {
 			buf = rq_data;
 		} else {
@@ -603,8 +606,10 @@
 			}
 
 			for (i = 0; i < num_rq; i++) {
+				spin_lock_bh(&tgt->tgt_lock);
 				rq_data = (unsigned char *)
 					   bnx2fc_get_next_rqe(tgt, 1);
+				spin_unlock_bh(&tgt->tgt_lock);
 				len = BNX2FC_RQ_BUF_SZ;
 				memcpy(buf1, rq_data, len);
 				buf1 += len;
@@ -615,13 +620,15 @@
 
 		if (buf != rq_data)
 			kfree(buf);
+		spin_lock_bh(&tgt->tgt_lock);
 		bnx2fc_return_rqe(tgt, num_rq);
+		spin_unlock_bh(&tgt->tgt_lock);
 		break;
 
 	case FCOE_ERROR_DETECTION_CQE_TYPE:
 		/*
-		 *In case of error reporting CQE a single RQ entry
-		 * is consumes.
+		 * In case of error reporting CQE a single RQ entry
+		 * is consumed.
 		 */
 		spin_lock_bh(&tgt->tgt_lock);
 		num_rq = 1;
@@ -705,6 +712,7 @@
 		 *In case of warning reporting CQE a single RQ entry
 		 * is consumes.
 		 */
+		spin_lock_bh(&tgt->tgt_lock);
 		num_rq = 1;
 		err_entry = (struct fcoe_err_report_entry *)
 			     bnx2fc_get_next_rqe(tgt, 1);
@@ -717,6 +725,7 @@
 			err_entry->tx_buf_off, err_entry->rx_buf_off);
 
 		bnx2fc_return_rqe(tgt, 1);
+		spin_unlock_bh(&tgt->tgt_lock);
 		break;
 
 	default:

diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c
index 0f1dd23..d3fc302 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_io.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c

@@ -11,6 +11,9 @@
  */
 
 #include "bnx2fc.h"
+
+#define RESERVE_FREE_LIST_INDEX num_possible_cpus()
+
 static int bnx2fc_split_bd(struct bnx2fc_cmd *io_req, u64 addr, int sg_len,
 			   int bd_index);
 static int bnx2fc_map_sg(struct bnx2fc_cmd *io_req);
@@ -242,8 +245,9 @@
 	u32 mem_size;
 	u16 xid;
 	int i;
-	int num_ios;
+	int num_ios, num_pri_ios;
 	size_t bd_tbl_sz;
+	int arr_sz = num_possible_cpus() + 1;
 
 	if (max_xid <= min_xid || max_xid == FC_XID_UNKNOWN) {
 		printk(KERN_ERR PFX "cmd_mgr_alloc: Invalid min_xid 0x%x \
@@ -263,14 +267,14 @@
 	}
 
 	cmgr->free_list = kzalloc(sizeof(*cmgr->free_list) *
-				  num_possible_cpus(), GFP_KERNEL);
+				  arr_sz, GFP_KERNEL);
 	if (!cmgr->free_list) {
 		printk(KERN_ERR PFX "failed to alloc free_list\n");
 		goto mem_err;
 	}
 
 	cmgr->free_list_lock = kzalloc(sizeof(*cmgr->free_list_lock) *
-				       num_possible_cpus(), GFP_KERNEL);
+				       arr_sz, GFP_KERNEL);
 	if (!cmgr->free_list_lock) {
 		printk(KERN_ERR PFX "failed to alloc free_list_lock\n");
 		goto mem_err;
@@ -279,13 +283,18 @@
 	cmgr->hba = hba;
 	cmgr->cmds = (struct bnx2fc_cmd **)(cmgr + 1);
 
-	for (i = 0; i < num_possible_cpus(); i++)  {
+	for (i = 0; i < arr_sz; i++)  {
 		INIT_LIST_HEAD(&cmgr->free_list[i]);
 		spin_lock_init(&cmgr->free_list_lock[i]);
 	}
 
-	/* Pre-allocated pool of bnx2fc_cmds */
+	/*
+	 * Pre-allocated pool of bnx2fc_cmds.
+	 * Last entry in the free list array is the free list
+	 * of slow path requests.
+	 */
 	xid = BNX2FC_MIN_XID;
+	num_pri_ios = num_ios - BNX2FC_ELSTM_XIDS;
 	for (i = 0; i < num_ios; i++) {
 		io_req = kzalloc(sizeof(*io_req), GFP_KERNEL);
 
@@ -298,11 +307,13 @@
 		INIT_DELAYED_WORK(&io_req->timeout_work, bnx2fc_cmd_timeout);
 
 		io_req->xid = xid++;
-		if (io_req->xid >= BNX2FC_MAX_OUTSTANDING_CMNDS)
-			printk(KERN_ERR PFX "ERROR allocating xids - 0x%x\n",
-				io_req->xid);
-		list_add_tail(&io_req->link,
-			&cmgr->free_list[io_req->xid % num_possible_cpus()]);
+		if (i < num_pri_ios)
+			list_add_tail(&io_req->link,
+				&cmgr->free_list[io_req->xid %
+						 num_possible_cpus()]);
+		else
+			list_add_tail(&io_req->link,
+				&cmgr->free_list[num_possible_cpus()]);
 		io_req++;
 	}
 
@@ -389,7 +400,7 @@
 	if (!cmgr->free_list)
 		goto free_cmgr;
 
-	for (i = 0; i < num_possible_cpus(); i++)  {
+	for (i = 0; i < num_possible_cpus() + 1; i++)  {
 		struct list_head *list;
 		struct list_head *tmp;
 
@@ -413,6 +424,7 @@
 	struct bnx2fc_cmd *io_req;
 	struct list_head *listp;
 	struct io_bdt *bd_tbl;
+	int index = RESERVE_FREE_LIST_INDEX;
 	u32 max_sqes;
 	u16 xid;
 
@@ -432,26 +444,26 @@
 	 * NOTE: Free list insertions and deletions are protected with
 	 * cmgr lock
 	 */
-	spin_lock_bh(&cmd_mgr->free_list_lock[smp_processor_id()]);
-	if ((list_empty(&(cmd_mgr->free_list[smp_processor_id()]))) ||
+	spin_lock_bh(&cmd_mgr->free_list_lock[index]);
+	if ((list_empty(&(cmd_mgr->free_list[index]))) ||
 	    (tgt->num_active_ios.counter  >= max_sqes)) {
 		BNX2FC_TGT_DBG(tgt, "No free els_tm cmds available "
 			"ios(%d):sqes(%d)\n",
 			tgt->num_active_ios.counter, tgt->max_sqes);
-		if (list_empty(&(cmd_mgr->free_list[smp_processor_id()])))
+		if (list_empty(&(cmd_mgr->free_list[index])))
 			printk(KERN_ERR PFX "elstm_alloc: list_empty\n");
-		spin_unlock_bh(&cmd_mgr->free_list_lock[smp_processor_id()]);
+		spin_unlock_bh(&cmd_mgr->free_list_lock[index]);
 		return NULL;
 	}
 
 	listp = (struct list_head *)
-			cmd_mgr->free_list[smp_processor_id()].next;
+			cmd_mgr->free_list[index].next;
 	list_del_init(listp);
 	io_req = (struct bnx2fc_cmd *) listp;
 	xid = io_req->xid;
 	cmd_mgr->cmds[xid] = io_req;
 	atomic_inc(&tgt->num_active_ios);
-	spin_unlock_bh(&cmd_mgr->free_list_lock[smp_processor_id()]);
+	spin_unlock_bh(&cmd_mgr->free_list_lock[index]);
 
 	INIT_LIST_HEAD(&io_req->link);
 
@@ -479,27 +491,30 @@
 	struct io_bdt *bd_tbl;
 	u32 max_sqes;
 	u16 xid;
+	int index = get_cpu();
 
 	max_sqes = BNX2FC_SCSI_MAX_SQES;
 	/*
 	 * NOTE: Free list insertions and deletions are protected with
 	 * cmgr lock
 	 */
-	spin_lock_bh(&cmd_mgr->free_list_lock[smp_processor_id()]);
-	if ((list_empty(&cmd_mgr->free_list[smp_processor_id()])) ||
+	spin_lock_bh(&cmd_mgr->free_list_lock[index]);
+	if ((list_empty(&cmd_mgr->free_list[index])) ||
 	    (tgt->num_active_ios.counter  >= max_sqes)) {
-		spin_unlock_bh(&cmd_mgr->free_list_lock[smp_processor_id()]);
+		spin_unlock_bh(&cmd_mgr->free_list_lock[index]);
+		put_cpu();
 		return NULL;
 	}
 
 	listp = (struct list_head *)
-		cmd_mgr->free_list[smp_processor_id()].next;
+		cmd_mgr->free_list[index].next;
 	list_del_init(listp);
 	io_req = (struct bnx2fc_cmd *) listp;
 	xid = io_req->xid;
 	cmd_mgr->cmds[xid] = io_req;
 	atomic_inc(&tgt->num_active_ios);
-	spin_unlock_bh(&cmd_mgr->free_list_lock[smp_processor_id()]);
+	spin_unlock_bh(&cmd_mgr->free_list_lock[index]);
+	put_cpu();
 
 	INIT_LIST_HEAD(&io_req->link);
 
@@ -522,8 +537,15 @@
 	struct bnx2fc_cmd *io_req = container_of(ref,
 						struct bnx2fc_cmd, refcount);
 	struct bnx2fc_cmd_mgr *cmd_mgr = io_req->cmd_mgr;
+	int index;
 
-	spin_lock_bh(&cmd_mgr->free_list_lock[smp_processor_id()]);
+	if (io_req->cmd_type == BNX2FC_SCSI_CMD)
+		index = io_req->xid % num_possible_cpus();
+	else
+		index = RESERVE_FREE_LIST_INDEX;
+
+
+	spin_lock_bh(&cmd_mgr->free_list_lock[index]);
 	if (io_req->cmd_type != BNX2FC_SCSI_CMD)
 		bnx2fc_free_mp_resc(io_req);
 	cmd_mgr->cmds[io_req->xid] = NULL;
@@ -531,9 +553,10 @@
 	list_del_init(&io_req->link);
 	/* Add it to the free list */
 	list_add(&io_req->link,
-			&cmd_mgr->free_list[smp_processor_id()]);
+			&cmd_mgr->free_list[index]);
 	atomic_dec(&io_req->tgt->num_active_ios);
-	spin_unlock_bh(&cmd_mgr->free_list_lock[smp_processor_id()]);
+	spin_unlock_bh(&cmd_mgr->free_list_lock[index]);
+
 }
 
 static void bnx2fc_free_mp_resc(struct bnx2fc_cmd *io_req)

diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c
index 7ea93af..7cc05e4 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c

@@ -304,10 +304,8 @@
 				" not sent to FW\n");
 
 	/* Free session resources */
-	spin_lock_bh(&tgt->cq_lock);
 	bnx2fc_free_session_resc(hba, tgt);
 	bnx2fc_free_conn_id(hba, tgt->fcoe_conn_id);
-	spin_unlock_bh(&tgt->cq_lock);
 }
 
 static int bnx2fc_init_tgt(struct bnx2fc_rport *tgt,
@@ -830,11 +828,13 @@
 		tgt->rq = NULL;
 	}
 	/* Free CQ */
+	spin_lock_bh(&tgt->cq_lock);
 	if (tgt->cq) {
 		dma_free_coherent(&hba->pcidev->dev, tgt->cq_mem_size,
 				    tgt->cq, tgt->cq_dma);
 		tgt->cq = NULL;
 	}
+	spin_unlock_bh(&tgt->cq_lock);
 	/* Free SQ */
 	if (tgt->sq) {
 		dma_free_coherent(&hba->pcidev->dev, tgt->sq_mem_size,

diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 8eeb39f..e98ae33 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c

@@ -132,14 +132,25 @@
 	if (page_count(sg_page(sg)) >= 1 && !recv)
 		return;
 
-	segment->sg_mapped = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
+	if (recv) {
+		segment->atomic_mapped = true;
+		segment->sg_mapped = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
+	} else {
+		segment->atomic_mapped = false;
+		/* the xmit path can sleep with the page mapped so use kmap */
+		segment->sg_mapped = kmap(sg_page(sg));
+	}
+
 	segment->data = segment->sg_mapped + sg->offset + segment->sg_offset;
 }
 
 void iscsi_tcp_segment_unmap(struct iscsi_segment *segment)
 {
 	if (segment->sg_mapped) {
-		kunmap_atomic(segment->sg_mapped, KM_SOFTIRQ0);
+		if (segment->atomic_mapped)
+			kunmap_atomic(segment->sg_mapped, KM_SOFTIRQ0);
+		else
+			kunmap(sg_page(segment->sg));
 		segment->sg_mapped = NULL;
 		segment->data = NULL;
 	}

diff --git a/drivers/scsi/lpfc/Makefile b/drivers/scsi/lpfc/Makefile
index 14de249..88928f0 100644
--- a/drivers/scsi/lpfc/Makefile
+++ b/drivers/scsi/lpfc/Makefile

@@ -1,7 +1,7 @@
 #/*******************************************************************
 # * This file is part of the Emulex Linux Device Driver for         *
 # * Fibre Channel Host Bus Adapters.                                *
-# * Copyright (C) 2004-2006 Emulex.  All rights reserved.           *
+# * Copyright (C) 2004-2011 Emulex.  All rights reserved.           *
 # * EMULEX and SLI are trademarks of Emulex.                        *
 # * www.emulex.com                                                  *
 # *                                                                 *

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index b64c6da..60e98a6 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h

@@ -539,6 +539,8 @@
 		(struct lpfc_hba *, uint32_t);
 	int (*lpfc_hba_down_link)
 		(struct lpfc_hba *, uint32_t);
+	int (*lpfc_selective_reset)
+		(struct lpfc_hba *);
 
 	/* SLI4 specific HBA data structure */
 	struct lpfc_sli4_hba sli4_hba;
@@ -895,7 +897,18 @@
 	return;
 }
 
-static inline void
+static inline int
+lpfc_readl(void __iomem *addr, uint32_t *data)
+{
+	uint32_t temp;
+	temp = readl(addr);
+	if (temp == 0xffffffff)
+		return -EIO;
+	*data = temp;
+	return 0;
+}
+
+static inline int
 lpfc_sli_read_hs(struct lpfc_hba *phba)
 {
 	/*
@@ -904,15 +917,17 @@
 	 */
 	phba->sli.slistat.err_attn_event++;
 
-	/* Save status info */
-	phba->work_hs = readl(phba->HSregaddr);
-	phba->work_status[0] = readl(phba->MBslimaddr + 0xa8);
-	phba->work_status[1] = readl(phba->MBslimaddr + 0xac);
+	/* Save status info and check for unplug error */
+	if (lpfc_readl(phba->HSregaddr, &phba->work_hs) ||
+		lpfc_readl(phba->MBslimaddr + 0xa8, &phba->work_status[0]) ||
+		lpfc_readl(phba->MBslimaddr + 0xac, &phba->work_status[1])) {
+		return -EIO;
+	}
 
 	/* Clear chip Host Attention error bit */
 	writel(HA_ERATT, phba->HAregaddr);
 	readl(phba->HAregaddr); /* flush */
 	phba->pport->stopped = 1;
 
-	return;
+	return 0;
 }

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index e7c020d..4e0faa0 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c

@@ -685,7 +685,7 @@
  * -EIO reset not configured or error posting the event
  * zero for success
  **/
-static int
+int
 lpfc_selective_reset(struct lpfc_hba *phba)
 {
 	struct completion online_compl;
@@ -746,7 +746,7 @@
 	int status = -EINVAL;
 
 	if (strncmp(buf, "selective", sizeof("selective") - 1) == 0)
-		status = lpfc_selective_reset(phba);
+		status = phba->lpfc_selective_reset(phba);
 
 	if (status == 0)
 		return strlen(buf);
@@ -1224,7 +1224,10 @@
 	if (val & ENABLE_FCP_RING_POLLING) {
 		if ((val & DISABLE_FCP_RING_INT) &&
 		    !(old_val & DISABLE_FCP_RING_INT)) {
-			creg_val = readl(phba->HCregaddr);
+			if (lpfc_readl(phba->HCregaddr, &creg_val)) {
+				spin_unlock_irq(&phba->hbalock);
+				return -EINVAL;
+			}
 			creg_val &= ~(HC_R0INT_ENA << LPFC_FCP_RING);
 			writel(creg_val, phba->HCregaddr);
 			readl(phba->HCregaddr); /* flush */
@@ -1242,7 +1245,10 @@
 		spin_unlock_irq(&phba->hbalock);
 		del_timer(&phba->fcp_poll_timer);
 		spin_lock_irq(&phba->hbalock);
-		creg_val = readl(phba->HCregaddr);
+		if (lpfc_readl(phba->HCregaddr, &creg_val)) {
+			spin_unlock_irq(&phba->hbalock);
+			return -EINVAL;
+		}
 		creg_val |= (HC_R0INT_ENA << LPFC_FCP_RING);
 		writel(creg_val, phba->HCregaddr);
 		readl(phba->HCregaddr); /* flush */

diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index 0dd43bb..793b9f1 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2009-2010 Emulex.  All rights reserved.                *
+ * Copyright (C) 2009-2011 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
@@ -348,7 +348,10 @@
 	dd_data->context_un.iocb.bmp = bmp;
 
 	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
-		creg_val = readl(phba->HCregaddr);
+		if (lpfc_readl(phba->HCregaddr, &creg_val)) {
+			rc = -EIO ;
+			goto free_cmdiocbq;
+		}
 		creg_val |= (HC_R0INT_ENA << LPFC_FCP_RING);
 		writel(creg_val, phba->HCregaddr);
 		readl(phba->HCregaddr); /* flush */
@@ -599,7 +602,10 @@
 	dd_data->context_un.iocb.ndlp = ndlp;
 
 	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
-		creg_val = readl(phba->HCregaddr);
+		if (lpfc_readl(phba->HCregaddr, &creg_val)) {
+			rc = -EIO;
+			goto linkdown_err;
+		}
 		creg_val |= (HC_R0INT_ENA << LPFC_FCP_RING);
 		writel(creg_val, phba->HCregaddr);
 		readl(phba->HCregaddr); /* flush */
@@ -613,6 +619,7 @@
 	else
 		rc = -EIO;
 
+linkdown_err:
 	pci_unmap_sg(phba->pcidev, job->request_payload.sg_list,
 		     job->request_payload.sg_cnt, DMA_TO_DEVICE);
 	pci_unmap_sg(phba->pcidev, job->reply_payload.sg_list,
@@ -1357,7 +1364,10 @@
 	dd_data->context_un.iocb.ndlp = ndlp;
 
 	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
-		creg_val = readl(phba->HCregaddr);
+		if (lpfc_readl(phba->HCregaddr, &creg_val)) {
+			rc = -IOCB_ERROR;
+			goto issue_ct_rsp_exit;
+		}
 		creg_val |= (HC_R0INT_ENA << LPFC_FCP_RING);
 		writel(creg_val, phba->HCregaddr);
 		readl(phba->HCregaddr); /* flush */
@@ -2479,16 +2489,18 @@
 
 	from = (uint8_t *)dd_data->context_un.mbox.mb;
 	job = dd_data->context_un.mbox.set_job;
-	size = job->reply_payload.payload_len;
-	job->reply->reply_payload_rcv_len =
-		sg_copy_from_buffer(job->reply_payload.sg_list,
-				job->reply_payload.sg_cnt,
-				from, size);
-	job->reply->result = 0;
+	if (job) {
+		size = job->reply_payload.payload_len;
+		job->reply->reply_payload_rcv_len =
+			sg_copy_from_buffer(job->reply_payload.sg_list,
+					job->reply_payload.sg_cnt,
+					from, size);
+		job->reply->result = 0;
 
+		job->dd_data = NULL;
+		job->job_done(job);
+	}
 	dd_data->context_un.mbox.set_job = NULL;
-	job->dd_data = NULL;
-	job->job_done(job);
 	/* need to hold the lock until we call job done to hold off
 	 * the timeout handler returning to the midlayer while
 	 * we are stillprocessing the job

diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 3d40023..f0b332f 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2010 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2011 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *
@@ -254,8 +254,8 @@
 void lpfc_sli_cancel_iocbs(struct lpfc_hba *, struct list_head *, uint32_t,
 			   uint32_t);
 void lpfc_sli_wake_mbox_wait(struct lpfc_hba *, LPFC_MBOXQ_t *);
-
-void lpfc_reset_barrier(struct lpfc_hba * phba);
+int lpfc_selective_reset(struct lpfc_hba *);
+void lpfc_reset_barrier(struct lpfc_hba *);
 int lpfc_sli_brdready(struct lpfc_hba *, uint32_t);
 int lpfc_sli_brdkill(struct lpfc_hba *);
 int lpfc_sli_brdreset(struct lpfc_hba *);

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 8e28edf..735028f 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c

@@ -89,7 +89,8 @@
 		return 0;
 
 	/* Read the HBA Host Attention Register */
-	ha_copy = readl(phba->HAregaddr);
+	if (lpfc_readl(phba->HAregaddr, &ha_copy))
+		return 1;
 
 	if (!(ha_copy & HA_LATT))
 		return 0;

diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h
index 94ae37c..95f11ed 100644
--- a/drivers/scsi/lpfc/lpfc_hw.h
+++ b/drivers/scsi/lpfc/lpfc_hw.h

@@ -1344,7 +1344,7 @@
 #define HS_FFER1       0x80000000	/* Bit 31 */
 #define HS_CRIT_TEMP   0x00000100	/* Bit 8  */
 #define HS_FFERM       0xFF000100	/* Mask for error bits 31:24 and 8 */
-
+#define UNPLUG_ERR     0x00000001	/* Indicate pci hot unplug */
 /* Host Control Register */
 
 #define HC_REG_OFFSET  12	/* Byte offset from register base address */
@@ -1713,6 +1713,17 @@
 #define pde6_apptagval_WORD	word2
 };
 
+struct lpfc_pde7 {
+	uint32_t word0;
+#define pde7_type_SHIFT		24
+#define pde7_type_MASK		0x000000ff
+#define pde7_type_WORD		word0
+#define pde7_rsvd0_SHIFT	0
+#define pde7_rsvd0_MASK		0x00ffffff
+#define pde7_rsvd0_WORD		word0
+	uint32_t addrHigh;
+	uint32_t addrLow;
+};
 
 /* Structure for MB Command LOAD_SM and DOWN_LOAD */
 
@@ -3621,7 +3632,7 @@
 		ASYNCSTAT_FIELDS asyncstat; /* async_status iocb */
 		QUE_XRI64_CX_FIELDS quexri64cx; /* que_xri64_cx fields */
 		struct rcv_seq64 rcvseq64;	/* RCV_SEQ64 and RCV_CONT64 */
-		struct sli4_bls_acc bls_acc; /* UNSOL ABTS BLS_ACC params */
+		struct sli4_bls_rsp bls_rsp; /* UNSOL ABTS BLS_RSP params */
 		uint32_t ulpWord[IOCB_WORD_SZ - 2];	/* generic 6 'words' */
 	} un;
 	union {

diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index c7178d6..8433ac0 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h

@@ -215,7 +215,7 @@
 #define lpfc_fip_flag_WORD word0
 };
 
-struct sli4_bls_acc {
+struct sli4_bls_rsp {
 	uint32_t word0_rsvd;      /* Word0 must be reserved */
 	uint32_t word1;
 #define lpfc_abts_orig_SHIFT      0
@@ -231,6 +231,16 @@
 #define lpfc_abts_oxid_MASK       0x0000FFFF
 #define lpfc_abts_oxid_WORD       word2
 	uint32_t word3;
+#define lpfc_vndr_code_SHIFT	0
+#define lpfc_vndr_code_MASK	0x000000FF
+#define lpfc_vndr_code_WORD	word3
+#define lpfc_rsn_expln_SHIFT	8
+#define lpfc_rsn_expln_MASK	0x000000FF
+#define lpfc_rsn_expln_WORD	word3
+#define lpfc_rsn_code_SHIFT	16
+#define lpfc_rsn_code_MASK	0x000000FF
+#define lpfc_rsn_code_WORD	word3
+
 	uint32_t word4;
 	uint32_t word5_rsvd;	/* Word5 must be reserved */
 };
@@ -711,21 +721,27 @@
 union lpfc_sli4_cfg_shdr {
 	struct {
 		uint32_t word6;
-#define lpfc_mbox_hdr_opcode_SHIFT		0
-#define lpfc_mbox_hdr_opcode_MASK		0x000000FF
-#define lpfc_mbox_hdr_opcode_WORD		word6
-#define lpfc_mbox_hdr_subsystem_SHIFT		8
-#define lpfc_mbox_hdr_subsystem_MASK		0x000000FF
-#define lpfc_mbox_hdr_subsystem_WORD		word6
-#define lpfc_mbox_hdr_port_number_SHIFT		16
-#define lpfc_mbox_hdr_port_number_MASK		0x000000FF
-#define lpfc_mbox_hdr_port_number_WORD		word6
-#define lpfc_mbox_hdr_domain_SHIFT		24
-#define lpfc_mbox_hdr_domain_MASK		0x000000FF
-#define lpfc_mbox_hdr_domain_WORD		word6
+#define lpfc_mbox_hdr_opcode_SHIFT	0
+#define lpfc_mbox_hdr_opcode_MASK	0x000000FF
+#define lpfc_mbox_hdr_opcode_WORD	word6
+#define lpfc_mbox_hdr_subsystem_SHIFT	8
+#define lpfc_mbox_hdr_subsystem_MASK	0x000000FF
+#define lpfc_mbox_hdr_subsystem_WORD	word6
+#define lpfc_mbox_hdr_port_number_SHIFT	16
+#define lpfc_mbox_hdr_port_number_MASK	0x000000FF
+#define lpfc_mbox_hdr_port_number_WORD	word6
+#define lpfc_mbox_hdr_domain_SHIFT	24
+#define lpfc_mbox_hdr_domain_MASK	0x000000FF
+#define lpfc_mbox_hdr_domain_WORD	word6
 		uint32_t timeout;
 		uint32_t request_length;
-		uint32_t reserved9;
+		uint32_t word9;
+#define lpfc_mbox_hdr_version_SHIFT	0
+#define lpfc_mbox_hdr_version_MASK	0x000000FF
+#define lpfc_mbox_hdr_version_WORD	word9
+#define LPFC_Q_CREATE_VERSION_2	2
+#define LPFC_Q_CREATE_VERSION_1	1
+#define LPFC_Q_CREATE_VERSION_0	0
 	} request;
 	struct {
 		uint32_t word6;
@@ -917,9 +933,12 @@
 #define LPFC_CQ_CNT_512		0x1
 #define LPFC_CQ_CNT_1024	0x2
 	uint32_t word1;
-#define lpfc_cq_eq_id_SHIFT		22
+#define lpfc_cq_eq_id_SHIFT		22	/* Version 0 Only */
 #define lpfc_cq_eq_id_MASK		0x000000FF
 #define lpfc_cq_eq_id_WORD		word1
+#define lpfc_cq_eq_id_2_SHIFT		0 	/* Version 2 Only */
+#define lpfc_cq_eq_id_2_MASK		0x0000FFFF
+#define lpfc_cq_eq_id_2_WORD		word1
 	uint32_t reserved0;
 	uint32_t reserved1;
 };
@@ -929,6 +948,9 @@
 	union {
 		struct {
 			uint32_t word0;
+#define lpfc_mbx_cq_create_page_size_SHIFT	16	/* Version 2 Only */
+#define lpfc_mbx_cq_create_page_size_MASK	0x000000FF
+#define lpfc_mbx_cq_create_page_size_WORD	word0
 #define lpfc_mbx_cq_create_num_pages_SHIFT	0
 #define lpfc_mbx_cq_create_num_pages_MASK	0x0000FFFF
 #define lpfc_mbx_cq_create_num_pages_WORD	word0
@@ -969,7 +991,7 @@
 struct lpfc_mbx_wq_create {
 	struct mbox_header header;
 	union {
-		struct {
+		struct {	/* Version 0 Request */
 			uint32_t word0;
 #define lpfc_mbx_wq_create_num_pages_SHIFT	0
 #define lpfc_mbx_wq_create_num_pages_MASK	0x0000FFFF
@@ -979,6 +1001,23 @@
 #define lpfc_mbx_wq_create_cq_id_WORD		word0
 			struct dma_address page[LPFC_MAX_WQ_PAGE];
 		} request;
+		struct {	/* Version 1 Request */
+			uint32_t word0;	/* Word 0 is the same as in v0 */
+			uint32_t word1;
+#define lpfc_mbx_wq_create_page_size_SHIFT	0
+#define lpfc_mbx_wq_create_page_size_MASK	0x000000FF
+#define lpfc_mbx_wq_create_page_size_WORD	word1
+#define lpfc_mbx_wq_create_wqe_size_SHIFT	8
+#define lpfc_mbx_wq_create_wqe_size_MASK	0x0000000F
+#define lpfc_mbx_wq_create_wqe_size_WORD	word1
+#define LPFC_WQ_WQE_SIZE_64	0x5
+#define LPFC_WQ_WQE_SIZE_128	0x6
+#define lpfc_mbx_wq_create_wqe_count_SHIFT	16
+#define lpfc_mbx_wq_create_wqe_count_MASK	0x0000FFFF
+#define lpfc_mbx_wq_create_wqe_count_WORD	word1
+			uint32_t word2;
+			struct dma_address page[LPFC_MAX_WQ_PAGE-1];
+		} request_1;
 		struct {
 			uint32_t word0;
 #define lpfc_mbx_wq_create_q_id_SHIFT	0
@@ -1007,13 +1046,22 @@
 #define LPFC_DATA_BUF_SIZE 2048
 struct rq_context {
 	uint32_t word0;
-#define lpfc_rq_context_rq_size_SHIFT	16
-#define lpfc_rq_context_rq_size_MASK	0x0000000F
-#define lpfc_rq_context_rq_size_WORD	word0
+#define lpfc_rq_context_rqe_count_SHIFT	16	/* Version 0 Only */
+#define lpfc_rq_context_rqe_count_MASK	0x0000000F
+#define lpfc_rq_context_rqe_count_WORD	word0
 #define LPFC_RQ_RING_SIZE_512		9	/* 512 entries */
 #define LPFC_RQ_RING_SIZE_1024		10	/* 1024 entries */
 #define LPFC_RQ_RING_SIZE_2048		11	/* 2048 entries */
 #define LPFC_RQ_RING_SIZE_4096		12	/* 4096 entries */
+#define lpfc_rq_context_rqe_count_1_SHIFT	16	/* Version 1 Only */
+#define lpfc_rq_context_rqe_count_1_MASK	0x0000FFFF
+#define lpfc_rq_context_rqe_count_1_WORD	word0
+#define lpfc_rq_context_rqe_size_SHIFT	8		/* Version 1 Only */
+#define lpfc_rq_context_rqe_size_MASK	0x0000000F
+#define lpfc_rq_context_rqe_size_WORD	word0
+#define lpfc_rq_context_page_size_SHIFT	0		/* Version 1 Only */
+#define lpfc_rq_context_page_size_MASK	0x000000FF
+#define lpfc_rq_context_page_size_WORD	word0
 	uint32_t reserved1;
 	uint32_t word2;
 #define lpfc_rq_context_cq_id_SHIFT	16
@@ -1022,7 +1070,7 @@
 #define lpfc_rq_context_buf_size_SHIFT	0
 #define lpfc_rq_context_buf_size_MASK	0x0000FFFF
 #define lpfc_rq_context_buf_size_WORD	word2
-	uint32_t reserved3;
+	uint32_t buffer_size;				/* Version 1 Only */
 };
 
 struct lpfc_mbx_rq_create {
@@ -1062,16 +1110,16 @@
 
 struct mq_context {
 	uint32_t word0;
-#define lpfc_mq_context_cq_id_SHIFT	22
+#define lpfc_mq_context_cq_id_SHIFT	22 	/* Version 0 Only */
 #define lpfc_mq_context_cq_id_MASK	0x000003FF
 #define lpfc_mq_context_cq_id_WORD	word0
-#define lpfc_mq_context_count_SHIFT	16
-#define lpfc_mq_context_count_MASK	0x0000000F
-#define lpfc_mq_context_count_WORD	word0
-#define LPFC_MQ_CNT_16		0x5
-#define LPFC_MQ_CNT_32		0x6
-#define LPFC_MQ_CNT_64		0x7
-#define LPFC_MQ_CNT_128		0x8
+#define lpfc_mq_context_ring_size_SHIFT	16
+#define lpfc_mq_context_ring_size_MASK	0x0000000F
+#define lpfc_mq_context_ring_size_WORD	word0
+#define LPFC_MQ_RING_SIZE_16		0x5
+#define LPFC_MQ_RING_SIZE_32		0x6
+#define LPFC_MQ_RING_SIZE_64		0x7
+#define LPFC_MQ_RING_SIZE_128		0x8
 	uint32_t word1;
 #define lpfc_mq_context_valid_SHIFT	31
 #define lpfc_mq_context_valid_MASK	0x00000001
@@ -1105,9 +1153,12 @@
 	union {
 		struct {
 			uint32_t word0;
-#define lpfc_mbx_mq_create_ext_num_pages_SHIFT		0
-#define lpfc_mbx_mq_create_ext_num_pages_MASK		0x0000FFFF
-#define lpfc_mbx_mq_create_ext_num_pages_WORD		word0
+#define lpfc_mbx_mq_create_ext_num_pages_SHIFT	0
+#define lpfc_mbx_mq_create_ext_num_pages_MASK	0x0000FFFF
+#define lpfc_mbx_mq_create_ext_num_pages_WORD	word0
+#define lpfc_mbx_mq_create_ext_cq_id_SHIFT	16	/* Version 1 Only */
+#define lpfc_mbx_mq_create_ext_cq_id_MASK	0x0000FFFF
+#define lpfc_mbx_mq_create_ext_cq_id_WORD	word0
 			uint32_t async_evt_bmap;
 #define lpfc_mbx_mq_create_ext_async_evt_link_SHIFT	LPFC_TRAILER_CODE_LINK
 #define lpfc_mbx_mq_create_ext_async_evt_link_MASK	0x00000001

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 35665cf..e6ebe51 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2010 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2011 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -507,7 +507,10 @@
 	phba->hba_flag &= ~HBA_ERATT_HANDLED;
 
 	/* Enable appropriate host interrupts */
-	status = readl(phba->HCregaddr);
+	if (lpfc_readl(phba->HCregaddr, &status)) {
+		spin_unlock_irq(&phba->hbalock);
+		return -EIO;
+	}
 	status |= HC_MBINT_ENA | HC_ERINT_ENA | HC_LAINT_ENA;
 	if (psli->num_rings > 0)
 		status |= HC_R0INT_ENA;
@@ -1222,7 +1225,10 @@
 	/* Wait for the ER1 bit to clear.*/
 	while (phba->work_hs & HS_FFER1) {
 		msleep(100);
-		phba->work_hs = readl(phba->HSregaddr);
+		if (lpfc_readl(phba->HSregaddr, &phba->work_hs)) {
+			phba->work_hs = UNPLUG_ERR ;
+			break;
+		}
 		/* If driver is unloading let the worker thread continue */
 		if (phba->pport->load_flag & FC_UNLOADING) {
 			phba->work_hs = 0;
@@ -4474,6 +4480,7 @@
 {
 	phba->lpfc_hba_init_link = lpfc_hba_init_link;
 	phba->lpfc_hba_down_link = lpfc_hba_down_link;
+	phba->lpfc_selective_reset = lpfc_selective_reset;
 	switch (dev_grp) {
 	case LPFC_PCI_DEV_LP:
 		phba->lpfc_hba_down_post = lpfc_hba_down_post_s3;
@@ -5385,13 +5392,16 @@
 	int i, port_error = 0;
 	uint32_t if_type;
 
+	memset(&portsmphr_reg, 0, sizeof(portsmphr_reg));
+	memset(&reg_data, 0, sizeof(reg_data));
 	if (!phba->sli4_hba.PSMPHRregaddr)
 		return -ENODEV;
 
 	/* Wait up to 30 seconds for the SLI Port POST done and ready */
 	for (i = 0; i < 3000; i++) {
-		portsmphr_reg.word0 = readl(phba->sli4_hba.PSMPHRregaddr);
-		if (bf_get(lpfc_port_smphr_perr, &portsmphr_reg)) {
+		if (lpfc_readl(phba->sli4_hba.PSMPHRregaddr,
+			&portsmphr_reg.word0) ||
+			(bf_get(lpfc_port_smphr_perr, &portsmphr_reg))) {
 			/* Port has a fatal POST error, break out */
 			port_error = -ENODEV;
 			break;
@@ -5472,9 +5482,9 @@
 			break;
 		case LPFC_SLI_INTF_IF_TYPE_2:
 			/* Final checks.  The port status should be clean. */
-			reg_data.word0 =
-				readl(phba->sli4_hba.u.if_type2.STATUSregaddr);
-			if (bf_get(lpfc_sliport_status_err, &reg_data)) {
+			if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr,
+				&reg_data.word0) ||
+				bf_get(lpfc_sliport_status_err, &reg_data)) {
 				phba->work_status[0] =
 					readl(phba->sli4_hba.u.if_type2.
 					      ERR1regaddr);
@@ -6760,9 +6770,11 @@
 			 * the loop again.
 			 */
 			for (rdy_chk = 0; rdy_chk < 1000; rdy_chk++) {
-				reg_data.word0 =
-					readl(phba->sli4_hba.u.if_type2.
-					      STATUSregaddr);
+				if (lpfc_readl(phba->sli4_hba.u.if_type2.
+					      STATUSregaddr, &reg_data.word0)) {
+					rc = -ENODEV;
+					break;
+				}
 				if (bf_get(lpfc_sliport_status_rdy, &reg_data))
 					break;
 				if (bf_get(lpfc_sliport_status_rn, &reg_data)) {
@@ -6783,8 +6795,11 @@
 			}
 
 			/* Detect any port errors. */
-			reg_data.word0 = readl(phba->sli4_hba.u.if_type2.
-					       STATUSregaddr);
+			if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr,
+				 &reg_data.word0)) {
+				rc = -ENODEV;
+				break;
+			}
 			if ((bf_get(lpfc_sliport_status_err, &reg_data)) ||
 			    (rdy_chk >= 1000)) {
 				phba->work_status[0] = readl(

diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index bf34178..2b962b0 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2011 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -1514,10 +1514,11 @@
 	struct scatterlist *sgpe = NULL; /* s/g prot entry */
 	struct lpfc_pde5 *pde5 = NULL;
 	struct lpfc_pde6 *pde6 = NULL;
-	struct ulp_bde64 *prot_bde = NULL;
+	struct lpfc_pde7 *pde7 = NULL;
 	dma_addr_t dataphysaddr, protphysaddr;
 	unsigned short curr_data = 0, curr_prot = 0;
-	unsigned int split_offset, protgroup_len;
+	unsigned int split_offset;
+	unsigned int protgroup_len, protgroup_offset = 0, protgroup_remainder;
 	unsigned int protgrp_blks, protgrp_bytes;
 	unsigned int remainder, subtotal;
 	int status;
@@ -1585,23 +1586,33 @@
 		bpl++;
 
 		/* setup the first BDE that points to protection buffer */
-		prot_bde = (struct ulp_bde64 *) bpl;
-		protphysaddr = sg_dma_address(sgpe);
-		prot_bde->addrHigh = le32_to_cpu(putPaddrLow(protphysaddr));
-		prot_bde->addrLow = le32_to_cpu(putPaddrHigh(protphysaddr));
-		protgroup_len = sg_dma_len(sgpe);
+		protphysaddr = sg_dma_address(sgpe) + protgroup_offset;
+		protgroup_len = sg_dma_len(sgpe) - protgroup_offset;
 
 		/* must be integer multiple of the DIF block length */
 		BUG_ON(protgroup_len % 8);
 
+		pde7 = (struct lpfc_pde7 *) bpl;
+		memset(pde7, 0, sizeof(struct lpfc_pde7));
+		bf_set(pde7_type, pde7, LPFC_PDE7_DESCRIPTOR);
+
+		pde7->addrHigh = le32_to_cpu(putPaddrLow(protphysaddr));
+		pde7->addrLow = le32_to_cpu(putPaddrHigh(protphysaddr));
+
 		protgrp_blks = protgroup_len / 8;
 		protgrp_bytes = protgrp_blks * blksize;
 
-		prot_bde->tus.f.bdeSize = protgroup_len;
-		prot_bde->tus.f.bdeFlags = LPFC_PDE7_DESCRIPTOR;
-		prot_bde->tus.w = le32_to_cpu(bpl->tus.w);
+		/* check if this pde is crossing the 4K boundary; if so split */
+		if ((pde7->addrLow & 0xfff) + protgroup_len > 0x1000) {
+			protgroup_remainder = 0x1000 - (pde7->addrLow & 0xfff);
+			protgroup_offset += protgroup_remainder;
+			protgrp_blks = protgroup_remainder / 8;
+			protgrp_bytes = protgroup_remainder * blksize;
+		} else {
+			protgroup_offset = 0;
+			curr_prot++;
+		}
 
-		curr_prot++;
 		num_bde++;
 
 		/* setup BDE's for data blocks associated with DIF data */
@@ -1653,6 +1664,13 @@
 
 		}
 
+		if (protgroup_offset) {
+			/* update the reference tag */
+			reftag += protgrp_blks;
+			bpl++;
+			continue;
+		}
+
 		/* are we done ? */
 		if (curr_prot == protcnt) {
 			alldone = 1;
@@ -1675,6 +1693,7 @@
 
 	return num_bde;
 }
+
 /*
  * Given a SCSI command that supports DIF, determine composition of protection
  * groups involved in setting up buffer lists

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 2ee0374..4746dcd 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2009 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2011 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -3477,7 +3477,8 @@
 	int retval = 0;
 
 	/* Read the HBA Host Status Register */
-	status = readl(phba->HSregaddr);
+	if (lpfc_readl(phba->HSregaddr, &status))
+		return 1;
 
 	/*
 	 * Check status register every 100ms for 5 retries, then every
@@ -3502,7 +3503,10 @@
 			lpfc_sli_brdrestart(phba);
 		}
 		/* Read the HBA Host Status Register */
-		status = readl(phba->HSregaddr);
+		if (lpfc_readl(phba->HSregaddr, &status)) {
+			retval = 1;
+			break;
+		}
 	}
 
 	/* Check to see if any errors occurred during init */
@@ -3584,7 +3588,7 @@
 	uint32_t __iomem *resp_buf;
 	uint32_t __iomem *mbox_buf;
 	volatile uint32_t mbox;
-	uint32_t hc_copy;
+	uint32_t hc_copy, ha_copy, resp_data;
 	int  i;
 	uint8_t hdrtype;
 
@@ -3601,12 +3605,15 @@
 	resp_buf = phba->MBslimaddr;
 
 	/* Disable the error attention */
-	hc_copy = readl(phba->HCregaddr);
+	if (lpfc_readl(phba->HCregaddr, &hc_copy))
+		return;
 	writel((hc_copy & ~HC_ERINT_ENA), phba->HCregaddr);
 	readl(phba->HCregaddr); /* flush */
 	phba->link_flag |= LS_IGNORE_ERATT;
 
-	if (readl(phba->HAregaddr) & HA_ERATT) {
+	if (lpfc_readl(phba->HAregaddr, &ha_copy))
+		return;
+	if (ha_copy & HA_ERATT) {
 		/* Clear Chip error bit */
 		writel(HA_ERATT, phba->HAregaddr);
 		phba->pport->stopped = 1;
@@ -3620,11 +3627,18 @@
 	mbox_buf = phba->MBslimaddr;
 	writel(mbox, mbox_buf);
 
-	for (i = 0;
-	     readl(resp_buf + 1) != ~(BARRIER_TEST_PATTERN) && i < 50; i++)
-		mdelay(1);
-
-	if (readl(resp_buf + 1) != ~(BARRIER_TEST_PATTERN)) {
+	for (i = 0; i < 50; i++) {
+		if (lpfc_readl((resp_buf + 1), &resp_data))
+			return;
+		if (resp_data != ~(BARRIER_TEST_PATTERN))
+			mdelay(1);
+		else
+			break;
+	}
+	resp_data = 0;
+	if (lpfc_readl((resp_buf + 1), &resp_data))
+		return;
+	if (resp_data  != ~(BARRIER_TEST_PATTERN)) {
 		if (phba->sli.sli_flag & LPFC_SLI_ACTIVE ||
 		    phba->pport->stopped)
 			goto restore_hc;
@@ -3633,13 +3647,26 @@
 	}
 
 	((MAILBOX_t *)&mbox)->mbxOwner = OWN_HOST;
-	for (i = 0; readl(resp_buf) != mbox &&  i < 500; i++)
-		mdelay(1);
+	resp_data = 0;
+	for (i = 0; i < 500; i++) {
+		if (lpfc_readl(resp_buf, &resp_data))
+			return;
+		if (resp_data != mbox)
+			mdelay(1);
+		else
+			break;
+	}
 
 clear_errat:
 
-	while (!(readl(phba->HAregaddr) & HA_ERATT) && ++i < 500)
-		mdelay(1);
+	while (++i < 500) {
+		if (lpfc_readl(phba->HAregaddr, &ha_copy))
+			return;
+		if (!(ha_copy & HA_ERATT))
+			mdelay(1);
+		else
+			break;
+	}
 
 	if (readl(phba->HAregaddr) & HA_ERATT) {
 		writel(HA_ERATT, phba->HAregaddr);
@@ -3686,7 +3713,11 @@
 
 	/* Disable the error attention */
 	spin_lock_irq(&phba->hbalock);
-	status = readl(phba->HCregaddr);
+	if (lpfc_readl(phba->HCregaddr, &status)) {
+		spin_unlock_irq(&phba->hbalock);
+		mempool_free(pmb, phba->mbox_mem_pool);
+		return 1;
+	}
 	status &= ~HC_ERINT_ENA;
 	writel(status, phba->HCregaddr);
 	readl(phba->HCregaddr); /* flush */
@@ -3720,11 +3751,12 @@
 	 * 3 seconds we still set HBA_ERROR state because the status of the
 	 * board is now undefined.
 	 */
-	ha_copy = readl(phba->HAregaddr);
-
+	if (lpfc_readl(phba->HAregaddr, &ha_copy))
+		return 1;
 	while ((i++ < 30) && !(ha_copy & HA_ERATT)) {
 		mdelay(100);
-		ha_copy = readl(phba->HAregaddr);
+		if (lpfc_readl(phba->HAregaddr, &ha_copy))
+			return 1;
 	}
 
 	del_timer_sync(&psli->mbox_tmo);
@@ -4018,7 +4050,8 @@
 	uint32_t status, i = 0;
 
 	/* Read the HBA Host Status Register */
-	status = readl(phba->HSregaddr);
+	if (lpfc_readl(phba->HSregaddr, &status))
+		return -EIO;
 
 	/* Check status register to see what current state is */
 	i = 0;
@@ -4073,7 +4106,8 @@
 			lpfc_sli_brdrestart(phba);
 		}
 		/* Read the HBA Host Status Register */
-		status = readl(phba->HSregaddr);
+		if (lpfc_readl(phba->HSregaddr, &status))
+			return -EIO;
 	}
 
 	/* Check to see if any errors occurred during init */
@@ -5136,7 +5170,7 @@
 	MAILBOX_t *mb;
 	struct lpfc_sli *psli = &phba->sli;
 	uint32_t status, evtctr;
-	uint32_t ha_copy;
+	uint32_t ha_copy, hc_copy;
 	int i;
 	unsigned long timeout;
 	unsigned long drvr_flag = 0;
@@ -5202,15 +5236,17 @@
 		goto out_not_finished;
 	}
 
-	if (mb->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT &&
-	    !(readl(phba->HCregaddr) & HC_MBINT_ENA)) {
-		spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
-		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
+	if (mb->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT) {
+		if (lpfc_readl(phba->HCregaddr, &hc_copy) ||
+			!(hc_copy & HC_MBINT_ENA)) {
+			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
 				"(%d):2528 Mailbox command x%x cannot "
 				"issue Data: x%x x%x\n",
 				pmbox->vport ? pmbox->vport->vpi : 0,
 				pmbox->u.mb.mbxCommand, psli->sli_flag, flag);
-		goto out_not_finished;
+			goto out_not_finished;
+		}
 	}
 
 	if (psli->sli_flag & LPFC_SLI_MBOX_ACTIVE) {
@@ -5408,11 +5444,19 @@
 			word0 = le32_to_cpu(word0);
 		} else {
 			/* First read mbox status word */
-			word0 = readl(phba->MBslimaddr);
+			if (lpfc_readl(phba->MBslimaddr, &word0)) {
+				spin_unlock_irqrestore(&phba->hbalock,
+						       drvr_flag);
+				goto out_not_finished;
+			}
 		}
 
 		/* Read the HBA Host Attention Register */
-		ha_copy = readl(phba->HAregaddr);
+		if (lpfc_readl(phba->HAregaddr, &ha_copy)) {
+			spin_unlock_irqrestore(&phba->hbalock,
+						       drvr_flag);
+			goto out_not_finished;
+		}
 		timeout = msecs_to_jiffies(lpfc_mbox_tmo_val(phba,
 							     mb->mbxCommand) *
 					   1000) + jiffies;
@@ -5463,7 +5507,11 @@
 				word0 = readl(phba->MBslimaddr);
 			}
 			/* Read the HBA Host Attention Register */
-			ha_copy = readl(phba->HAregaddr);
+			if (lpfc_readl(phba->HAregaddr, &ha_copy)) {
+				spin_unlock_irqrestore(&phba->hbalock,
+						       drvr_flag);
+				goto out_not_finished;
+			}
 		}
 
 		if (psli->sli_flag & LPFC_SLI_ACTIVE) {
@@ -6263,7 +6311,6 @@
 				bf_set(lpfc_sli4_sge_last, sgl, 1);
 			else
 				bf_set(lpfc_sli4_sge_last, sgl, 0);
-			sgl->word2 = cpu_to_le32(sgl->word2);
 			/* swap the size field back to the cpu so we
 			 * can assign it to the sgl.
 			 */
@@ -6283,6 +6330,7 @@
 				bf_set(lpfc_sli4_sge_offset, sgl, offset);
 				offset += bde.tus.f.bdeSize;
 			}
+			sgl->word2 = cpu_to_le32(sgl->word2);
 			bpl++;
 			sgl++;
 		}
@@ -6528,9 +6576,9 @@
 		numBdes = iocbq->iocb.un.genreq64.bdl.bdeSize /
 			sizeof(struct ulp_bde64);
 		for (i = 0; i < numBdes; i++) {
-			if (bpl[i].tus.f.bdeFlags != BUFF_TYPE_BDE_64)
-				break;
 			bde.tus.w = le32_to_cpu(bpl[i].tus.w);
+			if (bde.tus.f.bdeFlags != BUFF_TYPE_BDE_64)
+				break;
 			xmit_len += bde.tus.f.bdeSize;
 		}
 		/* word3 iocb=IO_TAG wqe=request_payload_len */
@@ -6620,15 +6668,15 @@
 		xritag = 0;
 	break;
 	case CMD_XMIT_BLS_RSP64_CX:
-		/* As BLS ABTS-ACC WQE is very different from other WQEs,
+		/* As BLS ABTS RSP WQE is very different from other WQEs,
 		 * we re-construct this WQE here based on information in
 		 * iocbq from scratch.
 		 */
 		memset(wqe, 0, sizeof(union lpfc_wqe));
 		/* OX_ID is invariable to who sent ABTS to CT exchange */
 		bf_set(xmit_bls_rsp64_oxid, &wqe->xmit_bls_rsp,
-		       bf_get(lpfc_abts_oxid, &iocbq->iocb.un.bls_acc));
-		if (bf_get(lpfc_abts_orig, &iocbq->iocb.un.bls_acc) ==
+		       bf_get(lpfc_abts_oxid, &iocbq->iocb.un.bls_rsp));
+		if (bf_get(lpfc_abts_orig, &iocbq->iocb.un.bls_rsp) ==
 		    LPFC_ABTS_UNSOL_INT) {
 			/* ABTS sent by initiator to CT exchange, the
 			 * RX_ID field will be filled with the newly
@@ -6642,7 +6690,7 @@
 			 * RX_ID from ABTS.
 			 */
 			bf_set(xmit_bls_rsp64_rxid, &wqe->xmit_bls_rsp,
-			       bf_get(lpfc_abts_rxid, &iocbq->iocb.un.bls_acc));
+			       bf_get(lpfc_abts_rxid, &iocbq->iocb.un.bls_rsp));
 		}
 		bf_set(xmit_bls_rsp64_seqcnthi, &wqe->xmit_bls_rsp, 0xffff);
 		bf_set(wqe_xmit_bls_pt, &wqe->xmit_bls_rsp.wqe_dest, 0x1);
@@ -6653,6 +6701,15 @@
 		       LPFC_WQE_LENLOC_NONE);
 		/* Overwrite the pre-set comnd type with OTHER_COMMAND */
 		command_type = OTHER_COMMAND;
+		if (iocbq->iocb.un.xseq64.w5.hcsw.Rctl == FC_RCTL_BA_RJT) {
+			bf_set(xmit_bls_rsp64_rjt_vspec, &wqe->xmit_bls_rsp,
+			       bf_get(lpfc_vndr_code, &iocbq->iocb.un.bls_rsp));
+			bf_set(xmit_bls_rsp64_rjt_expc, &wqe->xmit_bls_rsp,
+			       bf_get(lpfc_rsn_expln, &iocbq->iocb.un.bls_rsp));
+			bf_set(xmit_bls_rsp64_rjt_rsnc, &wqe->xmit_bls_rsp,
+			       bf_get(lpfc_rsn_code, &iocbq->iocb.un.bls_rsp));
+		}
+
 	break;
 	case CMD_XRI_ABORTED_CX:
 	case CMD_CREATE_XRI_CR: /* Do we expect to use this? */
@@ -6701,7 +6758,8 @@
 
 	if (piocb->sli4_xritag == NO_XRI) {
 		if (piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN ||
-		    piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN)
+		    piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN ||
+		    piocb->iocb.ulpCommand == CMD_XMIT_BLS_RSP64_CX)
 			sglq = NULL;
 		else {
 			if (pring->txq_cnt) {
@@ -8194,7 +8252,8 @@
 	piocb->iocb_flag &= ~LPFC_IO_WAKE;
 
 	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
-		creg_val = readl(phba->HCregaddr);
+		if (lpfc_readl(phba->HCregaddr, &creg_val))
+			return IOCB_ERROR;
 		creg_val |= (HC_R0INT_ENA << LPFC_FCP_RING);
 		writel(creg_val, phba->HCregaddr);
 		readl(phba->HCregaddr); /* flush */
@@ -8236,7 +8295,8 @@
 	}
 
 	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
-		creg_val = readl(phba->HCregaddr);
+		if (lpfc_readl(phba->HCregaddr, &creg_val))
+			return IOCB_ERROR;
 		creg_val &= ~(HC_R0INT_ENA << LPFC_FCP_RING);
 		writel(creg_val, phba->HCregaddr);
 		readl(phba->HCregaddr); /* flush */
@@ -8387,10 +8447,13 @@
 	uint32_t ha_copy;
 
 	/* Read chip Host Attention (HA) register */
-	ha_copy = readl(phba->HAregaddr);
+	if (lpfc_readl(phba->HAregaddr, &ha_copy))
+		goto unplug_err;
+
 	if (ha_copy & HA_ERATT) {
 		/* Read host status register to retrieve error event */
-		lpfc_sli_read_hs(phba);
+		if (lpfc_sli_read_hs(phba))
+			goto unplug_err;
 
 		/* Check if there is a deferred error condition is active */
 		if ((HS_FFER1 & phba->work_hs) &&
@@ -8409,6 +8472,15 @@
 		return 1;
 	}
 	return 0;
+
+unplug_err:
+	/* Set the driver HS work bitmap */
+	phba->work_hs |= UNPLUG_ERR;
+	/* Set the driver HA work bitmap */
+	phba->work_ha |= HA_ERATT;
+	/* Indicate polling handles this ERATT */
+	phba->hba_flag |= HBA_ERATT_HANDLED;
+	return 1;
 }
 
 /**
@@ -8436,8 +8508,15 @@
 	if_type = bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf);
 	switch (if_type) {
 	case LPFC_SLI_INTF_IF_TYPE_0:
-		uerr_sta_lo = readl(phba->sli4_hba.u.if_type0.UERRLOregaddr);
-		uerr_sta_hi = readl(phba->sli4_hba.u.if_type0.UERRHIregaddr);
+		if (lpfc_readl(phba->sli4_hba.u.if_type0.UERRLOregaddr,
+			&uerr_sta_lo) ||
+			lpfc_readl(phba->sli4_hba.u.if_type0.UERRHIregaddr,
+			&uerr_sta_hi)) {
+			phba->work_hs |= UNPLUG_ERR;
+			phba->work_ha |= HA_ERATT;
+			phba->hba_flag |= HBA_ERATT_HANDLED;
+			return 1;
+		}
 		if ((~phba->sli4_hba.ue_mask_lo & uerr_sta_lo) ||
 		    (~phba->sli4_hba.ue_mask_hi & uerr_sta_hi)) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
@@ -8456,9 +8535,15 @@
 		}
 		break;
 	case LPFC_SLI_INTF_IF_TYPE_2:
-		portstat_reg.word0 =
-			readl(phba->sli4_hba.u.if_type2.STATUSregaddr);
-		portsmphr = readl(phba->sli4_hba.PSMPHRregaddr);
+		if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr,
+			&portstat_reg.word0) ||
+			lpfc_readl(phba->sli4_hba.PSMPHRregaddr,
+			&portsmphr)){
+			phba->work_hs |= UNPLUG_ERR;
+			phba->work_ha |= HA_ERATT;
+			phba->hba_flag |= HBA_ERATT_HANDLED;
+			return 1;
+		}
 		if (bf_get(lpfc_sliport_status_err, &portstat_reg)) {
 			phba->work_status[0] =
 				readl(phba->sli4_hba.u.if_type2.ERR1regaddr);
@@ -8639,7 +8724,8 @@
 			return IRQ_NONE;
 		/* Need to read HA REG for slow-path events */
 		spin_lock_irqsave(&phba->hbalock, iflag);
-		ha_copy = readl(phba->HAregaddr);
+		if (lpfc_readl(phba->HAregaddr, &ha_copy))
+			goto unplug_error;
 		/* If somebody is waiting to handle an eratt don't process it
 		 * here. The brdkill function will do this.
 		 */
@@ -8665,7 +8751,9 @@
 		}
 
 		/* Clear up only attention source related to slow-path */
-		hc_copy = readl(phba->HCregaddr);
+		if (lpfc_readl(phba->HCregaddr, &hc_copy))
+			goto unplug_error;
+
 		writel(hc_copy & ~(HC_MBINT_ENA | HC_R2INT_ENA |
 			HC_LAINT_ENA | HC_ERINT_ENA),
 			phba->HCregaddr);
@@ -8688,7 +8776,8 @@
 				 */
 				spin_lock_irqsave(&phba->hbalock, iflag);
 				phba->sli.sli_flag &= ~LPFC_PROCESS_LA;
-				control = readl(phba->HCregaddr);
+				if (lpfc_readl(phba->HCregaddr, &control))
+					goto unplug_error;
 				control &= ~HC_LAINT_ENA;
 				writel(control, phba->HCregaddr);
 				readl(phba->HCregaddr); /* flush */
@@ -8708,7 +8797,8 @@
 			status >>= (4*LPFC_ELS_RING);
 			if (status & HA_RXMASK) {
 				spin_lock_irqsave(&phba->hbalock, iflag);
-				control = readl(phba->HCregaddr);
+				if (lpfc_readl(phba->HCregaddr, &control))
+					goto unplug_error;
 
 				lpfc_debugfs_slow_ring_trc(phba,
 				"ISR slow ring:   ctl:x%x stat:x%x isrcnt:x%x",
@@ -8741,7 +8831,8 @@
 		}
 		spin_lock_irqsave(&phba->hbalock, iflag);
 		if (work_ha_copy & HA_ERATT) {
-			lpfc_sli_read_hs(phba);
+			if (lpfc_sli_read_hs(phba))
+				goto unplug_error;
 			/*
 			 * Check if there is a deferred error condition
 			 * is active
@@ -8872,6 +8963,9 @@
 		lpfc_worker_wake_up(phba);
 	}
 	return IRQ_HANDLED;
+unplug_error:
+	spin_unlock_irqrestore(&phba->hbalock, iflag);
+	return IRQ_HANDLED;
 
 } /* lpfc_sli_sp_intr_handler */
 
@@ -8919,7 +9013,8 @@
 		if (lpfc_intr_state_check(phba))
 			return IRQ_NONE;
 		/* Need to read HA REG for FCP ring and other ring events */
-		ha_copy = readl(phba->HAregaddr);
+		if (lpfc_readl(phba->HAregaddr, &ha_copy))
+			return IRQ_HANDLED;
 		/* Clear up only attention source related to fast-path */
 		spin_lock_irqsave(&phba->hbalock, iflag);
 		/*
@@ -9004,7 +9099,11 @@
 		return IRQ_NONE;
 
 	spin_lock(&phba->hbalock);
-	phba->ha_copy = readl(phba->HAregaddr);
+	if (lpfc_readl(phba->HAregaddr, &phba->ha_copy)) {
+		spin_unlock(&phba->hbalock);
+		return IRQ_HANDLED;
+	}
+
 	if (unlikely(!phba->ha_copy)) {
 		spin_unlock(&phba->hbalock);
 		return IRQ_NONE;
@@ -9026,7 +9125,10 @@
 	}
 
 	/* Clear attention sources except link and error attentions */
-	hc_copy = readl(phba->HCregaddr);
+	if (lpfc_readl(phba->HCregaddr, &hc_copy)) {
+		spin_unlock(&phba->hbalock);
+		return IRQ_HANDLED;
+	}
 	writel(hc_copy & ~(HC_MBINT_ENA | HC_R0INT_ENA | HC_R1INT_ENA
 		| HC_R2INT_ENA | HC_LAINT_ENA | HC_ERINT_ENA),
 		phba->HCregaddr);
@@ -10403,7 +10505,6 @@
 	if (!phba->sli4_hba.pc_sli4_params.supported)
 		hw_page_size = SLI4_PAGE_SIZE;
 
-
 	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 	if (!mbox)
 		return -ENOMEM;
@@ -10413,11 +10514,22 @@
 			 LPFC_MBOX_OPCODE_CQ_CREATE,
 			 length, LPFC_SLI4_MBX_EMBED);
 	cq_create = &mbox->u.mqe.un.cq_create;
+	shdr = (union lpfc_sli4_cfg_shdr *) &cq_create->header.cfg_shdr;
 	bf_set(lpfc_mbx_cq_create_num_pages, &cq_create->u.request,
 		    cq->page_count);
 	bf_set(lpfc_cq_context_event, &cq_create->u.request.context, 1);
 	bf_set(lpfc_cq_context_valid, &cq_create->u.request.context, 1);
-	bf_set(lpfc_cq_eq_id, &cq_create->u.request.context, eq->queue_id);
+	bf_set(lpfc_mbox_hdr_version, &shdr->request,
+	       phba->sli4_hba.pc_sli4_params.cqv);
+	if (phba->sli4_hba.pc_sli4_params.cqv == LPFC_Q_CREATE_VERSION_2) {
+		bf_set(lpfc_mbx_cq_create_page_size, &cq_create->u.request,
+		       (PAGE_SIZE/SLI4_PAGE_SIZE));
+		bf_set(lpfc_cq_eq_id_2, &cq_create->u.request.context,
+		       eq->queue_id);
+	} else {
+		bf_set(lpfc_cq_eq_id, &cq_create->u.request.context,
+		       eq->queue_id);
+	}
 	switch (cq->entry_count) {
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
@@ -10449,7 +10561,6 @@
 	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
 
 	/* The IOCTL status is embedded in the mailbox subheader. */
-	shdr = (union lpfc_sli4_cfg_shdr *) &cq_create->header.cfg_shdr;
 	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
 	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
 	if (shdr_status || shdr_add_status || rc) {
@@ -10515,20 +10626,20 @@
 	bf_set(lpfc_mq_context_valid, &mq_create->u.request.context, 1);
 	switch (mq->entry_count) {
 	case 16:
-		bf_set(lpfc_mq_context_count, &mq_create->u.request.context,
-		       LPFC_MQ_CNT_16);
+		bf_set(lpfc_mq_context_ring_size, &mq_create->u.request.context,
+		       LPFC_MQ_RING_SIZE_16);
 		break;
 	case 32:
-		bf_set(lpfc_mq_context_count, &mq_create->u.request.context,
-		       LPFC_MQ_CNT_32);
+		bf_set(lpfc_mq_context_ring_size, &mq_create->u.request.context,
+		       LPFC_MQ_RING_SIZE_32);
 		break;
 	case 64:
-		bf_set(lpfc_mq_context_count, &mq_create->u.request.context,
-		       LPFC_MQ_CNT_64);
+		bf_set(lpfc_mq_context_ring_size, &mq_create->u.request.context,
+		       LPFC_MQ_RING_SIZE_64);
 		break;
 	case 128:
-		bf_set(lpfc_mq_context_count, &mq_create->u.request.context,
-		       LPFC_MQ_CNT_128);
+		bf_set(lpfc_mq_context_ring_size, &mq_create->u.request.context,
+		       LPFC_MQ_RING_SIZE_128);
 		break;
 	}
 	list_for_each_entry(dmabuf, &mq->page_list, list) {
@@ -10586,6 +10697,7 @@
 			 length, LPFC_SLI4_MBX_EMBED);
 
 	mq_create_ext = &mbox->u.mqe.un.mq_create_ext;
+	shdr = (union lpfc_sli4_cfg_shdr *) &mq_create_ext->header.cfg_shdr;
 	bf_set(lpfc_mbx_mq_create_ext_num_pages,
 	       &mq_create_ext->u.request, mq->page_count);
 	bf_set(lpfc_mbx_mq_create_ext_async_evt_link,
@@ -10598,9 +10710,15 @@
 	       &mq_create_ext->u.request, 1);
 	bf_set(lpfc_mbx_mq_create_ext_async_evt_sli,
 	       &mq_create_ext->u.request, 1);
-	bf_set(lpfc_mq_context_cq_id,
-	       &mq_create_ext->u.request.context, cq->queue_id);
 	bf_set(lpfc_mq_context_valid, &mq_create_ext->u.request.context, 1);
+	bf_set(lpfc_mbox_hdr_version, &shdr->request,
+	       phba->sli4_hba.pc_sli4_params.mqv);
+	if (phba->sli4_hba.pc_sli4_params.mqv == LPFC_Q_CREATE_VERSION_1)
+		bf_set(lpfc_mbx_mq_create_ext_cq_id, &mq_create_ext->u.request,
+		       cq->queue_id);
+	else
+		bf_set(lpfc_mq_context_cq_id, &mq_create_ext->u.request.context,
+		       cq->queue_id);
 	switch (mq->entry_count) {
 	default:
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
@@ -10610,20 +10728,24 @@
 			return -EINVAL;
 		/* otherwise default to smallest count (drop through) */
 	case 16:
-		bf_set(lpfc_mq_context_count, &mq_create_ext->u.request.context,
-		       LPFC_MQ_CNT_16);
+		bf_set(lpfc_mq_context_ring_size,
+		       &mq_create_ext->u.request.context,
+		       LPFC_MQ_RING_SIZE_16);
 		break;
 	case 32:
-		bf_set(lpfc_mq_context_count, &mq_create_ext->u.request.context,
-		       LPFC_MQ_CNT_32);
+		bf_set(lpfc_mq_context_ring_size,
+		       &mq_create_ext->u.request.context,
+		       LPFC_MQ_RING_SIZE_32);
 		break;
 	case 64:
-		bf_set(lpfc_mq_context_count, &mq_create_ext->u.request.context,
-		       LPFC_MQ_CNT_64);
+		bf_set(lpfc_mq_context_ring_size,
+		       &mq_create_ext->u.request.context,
+		       LPFC_MQ_RING_SIZE_64);
 		break;
 	case 128:
-		bf_set(lpfc_mq_context_count, &mq_create_ext->u.request.context,
-		       LPFC_MQ_CNT_128);
+		bf_set(lpfc_mq_context_ring_size,
+		       &mq_create_ext->u.request.context,
+		       LPFC_MQ_RING_SIZE_128);
 		break;
 	}
 	list_for_each_entry(dmabuf, &mq->page_list, list) {
@@ -10634,7 +10756,6 @@
 					putPaddrHigh(dmabuf->phys);
 	}
 	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
-	shdr = (union lpfc_sli4_cfg_shdr *) &mq_create_ext->header.cfg_shdr;
 	mq->queue_id = bf_get(lpfc_mbx_mq_create_q_id,
 			      &mq_create_ext->u.response);
 	if (rc != MBX_SUCCESS) {
@@ -10711,6 +10832,7 @@
 	uint32_t shdr_status, shdr_add_status;
 	union lpfc_sli4_cfg_shdr *shdr;
 	uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz;
+	struct dma_address *page;
 
 	if (!phba->sli4_hba.pc_sli4_params.supported)
 		hw_page_size = SLI4_PAGE_SIZE;
@@ -10724,20 +10846,42 @@
 			 LPFC_MBOX_OPCODE_FCOE_WQ_CREATE,
 			 length, LPFC_SLI4_MBX_EMBED);
 	wq_create = &mbox->u.mqe.un.wq_create;
+	shdr = (union lpfc_sli4_cfg_shdr *) &wq_create->header.cfg_shdr;
 	bf_set(lpfc_mbx_wq_create_num_pages, &wq_create->u.request,
 		    wq->page_count);
 	bf_set(lpfc_mbx_wq_create_cq_id, &wq_create->u.request,
 		    cq->queue_id);
+	bf_set(lpfc_mbox_hdr_version, &shdr->request,
+	       phba->sli4_hba.pc_sli4_params.wqv);
+	if (phba->sli4_hba.pc_sli4_params.wqv == LPFC_Q_CREATE_VERSION_1) {
+		bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1,
+		       wq->entry_count);
+		switch (wq->entry_size) {
+		default:
+		case 64:
+			bf_set(lpfc_mbx_wq_create_wqe_size,
+			       &wq_create->u.request_1,
+			       LPFC_WQ_WQE_SIZE_64);
+			break;
+		case 128:
+			bf_set(lpfc_mbx_wq_create_wqe_size,
+			       &wq_create->u.request_1,
+			       LPFC_WQ_WQE_SIZE_128);
+			break;
+		}
+		bf_set(lpfc_mbx_wq_create_page_size, &wq_create->u.request_1,
+		       (PAGE_SIZE/SLI4_PAGE_SIZE));
+		page = wq_create->u.request_1.page;
+	} else {
+		page = wq_create->u.request.page;
+	}
 	list_for_each_entry(dmabuf, &wq->page_list, list) {
 		memset(dmabuf->virt, 0, hw_page_size);
-		wq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
-					putPaddrLow(dmabuf->phys);
-		wq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
-					putPaddrHigh(dmabuf->phys);
+		page[dmabuf->buffer_tag].addr_lo = putPaddrLow(dmabuf->phys);
+		page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys);
 	}
 	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
 	/* The IOCTL status is embedded in the mailbox subheader. */
-	shdr = (union lpfc_sli4_cfg_shdr *) &wq_create->header.cfg_shdr;
 	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
 	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
 	if (shdr_status || shdr_add_status || rc) {
@@ -10815,37 +10959,51 @@
 			 LPFC_MBOX_OPCODE_FCOE_RQ_CREATE,
 			 length, LPFC_SLI4_MBX_EMBED);
 	rq_create = &mbox->u.mqe.un.rq_create;
-	switch (hrq->entry_count) {
-	default:
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"2535 Unsupported RQ count. (%d)\n",
-				hrq->entry_count);
-		if (hrq->entry_count < 512)
-			return -EINVAL;
-		/* otherwise default to smallest count (drop through) */
-	case 512:
-		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
-		       LPFC_RQ_RING_SIZE_512);
-		break;
-	case 1024:
-		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
-		       LPFC_RQ_RING_SIZE_1024);
-		break;
-	case 2048:
-		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
-		       LPFC_RQ_RING_SIZE_2048);
-		break;
-	case 4096:
-		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
-		       LPFC_RQ_RING_SIZE_4096);
-		break;
+	shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr;
+	bf_set(lpfc_mbox_hdr_version, &shdr->request,
+	       phba->sli4_hba.pc_sli4_params.rqv);
+	if (phba->sli4_hba.pc_sli4_params.rqv == LPFC_Q_CREATE_VERSION_1) {
+		bf_set(lpfc_rq_context_rqe_count_1,
+		       &rq_create->u.request.context,
+		       hrq->entry_count);
+		rq_create->u.request.context.buffer_size = LPFC_HDR_BUF_SIZE;
+	} else {
+		switch (hrq->entry_count) {
+		default:
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"2535 Unsupported RQ count. (%d)\n",
+					hrq->entry_count);
+			if (hrq->entry_count < 512)
+				return -EINVAL;
+			/* otherwise default to smallest count (drop through) */
+		case 512:
+			bf_set(lpfc_rq_context_rqe_count,
+			       &rq_create->u.request.context,
+			       LPFC_RQ_RING_SIZE_512);
+			break;
+		case 1024:
+			bf_set(lpfc_rq_context_rqe_count,
+			       &rq_create->u.request.context,
+			       LPFC_RQ_RING_SIZE_1024);
+			break;
+		case 2048:
+			bf_set(lpfc_rq_context_rqe_count,
+			       &rq_create->u.request.context,
+			       LPFC_RQ_RING_SIZE_2048);
+			break;
+		case 4096:
+			bf_set(lpfc_rq_context_rqe_count,
+			       &rq_create->u.request.context,
+			       LPFC_RQ_RING_SIZE_4096);
+			break;
+		}
+		bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context,
+		       LPFC_HDR_BUF_SIZE);
 	}
 	bf_set(lpfc_rq_context_cq_id, &rq_create->u.request.context,
 	       cq->queue_id);
 	bf_set(lpfc_mbx_rq_create_num_pages, &rq_create->u.request,
 	       hrq->page_count);
-	bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context,
-	       LPFC_HDR_BUF_SIZE);
 	list_for_each_entry(dmabuf, &hrq->page_list, list) {
 		memset(dmabuf->virt, 0, hw_page_size);
 		rq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
@@ -10855,7 +11013,6 @@
 	}
 	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
 	/* The IOCTL status is embedded in the mailbox subheader. */
-	shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr;
 	shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
 	shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
 	if (shdr_status || shdr_add_status || rc) {
@@ -10881,37 +11038,50 @@
 	lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
 			 LPFC_MBOX_OPCODE_FCOE_RQ_CREATE,
 			 length, LPFC_SLI4_MBX_EMBED);
-	switch (drq->entry_count) {
-	default:
-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-				"2536 Unsupported RQ count. (%d)\n",
-				drq->entry_count);
-		if (drq->entry_count < 512)
-			return -EINVAL;
-		/* otherwise default to smallest count (drop through) */
-	case 512:
-		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
-		       LPFC_RQ_RING_SIZE_512);
-		break;
-	case 1024:
-		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
-		       LPFC_RQ_RING_SIZE_1024);
-		break;
-	case 2048:
-		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
-		       LPFC_RQ_RING_SIZE_2048);
-		break;
-	case 4096:
-		bf_set(lpfc_rq_context_rq_size, &rq_create->u.request.context,
-		       LPFC_RQ_RING_SIZE_4096);
-		break;
+	bf_set(lpfc_mbox_hdr_version, &shdr->request,
+	       phba->sli4_hba.pc_sli4_params.rqv);
+	if (phba->sli4_hba.pc_sli4_params.rqv == LPFC_Q_CREATE_VERSION_1) {
+		bf_set(lpfc_rq_context_rqe_count_1,
+		       &rq_create->u.request.context,
+		       hrq->entry_count);
+		rq_create->u.request.context.buffer_size = LPFC_DATA_BUF_SIZE;
+	} else {
+		switch (drq->entry_count) {
+		default:
+			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+					"2536 Unsupported RQ count. (%d)\n",
+					drq->entry_count);
+			if (drq->entry_count < 512)
+				return -EINVAL;
+			/* otherwise default to smallest count (drop through) */
+		case 512:
+			bf_set(lpfc_rq_context_rqe_count,
+			       &rq_create->u.request.context,
+			       LPFC_RQ_RING_SIZE_512);
+			break;
+		case 1024:
+			bf_set(lpfc_rq_context_rqe_count,
+			       &rq_create->u.request.context,
+			       LPFC_RQ_RING_SIZE_1024);
+			break;
+		case 2048:
+			bf_set(lpfc_rq_context_rqe_count,
+			       &rq_create->u.request.context,
+			       LPFC_RQ_RING_SIZE_2048);
+			break;
+		case 4096:
+			bf_set(lpfc_rq_context_rqe_count,
+			       &rq_create->u.request.context,
+			       LPFC_RQ_RING_SIZE_4096);
+			break;
+		}
+		bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context,
+		       LPFC_DATA_BUF_SIZE);
 	}
 	bf_set(lpfc_rq_context_cq_id, &rq_create->u.request.context,
 	       cq->queue_id);
 	bf_set(lpfc_mbx_rq_create_num_pages, &rq_create->u.request,
 	       drq->page_count);
-	bf_set(lpfc_rq_context_buf_size, &rq_create->u.request.context,
-	       LPFC_DATA_BUF_SIZE);
 	list_for_each_entry(dmabuf, &drq->page_list, list) {
 		rq_create->u.request.page[dmabuf->buffer_tag].addr_lo =
 					putPaddrLow(dmabuf->phys);
@@ -11580,6 +11750,7 @@
 	static char *rctl_names[] = FC_RCTL_NAMES_INIT;
 	char *type_names[] = FC_TYPE_NAMES_INIT;
 	struct fc_vft_header *fc_vft_hdr;
+	uint32_t *header = (uint32_t *) fc_hdr;
 
 	switch (fc_hdr->fh_r_ctl) {
 	case FC_RCTL_DD_UNCAT:		/* uncategorized information */
@@ -11628,10 +11799,15 @@
 	default:
 		goto drop;
 	}
+
 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
-			"2538 Received frame rctl:%s type:%s\n",
+			"2538 Received frame rctl:%s type:%s "
+			"Frame Data:%08x %08x %08x %08x %08x %08x\n",
 			rctl_names[fc_hdr->fh_r_ctl],
-			type_names[fc_hdr->fh_type]);
+			type_names[fc_hdr->fh_type],
+			be32_to_cpu(header[0]), be32_to_cpu(header[1]),
+			be32_to_cpu(header[2]), be32_to_cpu(header[3]),
+			be32_to_cpu(header[4]), be32_to_cpu(header[5]));
 	return 0;
 drop:
 	lpfc_printf_log(phba, KERN_WARNING, LOG_ELS,
@@ -11928,17 +12104,17 @@
 }
 
 /**
- * lpfc_sli4_seq_abort_acc_cmpl - Accept seq abort iocb complete handler
+ * lpfc_sli4_seq_abort_rsp_cmpl - BLS ABORT RSP seq abort iocb complete handler
  * @phba: Pointer to HBA context object.
  * @cmd_iocbq: pointer to the command iocbq structure.
  * @rsp_iocbq: pointer to the response iocbq structure.
  *
- * This function handles the sequence abort accept iocb command complete
+ * This function handles the sequence abort response iocb command complete
  * event. It properly releases the memory allocated to the sequence abort
  * accept iocb.
  **/
 static void
-lpfc_sli4_seq_abort_acc_cmpl(struct lpfc_hba *phba,
+lpfc_sli4_seq_abort_rsp_cmpl(struct lpfc_hba *phba,
 			     struct lpfc_iocbq *cmd_iocbq,
 			     struct lpfc_iocbq *rsp_iocbq)
 {
@@ -11947,15 +12123,15 @@
 }
 
 /**
- * lpfc_sli4_seq_abort_acc - Accept sequence abort
+ * lpfc_sli4_seq_abort_rsp - bls rsp to sequence abort
  * @phba: Pointer to HBA context object.
  * @fc_hdr: pointer to a FC frame header.
  *
- * This function sends a basic accept to a previous unsol sequence abort
+ * This function sends a basic response to a previous unsol sequence abort
  * event after aborting the sequence handling.
  **/
 static void
-lpfc_sli4_seq_abort_acc(struct lpfc_hba *phba,
+lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba,
 			struct fc_frame_header *fc_hdr)
 {
 	struct lpfc_iocbq *ctiocb = NULL;
@@ -11963,6 +12139,7 @@
 	uint16_t oxid, rxid;
 	uint32_t sid, fctl;
 	IOCB_t *icmd;
+	int rc;
 
 	if (!lpfc_is_link_up(phba))
 		return;
@@ -11983,7 +12160,7 @@
 		+ phba->sli4_hba.max_cfg_param.xri_base))
 		lpfc_set_rrq_active(phba, ndlp, rxid, oxid, 0);
 
-	/* Allocate buffer for acc iocb */
+	/* Allocate buffer for rsp iocb */
 	ctiocb = lpfc_sli_get_iocbq(phba);
 	if (!ctiocb)
 		return;
@@ -12008,32 +12185,54 @@
 
 	ctiocb->iocb_cmpl = NULL;
 	ctiocb->vport = phba->pport;
-	ctiocb->iocb_cmpl = lpfc_sli4_seq_abort_acc_cmpl;
+	ctiocb->iocb_cmpl = lpfc_sli4_seq_abort_rsp_cmpl;
+	ctiocb->sli4_xritag = NO_XRI;
+
+	/* If the oxid maps to the FCP XRI range or if it is out of range,
+	 * send a BLS_RJT.  The driver no longer has that exchange.
+	 * Override the IOCB for a BA_RJT.
+	 */
+	if (oxid > (phba->sli4_hba.max_cfg_param.max_xri +
+		    phba->sli4_hba.max_cfg_param.xri_base) ||
+	    oxid > (lpfc_sli4_get_els_iocb_cnt(phba) +
+		    phba->sli4_hba.max_cfg_param.xri_base)) {
+		icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT;
+		bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0);
+		bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID);
+		bf_set(lpfc_rsn_code, &icmd->un.bls_rsp, FC_BA_RJT_UNABLE);
+	}
 
 	if (fctl & FC_FC_EX_CTX) {
 		/* ABTS sent by responder to CT exchange, construction
 		 * of BA_ACC will use OX_ID from ABTS for the XRI_TAG
 		 * field and RX_ID from ABTS for RX_ID field.
 		 */
-		bf_set(lpfc_abts_orig, &icmd->un.bls_acc, LPFC_ABTS_UNSOL_RSP);
-		bf_set(lpfc_abts_rxid, &icmd->un.bls_acc, rxid);
-		ctiocb->sli4_xritag = oxid;
+		bf_set(lpfc_abts_orig, &icmd->un.bls_rsp, LPFC_ABTS_UNSOL_RSP);
+		bf_set(lpfc_abts_rxid, &icmd->un.bls_rsp, rxid);
 	} else {
 		/* ABTS sent by initiator to CT exchange, construction
 		 * of BA_ACC will need to allocate a new XRI as for the
 		 * XRI_TAG and RX_ID fields.
 		 */
-		bf_set(lpfc_abts_orig, &icmd->un.bls_acc, LPFC_ABTS_UNSOL_INT);
-		bf_set(lpfc_abts_rxid, &icmd->un.bls_acc, NO_XRI);
-		ctiocb->sli4_xritag = NO_XRI;
+		bf_set(lpfc_abts_orig, &icmd->un.bls_rsp, LPFC_ABTS_UNSOL_INT);
+		bf_set(lpfc_abts_rxid, &icmd->un.bls_rsp, NO_XRI);
 	}
-	bf_set(lpfc_abts_oxid, &icmd->un.bls_acc, oxid);
+	bf_set(lpfc_abts_oxid, &icmd->un.bls_rsp, oxid);
 
-	/* Xmit CT abts accept on exchange <xid> */
+	/* Xmit CT abts response on exchange <xid> */
 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
-			"1200 Xmit CT ABTS ACC on exchange x%x Data: x%x\n",
-			CMD_XMIT_BLS_RSP64_CX, phba->link_state);
-	lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, ctiocb, 0);
+			"1200 Send BLS cmd x%x on oxid x%x Data: x%x\n",
+			icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state);
+
+	rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, ctiocb, 0);
+	if (rc == IOCB_ERROR) {
+		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
+				"2925 Failed to issue CT ABTS RSP x%x on "
+				"xri x%x, Data x%x\n",
+				icmd->un.xseq64.w5.hcsw.Rctl, oxid,
+				phba->link_state);
+		lpfc_sli_release_iocbq(phba, ctiocb);
+	}
 }
 
 /**
@@ -12081,7 +12280,7 @@
 			lpfc_in_buf_free(phba, &dmabuf->dbuf);
 	}
 	/* Send basic accept (BA_ACC) to the abort requester */
-	lpfc_sli4_seq_abort_acc(phba, &fc_hdr);
+	lpfc_sli4_seq_abort_rsp(phba, &fc_hdr);
 }
 
 /**

diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 595056b..1a3cbf8 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h

@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2009 Emulex.  All rights reserved.                *
+ * Copyright (C) 2009-2011 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  *                                                                 *

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 0a4d376..2404d1d 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h

@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.3.21"
+#define LPFC_DRIVER_VERSION "8.3.22"
 #define LPFC_DRIVER_NAME		"lpfc"
 #define LPFC_SP_DRIVER_HANDLER_NAME	"lpfc:sp"
 #define LPFC_FP_DRIVER_HANDLER_NAME	"lpfc:fp"

diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index e8a6f1c..5e001ff 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c

@@ -1748,6 +1748,54 @@
 }
 
 /**
+ * _base_display_hp_branding - Display branding string
+ * @ioc: per adapter object
+ *
+ * Return nothing.
+ */
+static void
+_base_display_hp_branding(struct MPT2SAS_ADAPTER *ioc)
+{
+	if (ioc->pdev->subsystem_vendor != MPT2SAS_HP_3PAR_SSVID)
+		return;
+
+	switch (ioc->pdev->device) {
+	case MPI2_MFGPAGE_DEVID_SAS2004:
+		switch (ioc->pdev->subsystem_device) {
+		case MPT2SAS_HP_DAUGHTER_2_4_INTERNAL_SSDID:
+			printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
+			    MPT2SAS_HP_DAUGHTER_2_4_INTERNAL_BRANDING);
+			break;
+		default:
+			break;
+		}
+	case MPI2_MFGPAGE_DEVID_SAS2308_2:
+		switch (ioc->pdev->subsystem_device) {
+		case MPT2SAS_HP_2_4_INTERNAL_SSDID:
+			printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
+			    MPT2SAS_HP_2_4_INTERNAL_BRANDING);
+			break;
+		case MPT2SAS_HP_2_4_EXTERNAL_SSDID:
+			printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
+			    MPT2SAS_HP_2_4_EXTERNAL_BRANDING);
+			break;
+		case MPT2SAS_HP_1_4_INTERNAL_1_4_EXTERNAL_SSDID:
+			printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
+			    MPT2SAS_HP_1_4_INTERNAL_1_4_EXTERNAL_BRANDING);
+			break;
+		case MPT2SAS_HP_EMBEDDED_2_4_INTERNAL_SSDID:
+			printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
+			    MPT2SAS_HP_EMBEDDED_2_4_INTERNAL_BRANDING);
+			break;
+		default:
+			break;
+		}
+	default:
+		break;
+	}
+}
+
+/**
  * _base_display_ioc_capabilities - Disply IOC's capabilities.
  * @ioc: per adapter object
  *
@@ -1778,6 +1826,7 @@
 
 	_base_display_dell_branding(ioc);
 	_base_display_intel_branding(ioc);
+	_base_display_hp_branding(ioc);
 
 	printk(MPT2SAS_INFO_FMT "Protocol=(", ioc->name);
 

diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index a3f8aa9..5003282 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h

@@ -168,6 +168,26 @@
 #define MPT2SAS_INTEL_RMS2LL080_SSDID          0x350E
 #define MPT2SAS_INTEL_RMS2LL040_SSDID          0x350F
 
+
+/*
+ * HP HBA branding
+ */
+#define MPT2SAS_HP_3PAR_SSVID                0x1590
+#define MPT2SAS_HP_2_4_INTERNAL_BRANDING        "HP H220 Host Bus Adapter"
+#define MPT2SAS_HP_2_4_EXTERNAL_BRANDING        "HP H221 Host Bus Adapter"
+#define MPT2SAS_HP_1_4_INTERNAL_1_4_EXTERNAL_BRANDING "HP H222 Host Bus Adapter"
+#define MPT2SAS_HP_EMBEDDED_2_4_INTERNAL_BRANDING    "HP H220i Host Bus Adapter"
+#define MPT2SAS_HP_DAUGHTER_2_4_INTERNAL_BRANDING    "HP H210i Host Bus Adapter"
+
+/*
+ * HO HBA SSDIDs
+ */
+#define MPT2SAS_HP_2_4_INTERNAL_SSDID            0x0041
+#define MPT2SAS_HP_2_4_EXTERNAL_SSDID            0x0042
+#define MPT2SAS_HP_1_4_INTERNAL_1_4_EXTERNAL_SSDID    0x0043
+#define MPT2SAS_HP_EMBEDDED_2_4_INTERNAL_SSDID        0x0044
+#define MPT2SAS_HP_DAUGHTER_2_4_INTERNAL_SSDID        0x0046
+
 /*
  * per target private data
  */

diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
index 19ad34f..938d045 100644
--- a/drivers/scsi/mvsas/mv_init.c
+++ b/drivers/scsi/mvsas/mv_init.c

@@ -663,6 +663,13 @@
 	{ PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1300), chip_1300 },
 	{ PCI_VDEVICE(ARECA, PCI_DEVICE_ID_ARECA_1320), chip_1320 },
 	{ PCI_VDEVICE(ADAPTEC2, 0x0450), chip_6440 },
+	{ PCI_VDEVICE(TTI, 0x2710), chip_9480 },
+	{ PCI_VDEVICE(TTI, 0x2720), chip_9480 },
+	{ PCI_VDEVICE(TTI, 0x2721), chip_9480 },
+	{ PCI_VDEVICE(TTI, 0x2722), chip_9480 },
+	{ PCI_VDEVICE(TTI, 0x2740), chip_9480 },
+	{ PCI_VDEVICE(TTI, 0x2744), chip_9480 },
+	{ PCI_VDEVICE(TTI, 0x2760), chip_9480 },
 
 	{ }	/* terminate list */
 };

diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h
index 2fc0045..c1f8d1b 100644
--- a/drivers/scsi/qla4xxx/ql4_def.h
+++ b/drivers/scsi/qla4xxx/ql4_def.h

@@ -53,6 +53,9 @@
 #define PCI_DEVICE_ID_QLOGIC_ISP8022	0x8022
 #endif
 
+#define ISP4XXX_PCI_FN_1	0x1
+#define ISP4XXX_PCI_FN_2	0x3
+
 #define QLA_SUCCESS			0
 #define QLA_ERROR			1
 
@@ -233,9 +236,6 @@
 
 	unsigned long flags;	/* DDB Flags */
 
-	unsigned long dev_scan_wait_to_start_relogin;
-	unsigned long dev_scan_wait_to_complete_relogin;
-
 	uint16_t fw_ddb_index;	/* DDB firmware index */
 	uint16_t options;
 	uint32_t fw_ddb_device_state; /* F/W Device State  -- see ql4_fw.h */
@@ -289,8 +289,6 @@
  * DDB flags.
  */
 #define DF_RELOGIN		0	/* Relogin to device */
-#define DF_NO_RELOGIN		1	/* Do not relogin if IOCTL
-					 * logged it out */
 #define DF_ISNS_DISCOVERED	2	/* Device was discovered via iSNS */
 #define DF_FO_MASKED		3
 
@@ -376,7 +374,7 @@
 #define AF_LINK_UP			8 /* 0x00000100 */
 #define AF_IRQ_ATTACHED			10 /* 0x00000400 */
 #define AF_DISABLE_ACB_COMPLETE		11 /* 0x00000800 */
-#define AF_HBA_GOING_AWAY		12 /* 0x00001000 */
+#define AF_HA_REMOVAL			12 /* 0x00001000 */
 #define AF_INTx_ENABLED			15 /* 0x00008000 */
 #define AF_MSI_ENABLED			16 /* 0x00010000 */
 #define AF_MSIX_ENABLED			17 /* 0x00020000 */
@@ -479,7 +477,6 @@
 	uint32_t timer_active;
 
 	/* Recovery Timers */
-	uint32_t discovery_wait;
 	atomic_t check_relogin_timeouts;
 	uint32_t retry_reset_ha_cnt;
 	uint32_t isp_reset_timer;	/* reset test timer */
@@ -765,6 +762,5 @@
 /* Defines for process_aen() */
 #define PROCESS_ALL_AENS	 0
 #define FLUSH_DDB_CHANGED_AENS	 1
-#define RELOGIN_DDB_CHANGED_AENS 2
 
 #endif	/*_QLA4XXX_H */

diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h
index c198579..31e2bf9 100644
--- a/drivers/scsi/qla4xxx/ql4_fw.h
+++ b/drivers/scsi/qla4xxx/ql4_fw.h

@@ -455,6 +455,7 @@
 	uint8_t res0;	/* 07 */
 	uint16_t eth_mtu_size;	/* 08-09 */
 	uint16_t add_fw_options;	/* 0A-0B */
+#define SERIALIZE_TASK_MGMT		0x0400
 
 	uint8_t hb_interval;	/* 0C */
 	uint8_t inst_num; /* 0D */

diff --git a/drivers/scsi/qla4xxx/ql4_glbl.h b/drivers/scsi/qla4xxx/ql4_glbl.h
index 8fad99b..cc53e3f 100644
--- a/drivers/scsi/qla4xxx/ql4_glbl.h
+++ b/drivers/scsi/qla4xxx/ql4_glbl.h

@@ -136,7 +136,6 @@
 void qla4_8xxx_set_drv_active(struct scsi_qla_host *ha);
 
 extern int ql4xextended_error_logging;
-extern int ql4xdiscoverywait;
 extern int ql4xdontresethba;
 extern int ql4xenablemsix;
 

diff --git a/drivers/scsi/qla4xxx/ql4_init.c b/drivers/scsi/qla4xxx/ql4_init.c
index 1629c48..bbb2e90 100644
--- a/drivers/scsi/qla4xxx/ql4_init.c
+++ b/drivers/scsi/qla4xxx/ql4_init.c

@@ -723,13 +723,38 @@
 	return relogin;
 }
 
+static void qla4xxx_flush_AENS(struct scsi_qla_host *ha)
+{
+	unsigned long wtime;
+
+	/* Flush the 0x8014 AEN from the firmware as a result of
+	 * Auto connect. We are basically doing get_firmware_ddb()
+	 * to determine whether we need to log back in or not.
+	 * Trying to do a set ddb before we have processed 0x8014
+	 * will result in another set_ddb() for the same ddb. In other
+	 * words there will be stale entries in the aen_q.
+	 */
+	wtime = jiffies + (2 * HZ);
+	do {
+		if (qla4xxx_get_firmware_state(ha) == QLA_SUCCESS)
+			if (ha->firmware_state & (BIT_2 | BIT_0))
+				return;
+
+		if (test_and_clear_bit(DPC_AEN, &ha->dpc_flags))
+			qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS);
+
+		msleep(1000);
+	} while (!time_after_eq(jiffies, wtime));
+}
+
 /**
- * qla4xxx_configure_ddbs - builds driver ddb list
+ * qla4xxx_build_ddb_list - builds driver ddb list
  * @ha: Pointer to host adapter structure.
  *
  * This routine searches for all valid firmware ddb entries and builds
  * an internal ddb list. Ddbs that are considered valid are those with
  * a device state of SESSION_ACTIVE.
+ * A relogin (set_ddb) is issued for DDBs that are not online.
  **/
 static int qla4xxx_build_ddb_list(struct scsi_qla_host *ha)
 {
@@ -744,6 +769,8 @@
 	uint32_t ipv6_device;
 	uint32_t new_tgt;
 
+	qla4xxx_flush_AENS(ha);
+
 	fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry),
 			&fw_ddb_entry_dma, GFP_KERNEL);
 	if (fw_ddb_entry == NULL) {
@@ -847,144 +874,6 @@
 	return status;
 }
 
-struct qla4_relog_scan {
-	int halt_wait;
-	uint32_t conn_err;
-	uint32_t fw_ddb_index;
-	uint32_t next_fw_ddb_index;
-	uint32_t fw_ddb_device_state;
-};
-
-static int qla4_test_rdy(struct scsi_qla_host *ha, struct qla4_relog_scan *rs)
-{
-	struct ddb_entry *ddb_entry;
-
-	if (qla4_is_relogin_allowed(ha, rs->conn_err)) {
-		/* We either have a device that is in
-		 * the process of relogging in or a
-		 * device that is waiting to be
-		 * relogged in */
-		rs->halt_wait = 0;
-
-		ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha,
-							   rs->fw_ddb_index);
-		if (ddb_entry == NULL)
-			return QLA_ERROR;
-
-		if (ddb_entry->dev_scan_wait_to_start_relogin != 0
-		    && time_after_eq(jiffies,
-				     ddb_entry->
-				     dev_scan_wait_to_start_relogin))
-		{
-			ddb_entry->dev_scan_wait_to_start_relogin = 0;
-			qla4xxx_set_ddb_entry(ha, rs->fw_ddb_index, 0);
-		}
-	}
-	return QLA_SUCCESS;
-}
-
-static int qla4_scan_for_relogin(struct scsi_qla_host *ha,
-				 struct qla4_relog_scan *rs)
-{
-	int error;
-
-	/* scan for relogins
-	 * ----------------- */
-	for (rs->fw_ddb_index = 0; rs->fw_ddb_index < MAX_DDB_ENTRIES;
-	     rs->fw_ddb_index = rs->next_fw_ddb_index) {
-		if (qla4xxx_get_fwddb_entry(ha, rs->fw_ddb_index, NULL, 0,
-					    NULL, &rs->next_fw_ddb_index,
-					    &rs->fw_ddb_device_state,
-					    &rs->conn_err, NULL, NULL)
-		    == QLA_ERROR)
-			return QLA_ERROR;
-
-		if (rs->fw_ddb_device_state == DDB_DS_LOGIN_IN_PROCESS)
-			rs->halt_wait = 0;
-
-		if (rs->fw_ddb_device_state == DDB_DS_SESSION_FAILED ||
-		    rs->fw_ddb_device_state == DDB_DS_NO_CONNECTION_ACTIVE) {
-			error = qla4_test_rdy(ha, rs);
-			if (error)
-				return error;
-		}
-
-		/* We know we've reached the last device when
-		 * next_fw_ddb_index is 0 */
-		if (rs->next_fw_ddb_index == 0)
-			break;
-	}
-	return QLA_SUCCESS;
-}
-
-/**
- * qla4xxx_devices_ready - wait for target devices to be logged in
- * @ha: pointer to adapter structure
- *
- * This routine waits up to ql4xdiscoverywait seconds
- * F/W database during driver load time.
- **/
-static int qla4xxx_devices_ready(struct scsi_qla_host *ha)
-{
-	int error;
-	unsigned long discovery_wtime;
-	struct qla4_relog_scan rs;
-
-	discovery_wtime = jiffies + (ql4xdiscoverywait * HZ);
-
-	DEBUG(printk("Waiting (%d) for devices ...\n", ql4xdiscoverywait));
-	do {
-		/* poll for AEN. */
-		qla4xxx_get_firmware_state(ha);
-		if (test_and_clear_bit(DPC_AEN, &ha->dpc_flags)) {
-			/* Set time-between-relogin timer */
-			qla4xxx_process_aen(ha, RELOGIN_DDB_CHANGED_AENS);
-		}
-
-		/* if no relogins active or needed, halt discvery wait */
-		rs.halt_wait = 1;
-
-		error = qla4_scan_for_relogin(ha, &rs);
-
-		if (rs.halt_wait) {
-			DEBUG2(printk("scsi%ld: %s: Delay halted.  Devices "
-				      "Ready.\n", ha->host_no, __func__));
-			return QLA_SUCCESS;
-		}
-
-		msleep(2000);
-	} while (!time_after_eq(jiffies, discovery_wtime));
-
-	DEBUG3(qla4xxx_get_conn_event_log(ha));
-
-	return QLA_SUCCESS;
-}
-
-static void qla4xxx_flush_AENS(struct scsi_qla_host *ha)
-{
-	unsigned long wtime;
-
-	/* Flush the 0x8014 AEN from the firmware as a result of
-	 * Auto connect. We are basically doing get_firmware_ddb()
-	 * to determine whether we need to log back in or not.
-	 *  Trying to do a set ddb before we have processed 0x8014
-	 *  will result in another set_ddb() for the same ddb. In other
-	 *  words there will be stale entries in the aen_q.
-	 */
-	wtime = jiffies + (2 * HZ);
-	do {
-		if (qla4xxx_get_firmware_state(ha) == QLA_SUCCESS)
-			if (ha->firmware_state & (BIT_2 | BIT_0))
-				return;
-
-		if (test_and_clear_bit(DPC_AEN, &ha->dpc_flags))
-			qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS);
-
-		msleep(1000);
-	} while (!time_after_eq(jiffies, wtime));
-
-}
-
 static int qla4xxx_initialize_ddb_list(struct scsi_qla_host *ha)
 {
 	uint16_t fw_ddb_index;
@@ -996,29 +885,12 @@
 
 	for (fw_ddb_index = 0; fw_ddb_index < MAX_DDB_ENTRIES; fw_ddb_index++)
 		ha->fw_ddb_index_map[fw_ddb_index] =
-			(struct ddb_entry *)INVALID_ENTRY;
+		    (struct ddb_entry *)INVALID_ENTRY;
 
 	ha->tot_ddbs = 0;
 
-	qla4xxx_flush_AENS(ha);
-
-	/* Wait for an AEN */
-	qla4xxx_devices_ready(ha);
-
-	/*
-	 * First perform device discovery for active
-	 * fw ddb indexes and build
-	 * ddb list.
-	 */
-	if ((status = qla4xxx_build_ddb_list(ha)) == QLA_ERROR)
-		return status;
-
-	/*
-	 * Targets can come online after the inital discovery, so processing
-	 * the aens here will catch them.
-	 */
-	if (test_and_clear_bit(DPC_AEN, &ha->dpc_flags))
-		qla4xxx_process_aen(ha, PROCESS_ALL_AENS);
+	/* Perform device discovery and build ddb list. */
+	status = qla4xxx_build_ddb_list(ha);
 
 	return status;
 }
@@ -1537,7 +1409,6 @@
 		uint32_t state, uint32_t conn_err)
 {
 	struct ddb_entry * ddb_entry;
-	uint32_t old_fw_ddb_device_state;
 
 	/* check for out of range index */
 	if (fw_ddb_index >= MAX_DDB_ENTRIES)
@@ -1553,27 +1424,18 @@
 	}
 
 	/* Device already exists in our database. */
-	old_fw_ddb_device_state = ddb_entry->fw_ddb_device_state;
 	DEBUG2(printk("scsi%ld: %s DDB - old state= 0x%x, new state=0x%x for "
 		      "index [%d]\n", ha->host_no, __func__,
 		      ddb_entry->fw_ddb_device_state, state, fw_ddb_index));
-	if (old_fw_ddb_device_state == state &&
-	    state == DDB_DS_SESSION_ACTIVE) {
-		if (atomic_read(&ddb_entry->state) != DDB_STATE_ONLINE) {
-			atomic_set(&ddb_entry->state, DDB_STATE_ONLINE);
-			iscsi_unblock_session(ddb_entry->sess);
-		}
-		return QLA_SUCCESS;
-	}
 
 	ddb_entry->fw_ddb_device_state = state;
 	/* Device is back online. */
-	if (ddb_entry->fw_ddb_device_state == DDB_DS_SESSION_ACTIVE) {
+	if ((ddb_entry->fw_ddb_device_state == DDB_DS_SESSION_ACTIVE) &&
+	   (atomic_read(&ddb_entry->state) != DDB_STATE_ONLINE)) {
 		atomic_set(&ddb_entry->state, DDB_STATE_ONLINE);
 		atomic_set(&ddb_entry->relogin_retry_count, 0);
 		atomic_set(&ddb_entry->relogin_timer, 0);
 		clear_bit(DF_RELOGIN, &ddb_entry->flags);
-		clear_bit(DF_NO_RELOGIN, &ddb_entry->flags);
 		iscsi_unblock_session(ddb_entry->sess);
 		iscsi_session_event(ddb_entry->sess,
 				    ISCSI_KEVENT_CREATE_SESSION);
@@ -1581,7 +1443,7 @@
 		 * Change the lun state to READY in case the lun TIMEOUT before
 		 * the device came back.
 		 */
-	} else {
+	} else if (ddb_entry->fw_ddb_device_state != DDB_DS_SESSION_ACTIVE) {
 		/* Device went away, mark device missing */
 		if (atomic_read(&ddb_entry->state) == DDB_STATE_ONLINE) {
 			DEBUG2(ql4_printk(KERN_INFO, ha, "%s mark missing "
@@ -1598,7 +1460,6 @@
 		 */
 		if (ddb_entry->fw_ddb_device_state == DDB_DS_SESSION_FAILED &&
 		    !test_bit(DF_RELOGIN, &ddb_entry->flags) &&
-		    !test_bit(DF_NO_RELOGIN, &ddb_entry->flags) &&
 		    qla4_is_relogin_allowed(ha, conn_err)) {
 			/*
 			 * This triggers a relogin.  After the relogin_timer

diff --git a/drivers/scsi/qla4xxx/ql4_isr.c b/drivers/scsi/qla4xxx/ql4_isr.c
index 03e028e..2f40ac7 100644
--- a/drivers/scsi/qla4xxx/ql4_isr.c
+++ b/drivers/scsi/qla4xxx/ql4_isr.c

@@ -801,7 +801,7 @@
 			       &ha->reg->ctrl_status);
 			readl(&ha->reg->ctrl_status);
 
-			if (!test_bit(AF_HBA_GOING_AWAY, &ha->flags))
+			if (!test_bit(AF_HA_REMOVAL, &ha->flags))
 				set_bit(DPC_RESET_HA_INTR, &ha->dpc_flags);
 
 			break;
@@ -1008,34 +1008,9 @@
 					      mbox_sts[0], mbox_sts[2],
 					      mbox_sts[3]));
 				break;
-			} else if (process_aen == RELOGIN_DDB_CHANGED_AENS) {
-				/* for use during init time, we only want to
-				 * relogin non-active ddbs */
-				struct ddb_entry *ddb_entry;
-
-				ddb_entry =
-					/* FIXME: name length? */
-					qla4xxx_lookup_ddb_by_fw_index(ha,
-								       mbox_sts[2]);
-				if (!ddb_entry)
-					break;
-
-				ddb_entry->dev_scan_wait_to_complete_relogin =
-					0;
-				ddb_entry->dev_scan_wait_to_start_relogin =
-					jiffies +
-					((ddb_entry->default_time2wait +
-					  4) * HZ);
-
-				DEBUG2(printk("scsi%ld: ddb [%d] initiate"
-					      " RELOGIN after %d seconds\n",
-					      ha->host_no,
-					      ddb_entry->fw_ddb_index,
-					      ddb_entry->default_time2wait +
-					      4));
-				break;
 			}
-
+		case PROCESS_ALL_AENS:
+		default:
 			if (mbox_sts[1] == 0) {	/* Global DB change. */
 				qla4xxx_reinitialize_ddb_list(ha);
 			} else if (mbox_sts[1] == 1) {	/* Specific device. */

diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c
index f65626a..f9d81c8 100644
--- a/drivers/scsi/qla4xxx/ql4_mbx.c
+++ b/drivers/scsi/qla4xxx/ql4_mbx.c

@@ -32,6 +32,7 @@
 	u_long wait_count;
 	uint32_t intr_status;
 	unsigned long flags = 0;
+	uint32_t dev_state;
 
 	/* Make sure that pointers are valid */
 	if (!mbx_cmd || !mbx_sts) {
@@ -40,12 +41,23 @@
 		return status;
 	}
 
-	if (is_qla8022(ha) &&
-	    test_bit(AF_FW_RECOVERY, &ha->flags)) {
-		DEBUG2(ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: prematurely "
-		    "completing mbx cmd as firmware recovery detected\n",
-		    ha->host_no, __func__));
-		return status;
+	if (is_qla8022(ha)) {
+		if (test_bit(AF_FW_RECOVERY, &ha->flags)) {
+			DEBUG2(ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: "
+			    "prematurely completing mbx cmd as firmware "
+			    "recovery detected\n", ha->host_no, __func__));
+			return status;
+		}
+		/* Do not send any mbx cmd if h/w is in failed state*/
+		qla4_8xxx_idc_lock(ha);
+		dev_state = qla4_8xxx_rd_32(ha, QLA82XX_CRB_DEV_STATE);
+		qla4_8xxx_idc_unlock(ha);
+		if (dev_state == QLA82XX_DEV_FAILED) {
+			ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: H/W is in "
+			    "failed state, do not send any mailbox commands\n",
+			    ha->host_no, __func__);
+			return status;
+		}
 	}
 
 	if ((is_aer_supported(ha)) &&
@@ -139,7 +151,7 @@
 	if (test_bit(AF_IRQ_ATTACHED, &ha->flags) &&
 	    test_bit(AF_INTERRUPTS_ON, &ha->flags) &&
 	    test_bit(AF_ONLINE, &ha->flags) &&
-	    !test_bit(AF_HBA_GOING_AWAY, &ha->flags)) {
+	    !test_bit(AF_HA_REMOVAL, &ha->flags)) {
 		/* Do not poll for completion. Use completion queue */
 		set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags);
 		wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ);
@@ -395,9 +407,6 @@
 	/*memcpy(ha->alias, init_fw_cb->Alias,
 	       min(sizeof(ha->alias), sizeof(init_fw_cb->Alias)));*/
 
-	/* Save Command Line Paramater info */
-	ha->discovery_wait = ql4xdiscoverywait;
-
 	if (ha->acb_version == ACB_SUPPORTED) {
 		ha->ipv6_options = init_fw_cb->ipv6_opts;
 		ha->ipv6_addl_options = init_fw_cb->ipv6_addtl_opts;
@@ -467,6 +476,11 @@
 
 	init_fw_cb->fw_options &= __constant_cpu_to_le16(~FWOPT_TARGET_MODE);
 
+	/* Set bit for "serialize task mgmt" all other bits need to be zero */
+	init_fw_cb->add_fw_options = 0;
+	init_fw_cb->add_fw_options |=
+	    __constant_cpu_to_le16(SERIALIZE_TASK_MGMT);
+
 	if (qla4xxx_set_ifcb(ha, &mbox_cmd[0], &mbox_sts[0], init_fw_cb_dma)
 		!= QLA_SUCCESS) {
 		DEBUG2(printk(KERN_WARNING

diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c
index 3d5ef2d..35381cb 100644
--- a/drivers/scsi/qla4xxx/ql4_nx.c
+++ b/drivers/scsi/qla4xxx/ql4_nx.c

@@ -2304,14 +2304,13 @@
 void
 qla4_8xxx_disable_intrs(struct scsi_qla_host *ha)
 {
-	if (test_bit(AF_INTERRUPTS_ON, &ha->flags))
+	if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags))
 		qla4_8xxx_mbx_intr_disable(ha);
 
 	spin_lock_irq(&ha->hardware_lock);
 	/* BIT 10 - set */
 	qla4_8xxx_wr_32(ha, ha->nx_legacy_intr.tgt_mask_reg, 0x0400);
 	spin_unlock_irq(&ha->hardware_lock);
-	clear_bit(AF_INTERRUPTS_ON, &ha->flags);
 }
 
 struct ql4_init_msix_entry {

diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 967836e..a4acb0d 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c

@@ -29,10 +29,6 @@
 /*
  * Module parameter information and variables
  */
-int ql4xdiscoverywait = 60;
-module_param(ql4xdiscoverywait, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(ql4xdiscoverywait, "Discovery wait time");
-
 int ql4xdontresethba = 0;
 module_param(ql4xdontresethba, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(ql4xdontresethba,
@@ -55,6 +51,17 @@
 		" 2 = enable MSI interrupt mechanism.");
 
 #define QL4_DEF_QDEPTH 32
+static int ql4xmaxqdepth = QL4_DEF_QDEPTH;
+module_param(ql4xmaxqdepth, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(ql4xmaxqdepth,
+		"Maximum queue depth to report for target devices.\n"
+		" Default: 32.");
+
+static int ql4xsess_recovery_tmo = QL4_SESS_RECOVERY_TMO;
+module_param(ql4xsess_recovery_tmo, int, S_IRUGO);
+MODULE_PARM_DESC(ql4xsess_recovery_tmo,
+		"Target Session Recovery Timeout.\n"
+		" Default: 30 sec.");
 
 /*
  * SCSI host template entry points
@@ -165,7 +172,7 @@
 		DEBUG2(printk("scsi%ld: %s: ddb [%d] session recovery timeout "
 			      "of (%d) secs exhausted, marking device DEAD.\n",
 			      ha->host_no, __func__, ddb_entry->fw_ddb_index,
-			      QL4_SESS_RECOVERY_TMO));
+			      ddb_entry->sess->recovery_tmo));
 	}
 }
 
@@ -295,7 +302,7 @@
 {
 	int err;
 
-	ddb_entry->sess->recovery_tmo = QL4_SESS_RECOVERY_TMO;
+	ddb_entry->sess->recovery_tmo = ql4xsess_recovery_tmo;
 
 	err = iscsi_add_session(ddb_entry->sess, ddb_entry->fw_ddb_index);
 	if (err) {
@@ -753,12 +760,6 @@
 	if (!pci_channel_offline(ha->pdev))
 		pci_read_config_word(ha->pdev, PCI_VENDOR_ID, &w);
 
-	if (test_bit(AF_HBA_GOING_AWAY, &ha->flags)) {
-		DEBUG2(ql4_printk(KERN_INFO, ha, "%s exited. HBA GOING AWAY\n",
-		    __func__));
-		return;
-	}
-
 	if (is_qla8022(ha)) {
 		qla4_8xxx_watchdog(ha);
 	}
@@ -1067,7 +1068,6 @@
 
 	/* Disable the board */
 	ql4_printk(KERN_INFO, ha, "Disabling the board\n");
-	set_bit(AF_HBA_GOING_AWAY, &ha->flags);
 
 	qla4xxx_abort_active_cmds(ha, DID_NO_CONNECT << 16);
 	qla4xxx_mark_all_devices_missing(ha);
@@ -1218,6 +1218,27 @@
 	return status;
 }
 
+static void qla4xxx_relogin_all_devices(struct scsi_qla_host *ha)
+{
+	struct ddb_entry *ddb_entry, *dtemp;
+
+	list_for_each_entry_safe(ddb_entry, dtemp, &ha->ddb_list, list) {
+		if ((atomic_read(&ddb_entry->state) == DDB_STATE_MISSING) ||
+		    (atomic_read(&ddb_entry->state) == DDB_STATE_DEAD)) {
+			if (ddb_entry->fw_ddb_device_state ==
+			    DDB_DS_SESSION_ACTIVE) {
+				atomic_set(&ddb_entry->state, DDB_STATE_ONLINE);
+				ql4_printk(KERN_INFO, ha, "scsi%ld: %s: ddb[%d]"
+				    " marked ONLINE\n",	ha->host_no, __func__,
+				    ddb_entry->fw_ddb_index);
+
+				iscsi_unblock_session(ddb_entry->sess);
+			} else
+				qla4xxx_relogin_device(ha, ddb_entry);
+		}
+	}
+}
+
 void qla4xxx_wake_dpc(struct scsi_qla_host *ha)
 {
 	if (ha->dpc_thread &&
@@ -1259,11 +1280,6 @@
 		goto do_dpc_exit;
 	}
 
-	/* HBA is in the process of being permanently disabled.
-	 * Don't process anything */
-	if (test_bit(AF_HBA_GOING_AWAY, &ha->flags))
-		return;
-
 	if (is_qla8022(ha)) {
 		if (test_bit(DPC_HA_UNRECOVERABLE, &ha->dpc_flags)) {
 			qla4_8xxx_idc_lock(ha);
@@ -1331,13 +1347,7 @@
 	if (test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) {
 		if (!test_bit(AF_LINK_UP, &ha->flags)) {
 			/* ---- link down? --- */
-			list_for_each_entry_safe(ddb_entry, dtemp,
-						 &ha->ddb_list, list) {
-				if (atomic_read(&ddb_entry->state) ==
-						DDB_STATE_ONLINE)
-					qla4xxx_mark_device_missing(ha,
-							ddb_entry);
-			}
+			qla4xxx_mark_all_devices_missing(ha);
 		} else {
 			/* ---- link up? --- *
 			 * F/W will auto login to all devices ONLY ONCE after
@@ -1346,30 +1356,7 @@
 			 * manually relogin to devices when recovering from
 			 * connection failures, logouts, expired KATO, etc. */
 
-			list_for_each_entry_safe(ddb_entry, dtemp,
-							&ha->ddb_list, list) {
-				if ((atomic_read(&ddb_entry->state) ==
-						 DDB_STATE_MISSING) ||
-				    (atomic_read(&ddb_entry->state) ==
-						 DDB_STATE_DEAD)) {
-					if (ddb_entry->fw_ddb_device_state ==
-					    DDB_DS_SESSION_ACTIVE) {
-						atomic_set(&ddb_entry->state,
-							   DDB_STATE_ONLINE);
-						ql4_printk(KERN_INFO, ha,
-						    "scsi%ld: %s: ddb[%d]"
-						    " marked ONLINE\n",
-						    ha->host_no, __func__,
-						    ddb_entry->fw_ddb_index);
-
-						iscsi_unblock_session(
-						    ddb_entry->sess);
-					} else
-						qla4xxx_relogin_device(
-						    ha, ddb_entry);
-				}
-
-			}
+			qla4xxx_relogin_all_devices(ha);
 		}
 	}
 
@@ -1630,6 +1617,7 @@
 	uint8_t init_retry_count = 0;
 	char buf[34];
 	struct qla4_8xxx_legacy_intr_set *nx_legacy_intr;
+	uint32_t dev_state;
 
 	if (pci_enable_device(pdev))
 		return -1;
@@ -1713,6 +1701,18 @@
 	status = qla4xxx_initialize_adapter(ha, REBUILD_DDB_LIST);
 	while ((!test_bit(AF_ONLINE, &ha->flags)) &&
 	    init_retry_count++ < MAX_INIT_RETRIES) {
+
+		if (is_qla8022(ha)) {
+			qla4_8xxx_idc_lock(ha);
+			dev_state = qla4_8xxx_rd_32(ha, QLA82XX_CRB_DEV_STATE);
+			qla4_8xxx_idc_unlock(ha);
+			if (dev_state == QLA82XX_DEV_FAILED) {
+				ql4_printk(KERN_WARNING, ha, "%s: don't retry "
+				    "initialize adapter. H/W is in failed state\n",
+				    __func__);
+				break;
+			}
+		}
 		DEBUG2(printk("scsi: %s: retrying adapter initialization "
 			      "(%d)\n", __func__, init_retry_count));
 
@@ -1815,6 +1815,44 @@
 }
 
 /**
+ * qla4xxx_prevent_other_port_reinit - prevent other port from re-initialize
+ * @ha: pointer to adapter structure
+ *
+ * Mark the other ISP-4xxx port to indicate that the driver is being removed,
+ * so that the other port will not re-initialize while in the process of
+ * removing the ha due to driver unload or hba hotplug.
+ **/
+static void qla4xxx_prevent_other_port_reinit(struct scsi_qla_host *ha)
+{
+	struct scsi_qla_host *other_ha = NULL;
+	struct pci_dev *other_pdev = NULL;
+	int fn = ISP4XXX_PCI_FN_2;
+
+	/*iscsi function numbers for ISP4xxx is 1 and 3*/
+	if (PCI_FUNC(ha->pdev->devfn) & BIT_1)
+		fn = ISP4XXX_PCI_FN_1;
+
+	other_pdev =
+		pci_get_domain_bus_and_slot(pci_domain_nr(ha->pdev->bus),
+		ha->pdev->bus->number, PCI_DEVFN(PCI_SLOT(ha->pdev->devfn),
+		fn));
+
+	/* Get other_ha if other_pdev is valid and state is enable*/
+	if (other_pdev) {
+		if (atomic_read(&other_pdev->enable_cnt)) {
+			other_ha = pci_get_drvdata(other_pdev);
+			if (other_ha) {
+				set_bit(AF_HA_REMOVAL, &other_ha->flags);
+				DEBUG2(ql4_printk(KERN_INFO, ha, "%s: "
+				    "Prevent %s reinit\n", __func__,
+				    dev_name(&other_ha->pdev->dev)));
+			}
+		}
+		pci_dev_put(other_pdev);
+	}
+}
+
+/**
  * qla4xxx_remove_adapter - calback function to remove adapter.
  * @pci_dev: PCI device pointer
  **/
@@ -1824,7 +1862,8 @@
 
 	ha = pci_get_drvdata(pdev);
 
-	set_bit(AF_HBA_GOING_AWAY, &ha->flags);
+	if (!is_qla8022(ha))
+		qla4xxx_prevent_other_port_reinit(ha);
 
 	/* remove devs from iscsi_sessions to scsi_devices */
 	qla4xxx_free_ddb_list(ha);
@@ -1868,10 +1907,15 @@
 {
 	struct iscsi_cls_session *sess = starget_to_session(sdev->sdev_target);
 	struct ddb_entry *ddb = sess->dd_data;
+	int queue_depth = QL4_DEF_QDEPTH;
 
 	sdev->hostdata = ddb;
 	sdev->tagged_supported = 1;
-	scsi_activate_tcq(sdev, QL4_DEF_QDEPTH);
+
+	if (ql4xmaxqdepth != 0 && ql4xmaxqdepth <= 0xffffU)
+		queue_depth = ql4xmaxqdepth;
+
+	scsi_activate_tcq(sdev, queue_depth);
 	return 0;
 }
 

diff --git a/drivers/scsi/qla4xxx/ql4_version.h b/drivers/scsi/qla4xxx/ql4_version.h
index 8475b30..6031557 100644
--- a/drivers/scsi/qla4xxx/ql4_version.h
+++ b/drivers/scsi/qla4xxx/ql4_version.h

@@ -5,4 +5,4 @@
  * See LICENSE.qla4xxx for copyright and licensing details.
  */
 
-#define QLA4XXX_DRIVER_VERSION	"5.02.00-k5"
+#define QLA4XXX_DRIVER_VERSION	"5.02.00-k6"

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index b421839..3fd16d7 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c

@@ -1917,7 +1917,7 @@
 #define iscsi_priv_session_rw_attr(field, format)			\
 	iscsi_priv_session_attr_show(field, format)			\
 	iscsi_priv_session_attr_store(field)				\
-static ISCSI_CLASS_ATTR(priv_sess, field, S_IRUGO | S_IWUGO,		\
+static ISCSI_CLASS_ATTR(priv_sess, field, S_IRUGO | S_IWUSR,		\
 			show_priv_session_##field,			\
 			store_priv_session_##field)
 iscsi_priv_session_rw_attr(recovery_tmo, "%d");

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 7ff61d7..b61ebec 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c

@@ -2027,14 +2027,10 @@
 	int old_rcd = sdkp->RCD;
 	int old_dpofua = sdkp->DPOFUA;
 
-	if (sdp->skip_ms_page_8) {
-		if (sdp->type == TYPE_RBC)
-			goto defaults;
-		else {
-			modepage = 0x3F;
-			dbd = 0;
-		}
-	} else if (sdp->type == TYPE_RBC) {
+	if (sdp->skip_ms_page_8)
+		goto defaults;
+
+	if (sdp->type == TYPE_RBC) {
 		modepage = 6;
 		dbd = 8;
 	} else {
@@ -2062,11 +2058,13 @@
 	 */
 	if (len < 3)
 		goto bad_sense;
-	else if (len > SD_BUF_SIZE) {
-		sd_printk(KERN_NOTICE, sdkp, "Truncating mode parameter "
-			  "data from %d to %d bytes\n", len, SD_BUF_SIZE);
-		len = SD_BUF_SIZE;
-	}
+	if (len > 20)
+		len = 20;
+
+	/* Take headers and block descriptors into account */
+	len += data.header_length + data.block_descriptor_length;
+	if (len > SD_BUF_SIZE)
+		goto bad_sense;
 
 	/* Get the data */
 	res = sd_do_mode_sense(sdp, dbd, modepage, buffer, len, &data, &sshdr);
@@ -2074,45 +2072,16 @@
 	if (scsi_status_is_good(res)) {
 		int offset = data.header_length + data.block_descriptor_length;
 
-		while (offset < len) {
-			u8 page_code = buffer[offset] & 0x3F;
-			u8 spf       = buffer[offset] & 0x40;
-
-			if (page_code == 8 || page_code == 6) {
-				/* We're interested only in the first 3 bytes.
-				 */
-				if (len - offset <= 2) {
-					sd_printk(KERN_ERR, sdkp, "Incomplete "
-						  "mode parameter data\n");
-					goto defaults;
-				} else {
-					modepage = page_code;
-					goto Page_found;
-				}
-			} else {
-				/* Go to the next page */
-				if (spf && len - offset > 3)
-					offset += 4 + (buffer[offset+2] << 8) +
-						buffer[offset+3];
-				else if (!spf && len - offset > 1)
-					offset += 2 + buffer[offset+1];
-				else {
-					sd_printk(KERN_ERR, sdkp, "Incomplete "
-						  "mode parameter data\n");
-					goto defaults;
-				}
-			}
+		if (offset >= SD_BUF_SIZE - 2) {
+			sd_printk(KERN_ERR, sdkp, "Malformed MODE SENSE response\n");
+			goto defaults;
 		}
 
-		if (modepage == 0x3F) {
-			sd_printk(KERN_ERR, sdkp, "No Caching mode page "
-				  "present\n");
-			goto defaults;
-		} else if ((buffer[offset] & 0x3f) != modepage) {
+		if ((buffer[offset] & 0x3f) != modepage) {
 			sd_printk(KERN_ERR, sdkp, "Got wrong page\n");
 			goto defaults;
 		}
-	Page_found:
+
 		if (modepage == 8) {
 			sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0);
 			sdkp->RCD = ((buffer[offset + 2] & 0x01) != 0);

diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c
index 7f5a6a8..eb7a3e8 100644
--- a/drivers/scsi/ses.c
+++ b/drivers/scsi/ses.c

@@ -35,9 +35,11 @@
 
 struct ses_device {
 	unsigned char *page1;
+	unsigned char *page1_types;
 	unsigned char *page2;
 	unsigned char *page10;
 	short page1_len;
+	short page1_num_types;
 	short page2_len;
 	short page10_len;
 };
@@ -110,12 +112,12 @@
 	int i, j, count = 0, descriptor = ecomp->number;
 	struct scsi_device *sdev = to_scsi_device(edev->edev.parent);
 	struct ses_device *ses_dev = edev->scratch;
-	unsigned char *type_ptr = ses_dev->page1 + 12 + ses_dev->page1[11];
+	unsigned char *type_ptr = ses_dev->page1_types;
 	unsigned char *desc_ptr = ses_dev->page2 + 8;
 
 	/* Clear everything */
 	memset(desc_ptr, 0, ses_dev->page2_len - 8);
-	for (i = 0; i < ses_dev->page1[10]; i++, type_ptr += 4) {
+	for (i = 0; i < ses_dev->page1_num_types; i++, type_ptr += 4) {
 		for (j = 0; j < type_ptr[1]; j++) {
 			desc_ptr += 4;
 			if (type_ptr[0] != ENCLOSURE_COMPONENT_DEVICE &&
@@ -140,12 +142,12 @@
 	int i, j, count = 0, descriptor = ecomp->number;
 	struct scsi_device *sdev = to_scsi_device(edev->edev.parent);
 	struct ses_device *ses_dev = edev->scratch;
-	unsigned char *type_ptr = ses_dev->page1 + 12 + ses_dev->page1[11];
+	unsigned char *type_ptr = ses_dev->page1_types;
 	unsigned char *desc_ptr = ses_dev->page2 + 8;
 
 	ses_recv_diag(sdev, 2, ses_dev->page2, ses_dev->page2_len);
 
-	for (i = 0; i < ses_dev->page1[10]; i++, type_ptr += 4) {
+	for (i = 0; i < ses_dev->page1_num_types; i++, type_ptr += 4) {
 		for (j = 0; j < type_ptr[1]; j++) {
 			desc_ptr += 4;
 			if (type_ptr[0] != ENCLOSURE_COMPONENT_DEVICE &&
@@ -358,7 +360,7 @@
 	unsigned char *buf = NULL, *type_ptr, *desc_ptr, *addl_desc_ptr = NULL;
 	int i, j, page7_len, len, components;
 	struct ses_device *ses_dev = edev->scratch;
-	int types = ses_dev->page1[10];
+	int types = ses_dev->page1_num_types;
 	unsigned char *hdr_buf = kzalloc(INIT_ALLOC_SIZE, GFP_KERNEL);
 
 	if (!hdr_buf)
@@ -390,10 +392,10 @@
 		len = (desc_ptr[2] << 8) + desc_ptr[3];
 		/* skip past overall descriptor */
 		desc_ptr += len + 4;
-		if (ses_dev->page10)
-			addl_desc_ptr = ses_dev->page10 + 8;
 	}
-	type_ptr = ses_dev->page1 + 12 + ses_dev->page1[11];
+	if (ses_dev->page10)
+		addl_desc_ptr = ses_dev->page10 + 8;
+	type_ptr = ses_dev->page1_types;
 	components = 0;
 	for (i = 0; i < types; i++, type_ptr += 4) {
 		for (j = 0; j < type_ptr[1]; j++) {
@@ -503,6 +505,7 @@
 	u32 result;
 	int i, types, len, components = 0;
 	int err = -ENOMEM;
+	int num_enclosures;
 	struct enclosure_device *edev;
 	struct ses_component *scomp = NULL;
 
@@ -530,16 +533,6 @@
 	if (result)
 		goto recv_failed;
 
-	if (hdr_buf[1] != 0) {
-		/* FIXME: need subenclosure support; I've just never
-		 * seen a device with subenclosures and it makes the
-		 * traversal routines more complex */
-		sdev_printk(KERN_ERR, sdev,
-			"FIXME driver has no support for subenclosures (%d)\n",
-			hdr_buf[1]);
-		goto err_free;
-	}
-
 	len = (hdr_buf[2] << 8) + hdr_buf[3] + 4;
 	buf = kzalloc(len, GFP_KERNEL);
 	if (!buf)
@@ -549,11 +542,24 @@
 	if (result)
 		goto recv_failed;
 
-	types = buf[10];
+	types = 0;
 
-	type_ptr = buf + 12 + buf[11];
+	/* we always have one main enclosure and the rest are referred
+	 * to as secondary subenclosures */
+	num_enclosures = buf[1] + 1;
 
-	for (i = 0; i < types; i++, type_ptr += 4) {
+	/* begin at the enclosure descriptor */
+	type_ptr = buf + 8;
+	/* skip all the enclosure descriptors */
+	for (i = 0; i < num_enclosures && type_ptr < buf + len; i++) {
+		types += type_ptr[2];
+		type_ptr += type_ptr[3] + 4;
+	}
+
+	ses_dev->page1_types = type_ptr;
+	ses_dev->page1_num_types = types;
+
+	for (i = 0; i < types && type_ptr < buf + len; i++, type_ptr += 4) {
 		if (type_ptr[0] == ENCLOSURE_COMPONENT_DEVICE ||
 		    type_ptr[0] == ENCLOSURE_COMPONENT_ARRAY_DEVICE)
 			components += type_ptr[1];

diff --git a/drivers/staging/westbridge/astoria/block/cyasblkdev_block.c b/drivers/staging/westbridge/astoria/block/cyasblkdev_block.c
index 842cd92..289729d 100644
--- a/drivers/staging/westbridge/astoria/block/cyasblkdev_block.c
+++ b/drivers/staging/westbridge/astoria/block/cyasblkdev_block.c

@@ -1191,7 +1191,7 @@
 		bd->user_disk_1->first_minor = (devidx + 1) << CYASBLKDEV_SHIFT;
 		bd->user_disk_1->minors = 8;
 		bd->user_disk_1->fops = &cyasblkdev_bdops;
-		bd->user_disk_0->events = DISK_EVENT_MEDIA_CHANGE;
+		bd->user_disk_1->events = DISK_EVENT_MEDIA_CHANGE;
 		bd->user_disk_1->private_data = bd;
 		bd->user_disk_1->queue = bd->queue.queue;
 		bd->dbgprn_flags = DBGPRN_RD_RQ;

diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig
index 2fac3be..9ef2dbb 100644
--- a/drivers/target/Kconfig
+++ b/drivers/target/Kconfig

@@ -29,4 +29,6 @@
 	Say Y here to enable the TCM/pSCSI subsystem plugin for non-buffered
 	passthrough access to Linux/SCSI device
 
+source "drivers/target/loopback/Kconfig"
+
 endif

diff --git a/drivers/target/Makefile b/drivers/target/Makefile
index 973bb19..1178bbf 100644
--- a/drivers/target/Makefile
+++ b/drivers/target/Makefile

@@ -1,4 +1,3 @@
-EXTRA_CFLAGS += -I$(srctree)/drivers/target/ -I$(srctree)/drivers/scsi/
 
 target_core_mod-y		:= target_core_configfs.o \
 				   target_core_device.o \
@@ -13,7 +12,8 @@
 				   target_core_transport.o \
 				   target_core_cdb.o \
 				   target_core_ua.o \
-				   target_core_rd.o
+				   target_core_rd.o \
+				   target_core_stat.o
 
 obj-$(CONFIG_TARGET_CORE)	+= target_core_mod.o
 
@@ -21,3 +21,6 @@
 obj-$(CONFIG_TCM_IBLOCK)	+= target_core_iblock.o
 obj-$(CONFIG_TCM_FILEIO)	+= target_core_file.o
 obj-$(CONFIG_TCM_PSCSI)		+= target_core_pscsi.o
+
+# Fabric modules
+obj-$(CONFIG_LOOPBACK_TARGET)	+= loopback/

diff --git a/drivers/target/loopback/Kconfig b/drivers/target/loopback/Kconfig
new file mode 100644
index 0000000..57dcbc2
--- /dev/null
+++ b/drivers/target/loopback/Kconfig

@@ -0,0 +1,11 @@
+config LOOPBACK_TARGET
+	tristate "TCM Virtual SAS target and Linux/SCSI LDD fabric loopback module"
+	help
+	  Say Y here to enable the TCM Virtual SAS target and Linux/SCSI LLD
+	  fabric loopback module.
+
+config LOOPBACK_TARGET_CDB_DEBUG
+	bool "TCM loopback fabric module CDB debug code"
+	depends on LOOPBACK_TARGET
+	help
+	  Say Y here to enable the TCM loopback fabric module CDB debug code

diff --git a/drivers/target/loopback/Makefile b/drivers/target/loopback/Makefile
new file mode 100644
index 0000000..6abebdf
--- /dev/null
+++ b/drivers/target/loopback/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_LOOPBACK_TARGET)	+= tcm_loop.o

diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
new file mode 100644
index 0000000..aed4e46
--- /dev/null
+++ b/drivers/target/loopback/tcm_loop.c

@@ -0,0 +1,1579 @@
+/*******************************************************************************
+ *
+ * This file contains the Linux/SCSI LLD virtual SCSI initiator driver
+ * for emulated SAS initiator ports
+ *
+ * © Copyright 2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@risingtidesystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ****************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/configfs.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/libsas.h> /* For TASK_ATTR_* */
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_fabric_configfs.h>
+#include <target/target_core_fabric_lib.h>
+#include <target/target_core_configfs.h>
+#include <target/target_core_device.h>
+#include <target/target_core_tpg.h>
+#include <target/target_core_tmr.h>
+
+#include "tcm_loop.h"
+
+#define to_tcm_loop_hba(hba)	container_of(hba, struct tcm_loop_hba, dev)
+
+/* Local pointer to allocated TCM configfs fabric module */
+static struct target_fabric_configfs *tcm_loop_fabric_configfs;
+
+static struct kmem_cache *tcm_loop_cmd_cache;
+
+static int tcm_loop_hba_no_cnt;
+
+/*
+ * Allocate a tcm_loop cmd descriptor from target_core_mod code
+ *
+ * Can be called from interrupt context in tcm_loop_queuecommand() below
+ */
+static struct se_cmd *tcm_loop_allocate_core_cmd(
+	struct tcm_loop_hba *tl_hba,
+	struct se_portal_group *se_tpg,
+	struct scsi_cmnd *sc)
+{
+	struct se_cmd *se_cmd;
+	struct se_session *se_sess;
+	struct tcm_loop_nexus *tl_nexus = tl_hba->tl_nexus;
+	struct tcm_loop_cmd *tl_cmd;
+	int sam_task_attr;
+
+	if (!tl_nexus) {
+		scmd_printk(KERN_ERR, sc, "TCM_Loop I_T Nexus"
+				" does not exist\n");
+		set_host_byte(sc, DID_ERROR);
+		return NULL;
+	}
+	se_sess = tl_nexus->se_sess;
+
+	tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_ATOMIC);
+	if (!tl_cmd) {
+		printk(KERN_ERR "Unable to allocate struct tcm_loop_cmd\n");
+		set_host_byte(sc, DID_ERROR);
+		return NULL;
+	}
+	se_cmd = &tl_cmd->tl_se_cmd;
+	/*
+	 * Save the pointer to struct scsi_cmnd *sc
+	 */
+	tl_cmd->sc = sc;
+	/*
+	 * Locate the SAM Task Attr from struct scsi_cmnd *
+	 */
+	if (sc->device->tagged_supported) {
+		switch (sc->tag) {
+		case HEAD_OF_QUEUE_TAG:
+			sam_task_attr = TASK_ATTR_HOQ;
+			break;
+		case ORDERED_QUEUE_TAG:
+			sam_task_attr = TASK_ATTR_ORDERED;
+			break;
+		default:
+			sam_task_attr = TASK_ATTR_SIMPLE;
+			break;
+		}
+	} else
+		sam_task_attr = TASK_ATTR_SIMPLE;
+
+	/*
+	 * Initialize struct se_cmd descriptor from target_core_mod infrastructure
+	 */
+	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess,
+			scsi_bufflen(sc), sc->sc_data_direction, sam_task_attr,
+			&tl_cmd->tl_sense_buf[0]);
+
+	/*
+	 * Signal BIDI usage with T_TASK(cmd)->t_tasks_bidi
+	 */
+	if (scsi_bidi_cmnd(sc))
+		T_TASK(se_cmd)->t_tasks_bidi = 1;
+	/*
+	 * Locate the struct se_lun pointer and attach it to struct se_cmd
+	 */
+	if (transport_get_lun_for_cmd(se_cmd, NULL, tl_cmd->sc->device->lun) < 0) {
+		kmem_cache_free(tcm_loop_cmd_cache, tl_cmd);
+		set_host_byte(sc, DID_NO_CONNECT);
+		return NULL;
+	}
+
+	transport_device_setup_cmd(se_cmd);
+	return se_cmd;
+}
+
+/*
+ * Called by struct target_core_fabric_ops->new_cmd_map()
+ *
+ * Always called in process context.  A non zero return value
+ * here will signal to handle an exception based on the return code.
+ */
+static int tcm_loop_new_cmd_map(struct se_cmd *se_cmd)
+{
+	struct tcm_loop_cmd *tl_cmd = container_of(se_cmd,
+				struct tcm_loop_cmd, tl_se_cmd);
+	struct scsi_cmnd *sc = tl_cmd->sc;
+	void *mem_ptr, *mem_bidi_ptr = NULL;
+	u32 sg_no_bidi = 0;
+	int ret;
+	/*
+	 * Allocate the necessary tasks to complete the received CDB+data
+	 */
+	ret = transport_generic_allocate_tasks(se_cmd, tl_cmd->sc->cmnd);
+	if (ret == -1) {
+		/* Out of Resources */
+		return PYX_TRANSPORT_LU_COMM_FAILURE;
+	} else if (ret == -2) {
+		/*
+		 * Handle case for SAM_STAT_RESERVATION_CONFLICT
+		 */
+		if (se_cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT)
+			return PYX_TRANSPORT_RESERVATION_CONFLICT;
+		/*
+		 * Otherwise, return SAM_STAT_CHECK_CONDITION and return
+		 * sense data.
+		 */
+		return PYX_TRANSPORT_USE_SENSE_REASON;
+	}
+	/*
+	 * Setup the struct scatterlist memory from the received
+	 * struct scsi_cmnd.
+	 */
+	if (scsi_sg_count(sc)) {
+		se_cmd->se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM;
+		mem_ptr = (void *)scsi_sglist(sc);
+		/*
+		 * For BIDI commands, pass in the extra READ buffer
+		 * to transport_generic_map_mem_to_cmd() below..
+		 */
+		if (T_TASK(se_cmd)->t_tasks_bidi) {
+			struct scsi_data_buffer *sdb = scsi_in(sc);
+
+			mem_bidi_ptr = (void *)sdb->table.sgl;
+			sg_no_bidi = sdb->table.nents;
+		}
+	} else {
+		/*
+		 * Used for DMA_NONE
+		 */
+		mem_ptr = NULL;
+	}
+	/*
+	 * Map the SG memory into struct se_mem->page linked list using the same
+	 * physical memory at sg->page_link.
+	 */
+	ret = transport_generic_map_mem_to_cmd(se_cmd, mem_ptr,
+			scsi_sg_count(sc), mem_bidi_ptr, sg_no_bidi);
+	if (ret < 0)
+		return PYX_TRANSPORT_LU_COMM_FAILURE;
+
+	return 0;
+}
+
+/*
+ * Called from struct target_core_fabric_ops->check_stop_free()
+ */
+static void tcm_loop_check_stop_free(struct se_cmd *se_cmd)
+{
+	/*
+	 * Do not release struct se_cmd's containing a valid TMR
+	 * pointer.  These will be released directly in tcm_loop_device_reset()
+	 * with transport_generic_free_cmd().
+	 */
+	if (se_cmd->se_tmr_req)
+		return;
+	/*
+	 * Release the struct se_cmd, which will make a callback to release
+	 * struct tcm_loop_cmd * in tcm_loop_deallocate_core_cmd()
+	 */
+	transport_generic_free_cmd(se_cmd, 0, 1, 0);
+}
+
+/*
+ * Called from struct target_core_fabric_ops->release_cmd_to_pool()
+ */
+static void tcm_loop_deallocate_core_cmd(struct se_cmd *se_cmd)
+{
+	struct tcm_loop_cmd *tl_cmd = container_of(se_cmd,
+				struct tcm_loop_cmd, tl_se_cmd);
+
+	kmem_cache_free(tcm_loop_cmd_cache, tl_cmd);
+}
+
+static int tcm_loop_proc_info(struct Scsi_Host *host, char *buffer,
+				char **start, off_t offset,
+				int length, int inout)
+{
+	return sprintf(buffer, "tcm_loop_proc_info()\n");
+}
+
+static int tcm_loop_driver_probe(struct device *);
+static int tcm_loop_driver_remove(struct device *);
+
+static int pseudo_lld_bus_match(struct device *dev,
+				struct device_driver *dev_driver)
+{
+	return 1;
+}
+
+static struct bus_type tcm_loop_lld_bus = {
+	.name			= "tcm_loop_bus",
+	.match			= pseudo_lld_bus_match,
+	.probe			= tcm_loop_driver_probe,
+	.remove			= tcm_loop_driver_remove,
+};
+
+static struct device_driver tcm_loop_driverfs = {
+	.name			= "tcm_loop",
+	.bus			= &tcm_loop_lld_bus,
+};
+/*
+ * Used with root_device_register() in tcm_loop_alloc_core_bus() below
+ */
+struct device *tcm_loop_primary;
+
+/*
+ * Copied from drivers/scsi/libfc/fc_fcp.c:fc_change_queue_depth() and
+ * drivers/scsi/libiscsi.c:iscsi_change_queue_depth()
+ */
+static int tcm_loop_change_queue_depth(
+	struct scsi_device *sdev,
+	int depth,
+	int reason)
+{
+	switch (reason) {
+	case SCSI_QDEPTH_DEFAULT:
+		scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
+		break;
+	case SCSI_QDEPTH_QFULL:
+		scsi_track_queue_full(sdev, depth);
+		break;
+	case SCSI_QDEPTH_RAMP_UP:
+		scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return sdev->queue_depth;
+}
+
+/*
+ * Main entry point from struct scsi_host_template for incoming SCSI CDB+Data
+ * from Linux/SCSI subsystem for SCSI low level device drivers (LLDs)
+ */
+static int tcm_loop_queuecommand(
+	struct Scsi_Host *sh,
+	struct scsi_cmnd *sc)
+{
+	struct se_cmd *se_cmd;
+	struct se_portal_group *se_tpg;
+	struct tcm_loop_hba *tl_hba;
+	struct tcm_loop_tpg *tl_tpg;
+
+	TL_CDB_DEBUG("tcm_loop_queuecommand() %d:%d:%d:%d got CDB: 0x%02x"
+		" scsi_buf_len: %u\n", sc->device->host->host_no,
+		sc->device->id, sc->device->channel, sc->device->lun,
+		sc->cmnd[0], scsi_bufflen(sc));
+	/*
+	 * Locate the tcm_loop_hba_t pointer
+	 */
+	tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
+	tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
+	se_tpg = &tl_tpg->tl_se_tpg;
+	/*
+	 * Determine the SAM Task Attribute and allocate tl_cmd and
+	 * tl_cmd->tl_se_cmd from TCM infrastructure
+	 */
+	se_cmd = tcm_loop_allocate_core_cmd(tl_hba, se_tpg, sc);
+	if (!se_cmd) {
+		sc->scsi_done(sc);
+		return 0;
+	}
+	/*
+	 * Queue up the newly allocated to be processed in TCM thread context.
+	*/
+	transport_generic_handle_cdb_map(se_cmd);
+	return 0;
+}
+
+/*
+ * Called from SCSI EH process context to issue a LUN_RESET TMR
+ * to struct scsi_device
+ */
+static int tcm_loop_device_reset(struct scsi_cmnd *sc)
+{
+	struct se_cmd *se_cmd = NULL;
+	struct se_portal_group *se_tpg;
+	struct se_session *se_sess;
+	struct tcm_loop_cmd *tl_cmd = NULL;
+	struct tcm_loop_hba *tl_hba;
+	struct tcm_loop_nexus *tl_nexus;
+	struct tcm_loop_tmr *tl_tmr = NULL;
+	struct tcm_loop_tpg *tl_tpg;
+	int ret = FAILED;
+	/*
+	 * Locate the tcm_loop_hba_t pointer
+	 */
+	tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
+	/*
+	 * Locate the tl_nexus and se_sess pointers
+	 */
+	tl_nexus = tl_hba->tl_nexus;
+	if (!tl_nexus) {
+		printk(KERN_ERR "Unable to perform device reset without"
+				" active I_T Nexus\n");
+		return FAILED;
+	}
+	se_sess = tl_nexus->se_sess;
+	/*
+	 * Locate the tl_tpg and se_tpg pointers from TargetID in sc->device->id
+	 */
+	tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
+	se_tpg = &tl_tpg->tl_se_tpg;
+
+	tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_KERNEL);
+	if (!tl_cmd) {
+		printk(KERN_ERR "Unable to allocate memory for tl_cmd\n");
+		return FAILED;
+	}
+
+	tl_tmr = kzalloc(sizeof(struct tcm_loop_tmr), GFP_KERNEL);
+	if (!tl_tmr) {
+		printk(KERN_ERR "Unable to allocate memory for tl_tmr\n");
+		goto release;
+	}
+	init_waitqueue_head(&tl_tmr->tl_tmr_wait);
+
+	se_cmd = &tl_cmd->tl_se_cmd;
+	/*
+	 * Initialize struct se_cmd descriptor from target_core_mod infrastructure
+	 */
+	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess, 0,
+				DMA_NONE, TASK_ATTR_SIMPLE,
+				&tl_cmd->tl_sense_buf[0]);
+	/*
+	 * Allocate the LUN_RESET TMR
+	 */
+	se_cmd->se_tmr_req = core_tmr_alloc_req(se_cmd, (void *)tl_tmr,
+				TMR_LUN_RESET);
+	if (!se_cmd->se_tmr_req)
+		goto release;
+	/*
+	 * Locate the underlying TCM struct se_lun from sc->device->lun
+	 */
+	if (transport_get_lun_for_tmr(se_cmd, sc->device->lun) < 0)
+		goto release;
+	/*
+	 * Queue the TMR to TCM Core and sleep waiting for tcm_loop_queue_tm_rsp()
+	 * to wake us up.
+	 */
+	transport_generic_handle_tmr(se_cmd);
+	wait_event(tl_tmr->tl_tmr_wait, atomic_read(&tl_tmr->tmr_complete));
+	/*
+	 * The TMR LUN_RESET has completed, check the response status and
+	 * then release allocations.
+	 */
+	ret = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ?
+		SUCCESS : FAILED;
+release:
+	if (se_cmd)
+		transport_generic_free_cmd(se_cmd, 1, 1, 0);
+	else
+		kmem_cache_free(tcm_loop_cmd_cache, tl_cmd);
+	kfree(tl_tmr);
+	return ret;
+}
+
+static int tcm_loop_slave_alloc(struct scsi_device *sd)
+{
+	set_bit(QUEUE_FLAG_BIDI, &sd->request_queue->queue_flags);
+	return 0;
+}
+
+static int tcm_loop_slave_configure(struct scsi_device *sd)
+{
+	return 0;
+}
+
+static struct scsi_host_template tcm_loop_driver_template = {
+	.proc_info		= tcm_loop_proc_info,
+	.proc_name		= "tcm_loopback",
+	.name			= "TCM_Loopback",
+	.queuecommand		= tcm_loop_queuecommand,
+	.change_queue_depth	= tcm_loop_change_queue_depth,
+	.eh_device_reset_handler = tcm_loop_device_reset,
+	.can_queue		= TL_SCSI_CAN_QUEUE,
+	.this_id		= -1,
+	.sg_tablesize		= TL_SCSI_SG_TABLESIZE,
+	.cmd_per_lun		= TL_SCSI_CMD_PER_LUN,
+	.max_sectors		= TL_SCSI_MAX_SECTORS,
+	.use_clustering		= DISABLE_CLUSTERING,
+	.slave_alloc		= tcm_loop_slave_alloc,
+	.slave_configure	= tcm_loop_slave_configure,
+	.module			= THIS_MODULE,
+};
+
+static int tcm_loop_driver_probe(struct device *dev)
+{
+	struct tcm_loop_hba *tl_hba;
+	struct Scsi_Host *sh;
+	int error;
+
+	tl_hba = to_tcm_loop_hba(dev);
+
+	sh = scsi_host_alloc(&tcm_loop_driver_template,
+			sizeof(struct tcm_loop_hba));
+	if (!sh) {
+		printk(KERN_ERR "Unable to allocate struct scsi_host\n");
+		return -ENODEV;
+	}
+	tl_hba->sh = sh;
+
+	/*
+	 * Assign the struct tcm_loop_hba pointer to struct Scsi_Host->hostdata
+	 */
+	*((struct tcm_loop_hba **)sh->hostdata) = tl_hba;
+	/*
+	 * Setup single ID, Channel and LUN for now..
+	 */
+	sh->max_id = 2;
+	sh->max_lun = 0;
+	sh->max_channel = 0;
+	sh->max_cmd_len = TL_SCSI_MAX_CMD_LEN;
+
+	error = scsi_add_host(sh, &tl_hba->dev);
+	if (error) {
+		printk(KERN_ERR "%s: scsi_add_host failed\n", __func__);
+		scsi_host_put(sh);
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static int tcm_loop_driver_remove(struct device *dev)
+{
+	struct tcm_loop_hba *tl_hba;
+	struct Scsi_Host *sh;
+
+	tl_hba = to_tcm_loop_hba(dev);
+	sh = tl_hba->sh;
+
+	scsi_remove_host(sh);
+	scsi_host_put(sh);
+	return 0;
+}
+
+static void tcm_loop_release_adapter(struct device *dev)
+{
+	struct tcm_loop_hba *tl_hba = to_tcm_loop_hba(dev);
+
+	kfree(tl_hba);
+}
+
+/*
+ * Called from tcm_loop_make_scsi_hba() in tcm_loop_configfs.c
+ */
+static int tcm_loop_setup_hba_bus(struct tcm_loop_hba *tl_hba, int tcm_loop_host_id)
+{
+	int ret;
+
+	tl_hba->dev.bus = &tcm_loop_lld_bus;
+	tl_hba->dev.parent = tcm_loop_primary;
+	tl_hba->dev.release = &tcm_loop_release_adapter;
+	dev_set_name(&tl_hba->dev, "tcm_loop_adapter_%d", tcm_loop_host_id);
+
+	ret = device_register(&tl_hba->dev);
+	if (ret) {
+		printk(KERN_ERR "device_register() failed for"
+				" tl_hba->dev: %d\n", ret);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * Called from tcm_loop_fabric_init() in tcl_loop_fabric.c to load the emulated
+ * tcm_loop SCSI bus.
+ */
+static int tcm_loop_alloc_core_bus(void)
+{
+	int ret;
+
+	tcm_loop_primary = root_device_register("tcm_loop_0");
+	if (IS_ERR(tcm_loop_primary)) {
+		printk(KERN_ERR "Unable to allocate tcm_loop_primary\n");
+		return PTR_ERR(tcm_loop_primary);
+	}
+
+	ret = bus_register(&tcm_loop_lld_bus);
+	if (ret) {
+		printk(KERN_ERR "bus_register() failed for tcm_loop_lld_bus\n");
+		goto dev_unreg;
+	}
+
+	ret = driver_register(&tcm_loop_driverfs);
+	if (ret) {
+		printk(KERN_ERR "driver_register() failed for"
+				"tcm_loop_driverfs\n");
+		goto bus_unreg;
+	}
+
+	printk(KERN_INFO "Initialized TCM Loop Core Bus\n");
+	return ret;
+
+bus_unreg:
+	bus_unregister(&tcm_loop_lld_bus);
+dev_unreg:
+	root_device_unregister(tcm_loop_primary);
+	return ret;
+}
+
+static void tcm_loop_release_core_bus(void)
+{
+	driver_unregister(&tcm_loop_driverfs);
+	bus_unregister(&tcm_loop_lld_bus);
+	root_device_unregister(tcm_loop_primary);
+
+	printk(KERN_INFO "Releasing TCM Loop Core BUS\n");
+}
+
+static char *tcm_loop_get_fabric_name(void)
+{
+	return "loopback";
+}
+
+static u8 tcm_loop_get_fabric_proto_ident(struct se_portal_group *se_tpg)
+{
+	struct tcm_loop_tpg *tl_tpg =
+			(struct tcm_loop_tpg *)se_tpg->se_tpg_fabric_ptr;
+	struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba;
+	/*
+	 * tl_proto_id is set at tcm_loop_configfs.c:tcm_loop_make_scsi_hba()
+	 * time based on the protocol dependent prefix of the passed configfs group.
+	 *
+	 * Based upon tl_proto_id, TCM_Loop emulates the requested fabric
+	 * ProtocolID using target_core_fabric_lib.c symbols.
+	 */
+	switch (tl_hba->tl_proto_id) {
+	case SCSI_PROTOCOL_SAS:
+		return sas_get_fabric_proto_ident(se_tpg);
+	case SCSI_PROTOCOL_FCP:
+		return fc_get_fabric_proto_ident(se_tpg);
+	case SCSI_PROTOCOL_ISCSI:
+		return iscsi_get_fabric_proto_ident(se_tpg);
+	default:
+		printk(KERN_ERR "Unknown tl_proto_id: 0x%02x, using"
+			" SAS emulation\n", tl_hba->tl_proto_id);
+		break;
+	}
+
+	return sas_get_fabric_proto_ident(se_tpg);
+}
+
+static char *tcm_loop_get_endpoint_wwn(struct se_portal_group *se_tpg)
+{
+	struct tcm_loop_tpg *tl_tpg =
+		(struct tcm_loop_tpg *)se_tpg->se_tpg_fabric_ptr;
+	/*
+	 * Return the passed NAA identifier for the SAS Target Port
+	 */
+	return &tl_tpg->tl_hba->tl_wwn_address[0];
+}
+
+static u16 tcm_loop_get_tag(struct se_portal_group *se_tpg)
+{
+	struct tcm_loop_tpg *tl_tpg =
+		(struct tcm_loop_tpg *)se_tpg->se_tpg_fabric_ptr;
+	/*
+	 * This Tag is used when forming SCSI Name identifier in EVPD=1 0x83
+	 * to represent the SCSI Target Port.
+	 */
+	return tl_tpg->tl_tpgt;
+}
+
+static u32 tcm_loop_get_default_depth(struct se_portal_group *se_tpg)
+{
+	return 1;
+}
+
+static u32 tcm_loop_get_pr_transport_id(
+	struct se_portal_group *se_tpg,
+	struct se_node_acl *se_nacl,
+	struct t10_pr_registration *pr_reg,
+	int *format_code,
+	unsigned char *buf)
+{
+	struct tcm_loop_tpg *tl_tpg =
+			(struct tcm_loop_tpg *)se_tpg->se_tpg_fabric_ptr;
+	struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba;
+
+	switch (tl_hba->tl_proto_id) {
+	case SCSI_PROTOCOL_SAS:
+		return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
+					format_code, buf);
+	case SCSI_PROTOCOL_FCP:
+		return fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
+					format_code, buf);
+	case SCSI_PROTOCOL_ISCSI:
+		return iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
+					format_code, buf);
+	default:
+		printk(KERN_ERR "Unknown tl_proto_id: 0x%02x, using"
+			" SAS emulation\n", tl_hba->tl_proto_id);
+		break;
+	}
+
+	return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
+			format_code, buf);
+}
+
+static u32 tcm_loop_get_pr_transport_id_len(
+	struct se_portal_group *se_tpg,
+	struct se_node_acl *se_nacl,
+	struct t10_pr_registration *pr_reg,
+	int *format_code)
+{
+	struct tcm_loop_tpg *tl_tpg =
+			(struct tcm_loop_tpg *)se_tpg->se_tpg_fabric_ptr;
+	struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba;
+
+	switch (tl_hba->tl_proto_id) {
+	case SCSI_PROTOCOL_SAS:
+		return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
+					format_code);
+	case SCSI_PROTOCOL_FCP:
+		return fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
+					format_code);
+	case SCSI_PROTOCOL_ISCSI:
+		return iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
+					format_code);
+	default:
+		printk(KERN_ERR "Unknown tl_proto_id: 0x%02x, using"
+			" SAS emulation\n", tl_hba->tl_proto_id);
+		break;
+	}
+
+	return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
+			format_code);
+}
+
+/*
+ * Used for handling SCSI fabric dependent TransportIDs in SPC-3 and above
+ * Persistent Reservation SPEC_I_PT=1 and PROUT REGISTER_AND_MOVE operations.
+ */
+static char *tcm_loop_parse_pr_out_transport_id(
+	struct se_portal_group *se_tpg,
+	const char *buf,
+	u32 *out_tid_len,
+	char **port_nexus_ptr)
+{
+	struct tcm_loop_tpg *tl_tpg =
+			(struct tcm_loop_tpg *)se_tpg->se_tpg_fabric_ptr;
+	struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba;
+
+	switch (tl_hba->tl_proto_id) {
+	case SCSI_PROTOCOL_SAS:
+		return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
+					port_nexus_ptr);
+	case SCSI_PROTOCOL_FCP:
+		return fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
+					port_nexus_ptr);
+	case SCSI_PROTOCOL_ISCSI:
+		return iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
+					port_nexus_ptr);
+	default:
+		printk(KERN_ERR "Unknown tl_proto_id: 0x%02x, using"
+			" SAS emulation\n", tl_hba->tl_proto_id);
+		break;
+	}
+
+	return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
+			port_nexus_ptr);
+}
+
+/*
+ * Returning (1) here allows for target_core_mod struct se_node_acl to be generated
+ * based upon the incoming fabric dependent SCSI Initiator Port
+ */
+static int tcm_loop_check_demo_mode(struct se_portal_group *se_tpg)
+{
+	return 1;
+}
+
+static int tcm_loop_check_demo_mode_cache(struct se_portal_group *se_tpg)
+{
+	return 0;
+}
+
+/*
+ * Allow I_T Nexus full READ-WRITE access without explict Initiator Node ACLs for
+ * local virtual Linux/SCSI LLD passthrough into VM hypervisor guest
+ */
+static int tcm_loop_check_demo_mode_write_protect(struct se_portal_group *se_tpg)
+{
+	return 0;
+}
+
+/*
+ * Because TCM_Loop does not use explict ACLs and MappedLUNs, this will
+ * never be called for TCM_Loop by target_core_fabric_configfs.c code.
+ * It has been added here as a nop for target_fabric_tf_ops_check()
+ */
+static int tcm_loop_check_prod_mode_write_protect(struct se_portal_group *se_tpg)
+{
+	return 0;
+}
+
+static struct se_node_acl *tcm_loop_tpg_alloc_fabric_acl(
+	struct se_portal_group *se_tpg)
+{
+	struct tcm_loop_nacl *tl_nacl;
+
+	tl_nacl = kzalloc(sizeof(struct tcm_loop_nacl), GFP_KERNEL);
+	if (!tl_nacl) {
+		printk(KERN_ERR "Unable to allocate struct tcm_loop_nacl\n");
+		return NULL;
+	}
+
+	return &tl_nacl->se_node_acl;
+}
+
+static void tcm_loop_tpg_release_fabric_acl(
+	struct se_portal_group *se_tpg,
+	struct se_node_acl *se_nacl)
+{
+	struct tcm_loop_nacl *tl_nacl = container_of(se_nacl,
+				struct tcm_loop_nacl, se_node_acl);
+
+	kfree(tl_nacl);
+}
+
+static u32 tcm_loop_get_inst_index(struct se_portal_group *se_tpg)
+{
+	return 1;
+}
+
+static void tcm_loop_new_cmd_failure(struct se_cmd *se_cmd)
+{
+	/*
+	 * Since TCM_loop is already passing struct scatterlist data from
+	 * struct scsi_cmnd, no more Linux/SCSI failure dependent state need
+	 * to be handled here.
+	 */
+	return;
+}
+
+static int tcm_loop_is_state_remove(struct se_cmd *se_cmd)
+{
+	/*
+	 * Assume struct scsi_cmnd is not in remove state..
+	 */
+	return 0;
+}
+
+static int tcm_loop_sess_logged_in(struct se_session *se_sess)
+{
+	/*
+	 * Assume that TL Nexus is always active
+	 */
+	return 1;
+}
+
+static u32 tcm_loop_sess_get_index(struct se_session *se_sess)
+{
+	return 1;
+}
+
+static void tcm_loop_set_default_node_attributes(struct se_node_acl *se_acl)
+{
+	return;
+}
+
+static u32 tcm_loop_get_task_tag(struct se_cmd *se_cmd)
+{
+	return 1;
+}
+
+static int tcm_loop_get_cmd_state(struct se_cmd *se_cmd)
+{
+	struct tcm_loop_cmd *tl_cmd = container_of(se_cmd,
+			struct tcm_loop_cmd, tl_se_cmd);
+
+	return tl_cmd->sc_cmd_state;
+}
+
+static int tcm_loop_shutdown_session(struct se_session *se_sess)
+{
+	return 0;
+}
+
+static void tcm_loop_close_session(struct se_session *se_sess)
+{
+	return;
+};
+
+static void tcm_loop_stop_session(
+	struct se_session *se_sess,
+	int sess_sleep,
+	int conn_sleep)
+{
+	return;
+}
+
+static void tcm_loop_fall_back_to_erl0(struct se_session *se_sess)
+{
+	return;
+}
+
+static int tcm_loop_write_pending(struct se_cmd *se_cmd)
+{
+	/*
+	 * Since Linux/SCSI has already sent down a struct scsi_cmnd
+	 * sc->sc_data_direction of DMA_TO_DEVICE with struct scatterlist array
+	 * memory, and memory has already been mapped to struct se_cmd->t_mem_list
+	 * format with transport_generic_map_mem_to_cmd().
+	 *
+	 * We now tell TCM to add this WRITE CDB directly into the TCM storage
+	 * object execution queue.
+	 */
+	transport_generic_process_write(se_cmd);
+	return 0;
+}
+
+static int tcm_loop_write_pending_status(struct se_cmd *se_cmd)
+{
+	return 0;
+}
+
+static int tcm_loop_queue_data_in(struct se_cmd *se_cmd)
+{
+	struct tcm_loop_cmd *tl_cmd = container_of(se_cmd,
+				struct tcm_loop_cmd, tl_se_cmd);
+	struct scsi_cmnd *sc = tl_cmd->sc;
+
+	TL_CDB_DEBUG("tcm_loop_queue_data_in() called for scsi_cmnd: %p"
+		     " cdb: 0x%02x\n", sc, sc->cmnd[0]);
+
+	sc->result = SAM_STAT_GOOD;
+	set_host_byte(sc, DID_OK);
+	sc->scsi_done(sc);
+	return 0;
+}
+
+static int tcm_loop_queue_status(struct se_cmd *se_cmd)
+{
+	struct tcm_loop_cmd *tl_cmd = container_of(se_cmd,
+				struct tcm_loop_cmd, tl_se_cmd);
+	struct scsi_cmnd *sc = tl_cmd->sc;
+
+	TL_CDB_DEBUG("tcm_loop_queue_status() called for scsi_cmnd: %p"
+			" cdb: 0x%02x\n", sc, sc->cmnd[0]);
+
+	if (se_cmd->sense_buffer &&
+	   ((se_cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
+	    (se_cmd->se_cmd_flags & SCF_EMULATED_TASK_SENSE))) {
+
+		memcpy((void *)sc->sense_buffer, (void *)se_cmd->sense_buffer,
+				SCSI_SENSE_BUFFERSIZE);
+		sc->result = SAM_STAT_CHECK_CONDITION;
+		set_driver_byte(sc, DRIVER_SENSE);
+	} else
+		sc->result = se_cmd->scsi_status;
+
+	set_host_byte(sc, DID_OK);
+	sc->scsi_done(sc);
+	return 0;
+}
+
+static int tcm_loop_queue_tm_rsp(struct se_cmd *se_cmd)
+{
+	struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
+	struct tcm_loop_tmr *tl_tmr = se_tmr->fabric_tmr_ptr;
+	/*
+	 * The SCSI EH thread will be sleeping on se_tmr->tl_tmr_wait, go ahead
+	 * and wake up the wait_queue_head_t in tcm_loop_device_reset()
+	 */
+	atomic_set(&tl_tmr->tmr_complete, 1);
+	wake_up(&tl_tmr->tl_tmr_wait);
+	return 0;
+}
+
+static u16 tcm_loop_set_fabric_sense_len(struct se_cmd *se_cmd, u32 sense_length)
+{
+	return 0;
+}
+
+static u16 tcm_loop_get_fabric_sense_len(void)
+{
+	return 0;
+}
+
+static u64 tcm_loop_pack_lun(unsigned int lun)
+{
+	u64 result;
+
+	/* LSB of lun into byte 1 big-endian */
+	result = ((lun & 0xff) << 8);
+	/* use flat space addressing method */
+	result |= 0x40 | ((lun >> 8) & 0x3f);
+
+	return cpu_to_le64(result);
+}
+
+static char *tcm_loop_dump_proto_id(struct tcm_loop_hba *tl_hba)
+{
+	switch (tl_hba->tl_proto_id) {
+	case SCSI_PROTOCOL_SAS:
+		return "SAS";
+	case SCSI_PROTOCOL_FCP:
+		return "FCP";
+	case SCSI_PROTOCOL_ISCSI:
+		return "iSCSI";
+	default:
+		break;
+	}
+
+	return "Unknown";
+}
+
+/* Start items for tcm_loop_port_cit */
+
+static int tcm_loop_port_link(
+	struct se_portal_group *se_tpg,
+	struct se_lun *lun)
+{
+	struct tcm_loop_tpg *tl_tpg = container_of(se_tpg,
+				struct tcm_loop_tpg, tl_se_tpg);
+	struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba;
+
+	atomic_inc(&tl_tpg->tl_tpg_port_count);
+	smp_mb__after_atomic_inc();
+	/*
+	 * Add Linux/SCSI struct scsi_device by HCTL
+	 */
+	scsi_add_device(tl_hba->sh, 0, tl_tpg->tl_tpgt, lun->unpacked_lun);
+
+	printk(KERN_INFO "TCM_Loop_ConfigFS: Port Link Successful\n");
+	return 0;
+}
+
+static void tcm_loop_port_unlink(
+	struct se_portal_group *se_tpg,
+	struct se_lun *se_lun)
+{
+	struct scsi_device *sd;
+	struct tcm_loop_hba *tl_hba;
+	struct tcm_loop_tpg *tl_tpg;
+
+	tl_tpg = container_of(se_tpg, struct tcm_loop_tpg, tl_se_tpg);
+	tl_hba = tl_tpg->tl_hba;
+
+	sd = scsi_device_lookup(tl_hba->sh, 0, tl_tpg->tl_tpgt,
+				se_lun->unpacked_lun);
+	if (!sd) {
+		printk(KERN_ERR "Unable to locate struct scsi_device for %d:%d:"
+			"%d\n", 0, tl_tpg->tl_tpgt, se_lun->unpacked_lun);
+		return;
+	}
+	/*
+	 * Remove Linux/SCSI struct scsi_device by HCTL
+	 */
+	scsi_remove_device(sd);
+	scsi_device_put(sd);
+
+	atomic_dec(&tl_tpg->tl_tpg_port_count);
+	smp_mb__after_atomic_dec();
+
+	printk(KERN_INFO "TCM_Loop_ConfigFS: Port Unlink Successful\n");
+}
+
+/* End items for tcm_loop_port_cit */
+
+/* Start items for tcm_loop_nexus_cit */
+
+static int tcm_loop_make_nexus(
+	struct tcm_loop_tpg *tl_tpg,
+	const char *name)
+{
+	struct se_portal_group *se_tpg;
+	struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba;
+	struct tcm_loop_nexus *tl_nexus;
+
+	if (tl_tpg->tl_hba->tl_nexus) {
+		printk(KERN_INFO "tl_tpg->tl_hba->tl_nexus already exists\n");
+		return -EEXIST;
+	}
+	se_tpg = &tl_tpg->tl_se_tpg;
+
+	tl_nexus = kzalloc(sizeof(struct tcm_loop_nexus), GFP_KERNEL);
+	if (!tl_nexus) {
+		printk(KERN_ERR "Unable to allocate struct tcm_loop_nexus\n");
+		return -ENOMEM;
+	}
+	/*
+	 * Initialize the struct se_session pointer
+	 */
+	tl_nexus->se_sess = transport_init_session();
+	if (!tl_nexus->se_sess)
+		goto out;
+	/*
+	 * Since we are running in 'demo mode' this call with generate a
+	 * struct se_node_acl for the tcm_loop struct se_portal_group with the SCSI
+	 * Initiator port name of the passed configfs group 'name'.
+	 */
+	tl_nexus->se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
+				se_tpg, (unsigned char *)name);
+	if (!tl_nexus->se_sess->se_node_acl) {
+		transport_free_session(tl_nexus->se_sess);
+		goto out;
+	}
+	/*
+	 * Now, register the SAS I_T Nexus as active with the call to
+	 * transport_register_session()
+	 */
+	__transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl,
+			tl_nexus->se_sess, (void *)tl_nexus);
+	tl_tpg->tl_hba->tl_nexus = tl_nexus;
+	printk(KERN_INFO "TCM_Loop_ConfigFS: Established I_T Nexus to emulated"
+		" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba),
+		name);
+	return 0;
+
+out:
+	kfree(tl_nexus);
+	return -ENOMEM;
+}
+
+static int tcm_loop_drop_nexus(
+	struct tcm_loop_tpg *tpg)
+{
+	struct se_session *se_sess;
+	struct tcm_loop_nexus *tl_nexus;
+	struct tcm_loop_hba *tl_hba = tpg->tl_hba;
+
+	tl_nexus = tpg->tl_hba->tl_nexus;
+	if (!tl_nexus)
+		return -ENODEV;
+
+	se_sess = tl_nexus->se_sess;
+	if (!se_sess)
+		return -ENODEV;
+
+	if (atomic_read(&tpg->tl_tpg_port_count)) {
+		printk(KERN_ERR "Unable to remove TCM_Loop I_T Nexus with"
+			" active TPG port count: %d\n",
+			atomic_read(&tpg->tl_tpg_port_count));
+		return -EPERM;
+	}
+
+	printk(KERN_INFO "TCM_Loop_ConfigFS: Removing I_T Nexus to emulated"
+		" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba),
+		tl_nexus->se_sess->se_node_acl->initiatorname);
+	/*
+	 * Release the SCSI I_T Nexus to the emulated SAS Target Port
+	 */
+	transport_deregister_session(tl_nexus->se_sess);
+	tpg->tl_hba->tl_nexus = NULL;
+	kfree(tl_nexus);
+	return 0;
+}
+
+/* End items for tcm_loop_nexus_cit */
+
+static ssize_t tcm_loop_tpg_show_nexus(
+	struct se_portal_group *se_tpg,
+	char *page)
+{
+	struct tcm_loop_tpg *tl_tpg = container_of(se_tpg,
+			struct tcm_loop_tpg, tl_se_tpg);
+	struct tcm_loop_nexus *tl_nexus;
+	ssize_t ret;
+
+	tl_nexus = tl_tpg->tl_hba->tl_nexus;
+	if (!tl_nexus)
+		return -ENODEV;
+
+	ret = snprintf(page, PAGE_SIZE, "%s\n",
+		tl_nexus->se_sess->se_node_acl->initiatorname);
+
+	return ret;
+}
+
+static ssize_t tcm_loop_tpg_store_nexus(
+	struct se_portal_group *se_tpg,
+	const char *page,
+	size_t count)
+{
+	struct tcm_loop_tpg *tl_tpg = container_of(se_tpg,
+			struct tcm_loop_tpg, tl_se_tpg);
+	struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba;
+	unsigned char i_port[TL_WWN_ADDR_LEN], *ptr, *port_ptr;
+	int ret;
+	/*
+	 * Shutdown the active I_T nexus if 'NULL' is passed..
+	 */
+	if (!strncmp(page, "NULL", 4)) {
+		ret = tcm_loop_drop_nexus(tl_tpg);
+		return (!ret) ? count : ret;
+	}
+	/*
+	 * Otherwise make sure the passed virtual Initiator port WWN matches
+	 * the fabric protocol_id set in tcm_loop_make_scsi_hba(), and call
+	 * tcm_loop_make_nexus()
+	 */
+	if (strlen(page) > TL_WWN_ADDR_LEN) {
+		printk(KERN_ERR "Emulated NAA Sas Address: %s, exceeds"
+				" max: %d\n", page, TL_WWN_ADDR_LEN);
+		return -EINVAL;
+	}
+	snprintf(&i_port[0], TL_WWN_ADDR_LEN, "%s", page);
+
+	ptr = strstr(i_port, "naa.");
+	if (ptr) {
+		if (tl_hba->tl_proto_id != SCSI_PROTOCOL_SAS) {
+			printk(KERN_ERR "Passed SAS Initiator Port %s does not"
+				" match target port protoid: %s\n", i_port,
+				tcm_loop_dump_proto_id(tl_hba));
+			return -EINVAL;
+		}
+		port_ptr = &i_port[0];
+		goto check_newline;
+	}
+	ptr = strstr(i_port, "fc.");
+	if (ptr) {
+		if (tl_hba->tl_proto_id != SCSI_PROTOCOL_FCP) {
+			printk(KERN_ERR "Passed FCP Initiator Port %s does not"
+				" match target port protoid: %s\n", i_port,
+				tcm_loop_dump_proto_id(tl_hba));
+			return -EINVAL;
+		}
+		port_ptr = &i_port[3]; /* Skip over "fc." */
+		goto check_newline;
+	}
+	ptr = strstr(i_port, "iqn.");
+	if (ptr) {
+		if (tl_hba->tl_proto_id != SCSI_PROTOCOL_ISCSI) {
+			printk(KERN_ERR "Passed iSCSI Initiator Port %s does not"
+				" match target port protoid: %s\n", i_port,
+				tcm_loop_dump_proto_id(tl_hba));
+			return -EINVAL;
+		}
+		port_ptr = &i_port[0];
+		goto check_newline;
+	}
+	printk(KERN_ERR "Unable to locate prefix for emulated Initiator Port:"
+			" %s\n", i_port);
+	return -EINVAL;
+	/*
+	 * Clear any trailing newline for the NAA WWN
+	 */
+check_newline:
+	if (i_port[strlen(i_port)-1] == '\n')
+		i_port[strlen(i_port)-1] = '\0';
+
+	ret = tcm_loop_make_nexus(tl_tpg, port_ptr);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+TF_TPG_BASE_ATTR(tcm_loop, nexus, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *tcm_loop_tpg_attrs[] = {
+	&tcm_loop_tpg_nexus.attr,
+	NULL,
+};
+
+/* Start items for tcm_loop_naa_cit */
+
+struct se_portal_group *tcm_loop_make_naa_tpg(
+	struct se_wwn *wwn,
+	struct config_group *group,
+	const char *name)
+{
+	struct tcm_loop_hba *tl_hba = container_of(wwn,
+			struct tcm_loop_hba, tl_hba_wwn);
+	struct tcm_loop_tpg *tl_tpg;
+	char *tpgt_str, *end_ptr;
+	int ret;
+	unsigned short int tpgt;
+
+	tpgt_str = strstr(name, "tpgt_");
+	if (!tpgt_str) {
+		printk(KERN_ERR "Unable to locate \"tpgt_#\" directory"
+				" group\n");
+		return ERR_PTR(-EINVAL);
+	}
+	tpgt_str += 5; /* Skip ahead of "tpgt_" */
+	tpgt = (unsigned short int) simple_strtoul(tpgt_str, &end_ptr, 0);
+
+	if (tpgt > TL_TPGS_PER_HBA) {
+		printk(KERN_ERR "Passed tpgt: %hu exceeds TL_TPGS_PER_HBA:"
+				" %u\n", tpgt, TL_TPGS_PER_HBA);
+		return ERR_PTR(-EINVAL);
+	}
+	tl_tpg = &tl_hba->tl_hba_tpgs[tpgt];
+	tl_tpg->tl_hba = tl_hba;
+	tl_tpg->tl_tpgt = tpgt;
+	/*
+	 * Register the tl_tpg as a emulated SAS TCM Target Endpoint
+	 */
+	ret = core_tpg_register(&tcm_loop_fabric_configfs->tf_ops,
+			wwn, &tl_tpg->tl_se_tpg, (void *)tl_tpg,
+			TRANSPORT_TPG_TYPE_NORMAL);
+	if (ret < 0)
+		return ERR_PTR(-ENOMEM);
+
+	printk(KERN_INFO "TCM_Loop_ConfigFS: Allocated Emulated %s"
+		" Target Port %s,t,0x%04x\n", tcm_loop_dump_proto_id(tl_hba),
+		config_item_name(&wwn->wwn_group.cg_item), tpgt);
+
+	return &tl_tpg->tl_se_tpg;
+}
+
+void tcm_loop_drop_naa_tpg(
+	struct se_portal_group *se_tpg)
+{
+	struct se_wwn *wwn = se_tpg->se_tpg_wwn;
+	struct tcm_loop_tpg *tl_tpg = container_of(se_tpg,
+				struct tcm_loop_tpg, tl_se_tpg);
+	struct tcm_loop_hba *tl_hba;
+	unsigned short tpgt;
+
+	tl_hba = tl_tpg->tl_hba;
+	tpgt = tl_tpg->tl_tpgt;
+	/*
+	 * Release the I_T Nexus for the Virtual SAS link if present
+	 */
+	tcm_loop_drop_nexus(tl_tpg);
+	/*
+	 * Deregister the tl_tpg as a emulated SAS TCM Target Endpoint
+	 */
+	core_tpg_deregister(se_tpg);
+
+	printk(KERN_INFO "TCM_Loop_ConfigFS: Deallocated Emulated %s"
+		" Target Port %s,t,0x%04x\n", tcm_loop_dump_proto_id(tl_hba),
+		config_item_name(&wwn->wwn_group.cg_item), tpgt);
+}
+
+/* End items for tcm_loop_naa_cit */
+
+/* Start items for tcm_loop_cit */
+
+struct se_wwn *tcm_loop_make_scsi_hba(
+	struct target_fabric_configfs *tf,
+	struct config_group *group,
+	const char *name)
+{
+	struct tcm_loop_hba *tl_hba;
+	struct Scsi_Host *sh;
+	char *ptr;
+	int ret, off = 0;
+
+	tl_hba = kzalloc(sizeof(struct tcm_loop_hba), GFP_KERNEL);
+	if (!tl_hba) {
+		printk(KERN_ERR "Unable to allocate struct tcm_loop_hba\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	/*
+	 * Determine the emulated Protocol Identifier and Target Port Name
+	 * based on the incoming configfs directory name.
+	 */
+	ptr = strstr(name, "naa.");
+	if (ptr) {
+		tl_hba->tl_proto_id = SCSI_PROTOCOL_SAS;
+		goto check_len;
+	}
+	ptr = strstr(name, "fc.");
+	if (ptr) {
+		tl_hba->tl_proto_id = SCSI_PROTOCOL_FCP;
+		off = 3; /* Skip over "fc." */
+		goto check_len;
+	}
+	ptr = strstr(name, "iqn.");
+	if (ptr) {
+		tl_hba->tl_proto_id = SCSI_PROTOCOL_ISCSI;
+		goto check_len;
+	}
+
+	printk(KERN_ERR "Unable to locate prefix for emulated Target Port:"
+			" %s\n", name);
+	return ERR_PTR(-EINVAL);
+
+check_len:
+	if (strlen(name) > TL_WWN_ADDR_LEN) {
+		printk(KERN_ERR "Emulated NAA %s Address: %s, exceeds"
+			" max: %d\n", name, tcm_loop_dump_proto_id(tl_hba),
+			TL_WWN_ADDR_LEN);
+		kfree(tl_hba);
+		return ERR_PTR(-EINVAL);
+	}
+	snprintf(&tl_hba->tl_wwn_address[0], TL_WWN_ADDR_LEN, "%s", &name[off]);
+
+	/*
+	 * Call device_register(tl_hba->dev) to register the emulated
+	 * Linux/SCSI LLD of type struct Scsi_Host at tl_hba->sh after
+	 * device_register() callbacks in tcm_loop_driver_probe()
+	 */
+	ret = tcm_loop_setup_hba_bus(tl_hba, tcm_loop_hba_no_cnt);
+	if (ret)
+		goto out;
+
+	sh = tl_hba->sh;
+	tcm_loop_hba_no_cnt++;
+	printk(KERN_INFO "TCM_Loop_ConfigFS: Allocated emulated Target"
+		" %s Address: %s at Linux/SCSI Host ID: %d\n",
+		tcm_loop_dump_proto_id(tl_hba), name, sh->host_no);
+
+	return &tl_hba->tl_hba_wwn;
+out:
+	kfree(tl_hba);
+	return ERR_PTR(ret);
+}
+
+void tcm_loop_drop_scsi_hba(
+	struct se_wwn *wwn)
+{
+	struct tcm_loop_hba *tl_hba = container_of(wwn,
+				struct tcm_loop_hba, tl_hba_wwn);
+	int host_no = tl_hba->sh->host_no;
+	/*
+	 * Call device_unregister() on the original tl_hba->dev.
+	 * tcm_loop_fabric_scsi.c:tcm_loop_release_adapter() will
+	 * release *tl_hba;
+	 */
+	device_unregister(&tl_hba->dev);
+
+	printk(KERN_INFO "TCM_Loop_ConfigFS: Deallocated emulated Target"
+		" SAS Address: %s at Linux/SCSI Host ID: %d\n",
+		config_item_name(&wwn->wwn_group.cg_item), host_no);
+}
+
+/* Start items for tcm_loop_cit */
+static ssize_t tcm_loop_wwn_show_attr_version(
+	struct target_fabric_configfs *tf,
+	char *page)
+{
+	return sprintf(page, "TCM Loopback Fabric module %s\n", TCM_LOOP_VERSION);
+}
+
+TF_WWN_ATTR_RO(tcm_loop, version);
+
+static struct configfs_attribute *tcm_loop_wwn_attrs[] = {
+	&tcm_loop_wwn_version.attr,
+	NULL,
+};
+
+/* End items for tcm_loop_cit */
+
+static int tcm_loop_register_configfs(void)
+{
+	struct target_fabric_configfs *fabric;
+	struct config_group *tf_cg;
+	int ret;
+	/*
+	 * Set the TCM Loop HBA counter to zero
+	 */
+	tcm_loop_hba_no_cnt = 0;
+	/*
+	 * Register the top level struct config_item_type with TCM core
+	 */
+	fabric = target_fabric_configfs_init(THIS_MODULE, "loopback");
+	if (!fabric) {
+		printk(KERN_ERR "tcm_loop_register_configfs() failed!\n");
+		return -1;
+	}
+	/*
+	 * Setup the fabric API of function pointers used by target_core_mod
+	 */
+	fabric->tf_ops.get_fabric_name = &tcm_loop_get_fabric_name;
+	fabric->tf_ops.get_fabric_proto_ident = &tcm_loop_get_fabric_proto_ident;
+	fabric->tf_ops.tpg_get_wwn = &tcm_loop_get_endpoint_wwn;
+	fabric->tf_ops.tpg_get_tag = &tcm_loop_get_tag;
+	fabric->tf_ops.tpg_get_default_depth = &tcm_loop_get_default_depth;
+	fabric->tf_ops.tpg_get_pr_transport_id = &tcm_loop_get_pr_transport_id;
+	fabric->tf_ops.tpg_get_pr_transport_id_len =
+					&tcm_loop_get_pr_transport_id_len;
+	fabric->tf_ops.tpg_parse_pr_out_transport_id =
+					&tcm_loop_parse_pr_out_transport_id;
+	fabric->tf_ops.tpg_check_demo_mode = &tcm_loop_check_demo_mode;
+	fabric->tf_ops.tpg_check_demo_mode_cache =
+					&tcm_loop_check_demo_mode_cache;
+	fabric->tf_ops.tpg_check_demo_mode_write_protect =
+					&tcm_loop_check_demo_mode_write_protect;
+	fabric->tf_ops.tpg_check_prod_mode_write_protect =
+					&tcm_loop_check_prod_mode_write_protect;
+	/*
+	 * The TCM loopback fabric module runs in demo-mode to a local
+	 * virtual SCSI device, so fabric dependent initator ACLs are
+	 * not required.
+	 */
+	fabric->tf_ops.tpg_alloc_fabric_acl = &tcm_loop_tpg_alloc_fabric_acl;
+	fabric->tf_ops.tpg_release_fabric_acl =
+					&tcm_loop_tpg_release_fabric_acl;
+	fabric->tf_ops.tpg_get_inst_index = &tcm_loop_get_inst_index;
+	/*
+	 * Since tcm_loop is mapping physical memory from Linux/SCSI
+	 * struct scatterlist arrays for each struct scsi_cmnd I/O,
+	 * we do not need TCM to allocate a iovec array for
+	 * virtual memory address mappings
+	 */
+	fabric->tf_ops.alloc_cmd_iovecs = NULL;
+	/*
+	 * Used for setting up remaining TCM resources in process context
+	 */
+	fabric->tf_ops.new_cmd_map = &tcm_loop_new_cmd_map;
+	fabric->tf_ops.check_stop_free = &tcm_loop_check_stop_free;
+	fabric->tf_ops.release_cmd_to_pool = &tcm_loop_deallocate_core_cmd;
+	fabric->tf_ops.release_cmd_direct = &tcm_loop_deallocate_core_cmd;
+	fabric->tf_ops.shutdown_session = &tcm_loop_shutdown_session;
+	fabric->tf_ops.close_session = &tcm_loop_close_session;
+	fabric->tf_ops.stop_session = &tcm_loop_stop_session;
+	fabric->tf_ops.fall_back_to_erl0 = &tcm_loop_fall_back_to_erl0;
+	fabric->tf_ops.sess_logged_in = &tcm_loop_sess_logged_in;
+	fabric->tf_ops.sess_get_index = &tcm_loop_sess_get_index;
+	fabric->tf_ops.sess_get_initiator_sid = NULL;
+	fabric->tf_ops.write_pending = &tcm_loop_write_pending;
+	fabric->tf_ops.write_pending_status = &tcm_loop_write_pending_status;
+	/*
+	 * Not used for TCM loopback
+	 */
+	fabric->tf_ops.set_default_node_attributes =
+					&tcm_loop_set_default_node_attributes;
+	fabric->tf_ops.get_task_tag = &tcm_loop_get_task_tag;
+	fabric->tf_ops.get_cmd_state = &tcm_loop_get_cmd_state;
+	fabric->tf_ops.new_cmd_failure = &tcm_loop_new_cmd_failure;
+	fabric->tf_ops.queue_data_in = &tcm_loop_queue_data_in;
+	fabric->tf_ops.queue_status = &tcm_loop_queue_status;
+	fabric->tf_ops.queue_tm_rsp = &tcm_loop_queue_tm_rsp;
+	fabric->tf_ops.set_fabric_sense_len = &tcm_loop_set_fabric_sense_len;
+	fabric->tf_ops.get_fabric_sense_len = &tcm_loop_get_fabric_sense_len;
+	fabric->tf_ops.is_state_remove = &tcm_loop_is_state_remove;
+	fabric->tf_ops.pack_lun = &tcm_loop_pack_lun;
+
+	tf_cg = &fabric->tf_group;
+	/*
+	 * Setup function pointers for generic logic in target_core_fabric_configfs.c
+	 */
+	fabric->tf_ops.fabric_make_wwn = &tcm_loop_make_scsi_hba;
+	fabric->tf_ops.fabric_drop_wwn = &tcm_loop_drop_scsi_hba;
+	fabric->tf_ops.fabric_make_tpg = &tcm_loop_make_naa_tpg;
+	fabric->tf_ops.fabric_drop_tpg = &tcm_loop_drop_naa_tpg;
+	/*
+	 * fabric_post_link() and fabric_pre_unlink() are used for
+	 * registration and release of TCM Loop Virtual SCSI LUNs.
+	 */
+	fabric->tf_ops.fabric_post_link = &tcm_loop_port_link;
+	fabric->tf_ops.fabric_pre_unlink = &tcm_loop_port_unlink;
+	fabric->tf_ops.fabric_make_np = NULL;
+	fabric->tf_ops.fabric_drop_np = NULL;
+	/*
+	 * Setup default attribute lists for various fabric->tf_cit_tmpl
+	 */
+	TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = tcm_loop_wwn_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = tcm_loop_tpg_attrs;
+	TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL;
+	TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL;
+	/*
+	 * Once fabric->tf_ops has been setup, now register the fabric for
+	 * use within TCM
+	 */
+	ret = target_fabric_configfs_register(fabric);
+	if (ret < 0) {
+		printk(KERN_ERR "target_fabric_configfs_register() for"
+				" TCM_Loop failed!\n");
+		target_fabric_configfs_free(fabric);
+		return -1;
+	}
+	/*
+	 * Setup our local pointer to *fabric.
+	 */
+	tcm_loop_fabric_configfs = fabric;
+	printk(KERN_INFO "TCM_LOOP[0] - Set fabric ->"
+			" tcm_loop_fabric_configfs\n");
+	return 0;
+}
+
+static void tcm_loop_deregister_configfs(void)
+{
+	if (!tcm_loop_fabric_configfs)
+		return;
+
+	target_fabric_configfs_deregister(tcm_loop_fabric_configfs);
+	tcm_loop_fabric_configfs = NULL;
+	printk(KERN_INFO "TCM_LOOP[0] - Cleared"
+				" tcm_loop_fabric_configfs\n");
+}
+
+static int __init tcm_loop_fabric_init(void)
+{
+	int ret;
+
+	tcm_loop_cmd_cache = kmem_cache_create("tcm_loop_cmd_cache",
+				sizeof(struct tcm_loop_cmd),
+				__alignof__(struct tcm_loop_cmd),
+				0, NULL);
+	if (!tcm_loop_cmd_cache) {
+		printk(KERN_ERR "kmem_cache_create() for"
+			" tcm_loop_cmd_cache failed\n");
+		return -ENOMEM;
+	}
+
+	ret = tcm_loop_alloc_core_bus();
+	if (ret)
+		return ret;
+
+	ret = tcm_loop_register_configfs();
+	if (ret) {
+		tcm_loop_release_core_bus();
+		return ret;
+	}
+
+	return 0;
+}
+
+static void __exit tcm_loop_fabric_exit(void)
+{
+	tcm_loop_deregister_configfs();
+	tcm_loop_release_core_bus();
+	kmem_cache_destroy(tcm_loop_cmd_cache);
+}
+
+MODULE_DESCRIPTION("TCM loopback virtual Linux/SCSI fabric module");
+MODULE_AUTHOR("Nicholas A. Bellinger <nab@risingtidesystems.com>");
+MODULE_LICENSE("GPL");
+module_init(tcm_loop_fabric_init);
+module_exit(tcm_loop_fabric_exit);

diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h
new file mode 100644
index 0000000..7e9f7ab
--- /dev/null
+++ b/drivers/target/loopback/tcm_loop.h

@@ -0,0 +1,77 @@
+#define TCM_LOOP_VERSION		"v2.1-rc1"
+#define TL_WWN_ADDR_LEN			256
+#define TL_TPGS_PER_HBA			32
+/*
+ * Defaults for struct scsi_host_template tcm_loop_driver_template
+ *
+ * We use large can_queue and cmd_per_lun here and let TCM enforce
+ * the underlying se_device_t->queue_depth.
+ */
+#define TL_SCSI_CAN_QUEUE		1024
+#define TL_SCSI_CMD_PER_LUN		1024
+#define TL_SCSI_MAX_SECTORS		1024
+#define TL_SCSI_SG_TABLESIZE		256
+/*
+ * Used in tcm_loop_driver_probe() for struct Scsi_Host->max_cmd_len
+ */
+#define TL_SCSI_MAX_CMD_LEN		32
+
+#ifdef CONFIG_LOOPBACK_TARGET_CDB_DEBUG
+# define TL_CDB_DEBUG(x...)		printk(KERN_INFO x)
+#else
+# define TL_CDB_DEBUG(x...)
+#endif
+
+struct tcm_loop_cmd {
+	/* State of Linux/SCSI CDB+Data descriptor */
+	u32 sc_cmd_state;
+	/* Pointer to the CDB+Data descriptor from Linux/SCSI subsystem */
+	struct scsi_cmnd *sc;
+	struct list_head *tl_cmd_list;
+	/* The TCM I/O descriptor that is accessed via container_of() */
+	struct se_cmd tl_se_cmd;
+	/* Sense buffer that will be mapped into outgoing status */
+	unsigned char tl_sense_buf[TRANSPORT_SENSE_BUFFER];
+};
+
+struct tcm_loop_tmr {
+	atomic_t tmr_complete;
+	wait_queue_head_t tl_tmr_wait;
+};
+
+struct tcm_loop_nexus {
+	int it_nexus_active;
+	/*
+	 * Pointer to Linux/SCSI HBA from linux/include/scsi_host.h
+	 */
+	struct scsi_host *sh;
+	/*
+	 * Pointer to TCM session for I_T Nexus
+	 */
+	struct se_session *se_sess;
+};
+
+struct tcm_loop_nacl {
+	struct se_node_acl se_node_acl;
+};
+
+struct tcm_loop_tpg {
+	unsigned short tl_tpgt;
+	atomic_t tl_tpg_port_count;
+	struct se_portal_group tl_se_tpg;
+	struct tcm_loop_hba *tl_hba;
+};
+
+struct tcm_loop_hba {
+	u8 tl_proto_id;
+	unsigned char tl_wwn_address[TL_WWN_ADDR_LEN];
+	struct se_hba_s *se_hba;
+	struct se_lun *tl_hba_lun;
+	struct se_port *tl_hba_lun_sep;
+	struct se_device_s *se_dev_hba_ptr;
+	struct tcm_loop_nexus *tl_nexus;
+	struct device dev;
+	struct Scsi_Host *sh;
+	struct tcm_loop_tpg tl_hba_tpgs[TL_TPGS_PER_HBA];
+	struct se_wwn tl_hba_wwn;
+};

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index caf8dc1..a5f44a6 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c

@@ -3,8 +3,8 @@
  *
  * This file contains ConfigFS logic for the Generic Target Engine project.
  *
- * Copyright (c) 2008-2010 Rising Tide Systems
- * Copyright (c) 2008-2010 Linux-iSCSI.org
+ * Copyright (c) 2008-2011 Rising Tide Systems
+ * Copyright (c) 2008-2011 Linux-iSCSI.org
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
@@ -50,6 +50,7 @@
 #include "target_core_hba.h"
 #include "target_core_pr.h"
 #include "target_core_rd.h"
+#include "target_core_stat.h"
 
 static struct list_head g_tf_list;
 static struct mutex g_tf_lock;
@@ -1451,8 +1452,8 @@
 	size_t count)
 {
 	struct se_device *dev;
-	unsigned char *i_fabric, *t_fabric, *i_port = NULL, *t_port = NULL;
-	unsigned char *isid = NULL;
+	unsigned char *i_fabric = NULL, *i_port = NULL, *isid = NULL;
+	unsigned char *t_fabric = NULL, *t_port = NULL;
 	char *orig, *ptr, *arg_p, *opts;
 	substring_t args[MAX_OPT_ARGS];
 	unsigned long long tmp_ll;
@@ -1488,9 +1489,17 @@
 		switch (token) {
 		case Opt_initiator_fabric:
 			i_fabric = match_strdup(&args[0]);
+			if (!i_fabric) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			break;
 		case Opt_initiator_node:
 			i_port = match_strdup(&args[0]);
+			if (!i_port) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			if (strlen(i_port) > PR_APTPL_MAX_IPORT_LEN) {
 				printk(KERN_ERR "APTPL metadata initiator_node="
 					" exceeds PR_APTPL_MAX_IPORT_LEN: %d\n",
@@ -1501,6 +1510,10 @@
 			break;
 		case Opt_initiator_sid:
 			isid = match_strdup(&args[0]);
+			if (!isid) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			if (strlen(isid) > PR_REG_ISID_LEN) {
 				printk(KERN_ERR "APTPL metadata initiator_isid"
 					"= exceeds PR_REG_ISID_LEN: %d\n",
@@ -1511,6 +1524,10 @@
 			break;
 		case Opt_sa_res_key:
 			arg_p = match_strdup(&args[0]);
+			if (!arg_p) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			ret = strict_strtoull(arg_p, 0, &tmp_ll);
 			if (ret < 0) {
 				printk(KERN_ERR "strict_strtoull() failed for"
@@ -1547,9 +1564,17 @@
 		 */
 		case Opt_target_fabric:
 			t_fabric = match_strdup(&args[0]);
+			if (!t_fabric) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			break;
 		case Opt_target_node:
 			t_port = match_strdup(&args[0]);
+			if (!t_port) {
+				ret = -ENOMEM;
+				goto out;
+			}
 			if (strlen(t_port) > PR_APTPL_MAX_TPORT_LEN) {
 				printk(KERN_ERR "APTPL metadata target_node="
 					" exceeds PR_APTPL_MAX_TPORT_LEN: %d\n",
@@ -1592,6 +1617,11 @@
 			i_port, isid, mapped_lun, t_port, tpgt, target_lun,
 			res_holder, all_tg_pt, type);
 out:
+	kfree(i_fabric);
+	kfree(i_port);
+	kfree(isid);
+	kfree(t_fabric);
+	kfree(t_port);
 	kfree(orig);
 	return (ret == 0) ? count : ret;
 }
@@ -1798,7 +1828,9 @@
 		return -EINVAL;
 
 	dev = t->create_virtdevice(hba, se_dev, se_dev->se_dev_su_ptr);
-	if (!(dev) || IS_ERR(dev))
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+	else if (!dev)
 		return -EINVAL;
 
 	se_dev->se_dev_ptr = dev;
@@ -2678,6 +2710,34 @@
 
 /* End functions for struct config_item_type target_core_alua_cit */
 
+/* Start functions for struct config_item_type target_core_stat_cit */
+
+static struct config_group *target_core_stat_mkdir(
+	struct config_group *group,
+	const char *name)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static void target_core_stat_rmdir(
+	struct config_group *group,
+	struct config_item *item)
+{
+	return;
+}
+
+static struct configfs_group_operations target_core_stat_group_ops = {
+	.make_group		= &target_core_stat_mkdir,
+	.drop_item		= &target_core_stat_rmdir,
+};
+
+static struct config_item_type target_core_stat_cit = {
+	.ct_group_ops		= &target_core_stat_group_ops,
+	.ct_owner		= THIS_MODULE,
+};
+
+/* End functions for struct config_item_type target_core_stat_cit */
+
 /* Start functions for struct config_item_type target_core_hba_cit */
 
 static struct config_group *target_core_make_subdev(
@@ -2690,10 +2750,12 @@
 	struct config_item *hba_ci = &group->cg_item;
 	struct se_hba *hba = item_to_hba(hba_ci);
 	struct config_group *dev_cg = NULL, *tg_pt_gp_cg = NULL;
+	struct config_group *dev_stat_grp = NULL;
+	int errno = -ENOMEM, ret;
 
-	if (mutex_lock_interruptible(&hba->hba_access_mutex))
-		return NULL;
-
+	ret = mutex_lock_interruptible(&hba->hba_access_mutex);
+	if (ret)
+		return ERR_PTR(ret);
 	/*
 	 * Locate the struct se_subsystem_api from parent's struct se_hba.
 	 */
@@ -2723,7 +2785,7 @@
 	se_dev->se_dev_hba = hba;
 	dev_cg = &se_dev->se_dev_group;
 
-	dev_cg->default_groups = kzalloc(sizeof(struct config_group) * 6,
+	dev_cg->default_groups = kzalloc(sizeof(struct config_group) * 7,
 			GFP_KERNEL);
 	if (!(dev_cg->default_groups))
 		goto out;
@@ -2755,13 +2817,17 @@
 			&target_core_dev_wwn_cit);
 	config_group_init_type_name(&se_dev->t10_alua.alua_tg_pt_gps_group,
 			"alua", &target_core_alua_tg_pt_gps_cit);
+	config_group_init_type_name(&se_dev->dev_stat_grps.stat_group,
+			"statistics", &target_core_stat_cit);
+
 	dev_cg->default_groups[0] = &se_dev->se_dev_attrib.da_group;
 	dev_cg->default_groups[1] = &se_dev->se_dev_pr_group;
 	dev_cg->default_groups[2] = &se_dev->t10_wwn.t10_wwn_group;
 	dev_cg->default_groups[3] = &se_dev->t10_alua.alua_tg_pt_gps_group;
-	dev_cg->default_groups[4] = NULL;
+	dev_cg->default_groups[4] = &se_dev->dev_stat_grps.stat_group;
+	dev_cg->default_groups[5] = NULL;
 	/*
-	 * Add core/$HBA/$DEV/alua/tg_pt_gps/default_tg_pt_gp
+	 * Add core/$HBA/$DEV/alua/default_tg_pt_gp
 	 */
 	tg_pt_gp = core_alua_allocate_tg_pt_gp(se_dev, "default_tg_pt_gp", 1);
 	if (!(tg_pt_gp))
@@ -2781,6 +2847,17 @@
 	tg_pt_gp_cg->default_groups[0] = &tg_pt_gp->tg_pt_gp_group;
 	tg_pt_gp_cg->default_groups[1] = NULL;
 	T10_ALUA(se_dev)->default_tg_pt_gp = tg_pt_gp;
+	/*
+	 * Add core/$HBA/$DEV/statistics/ default groups
+	 */
+	dev_stat_grp = &DEV_STAT_GRP(se_dev)->stat_group;
+	dev_stat_grp->default_groups = kzalloc(sizeof(struct config_group) * 4,
+				GFP_KERNEL);
+	if (!dev_stat_grp->default_groups) {
+		printk(KERN_ERR "Unable to allocate dev_stat_grp->default_groups\n");
+		goto out;
+	}
+	target_stat_setup_dev_default_groups(se_dev);
 
 	printk(KERN_INFO "Target_Core_ConfigFS: Allocated struct se_subsystem_dev:"
 		" %p se_dev_su_ptr: %p\n", se_dev, se_dev->se_dev_su_ptr);
@@ -2792,6 +2869,8 @@
 		core_alua_free_tg_pt_gp(T10_ALUA(se_dev)->default_tg_pt_gp);
 		T10_ALUA(se_dev)->default_tg_pt_gp = NULL;
 	}
+	if (dev_stat_grp)
+		kfree(dev_stat_grp->default_groups);
 	if (tg_pt_gp_cg)
 		kfree(tg_pt_gp_cg->default_groups);
 	if (dev_cg)
@@ -2801,7 +2880,7 @@
 	kfree(se_dev);
 unlock:
 	mutex_unlock(&hba->hba_access_mutex);
-	return NULL;
+	return ERR_PTR(errno);
 }
 
 static void target_core_drop_subdev(
@@ -2813,7 +2892,7 @@
 	struct se_hba *hba;
 	struct se_subsystem_api *t;
 	struct config_item *df_item;
-	struct config_group *dev_cg, *tg_pt_gp_cg;
+	struct config_group *dev_cg, *tg_pt_gp_cg, *dev_stat_grp;
 	int i;
 
 	hba = item_to_hba(&se_dev->se_dev_hba->hba_group.cg_item);
@@ -2825,6 +2904,14 @@
 	list_del(&se_dev->g_se_dev_list);
 	spin_unlock(&se_global->g_device_lock);
 
+	dev_stat_grp = &DEV_STAT_GRP(se_dev)->stat_group;
+	for (i = 0; dev_stat_grp->default_groups[i]; i++) {
+		df_item = &dev_stat_grp->default_groups[i]->cg_item;
+		dev_stat_grp->default_groups[i] = NULL;
+		config_item_put(df_item);
+	}
+	kfree(dev_stat_grp->default_groups);
+
 	tg_pt_gp_cg = &T10_ALUA(se_dev)->alua_tg_pt_gps_group;
 	for (i = 0; tg_pt_gp_cg->default_groups[i]; i++) {
 		df_item = &tg_pt_gp_cg->default_groups[i]->cg_item;
@@ -3044,7 +3131,7 @@
 
 /* Stop functions for struct config_item_type target_core_hba_cit */
 
-static int target_core_init_configfs(void)
+static int __init target_core_init_configfs(void)
 {
 	struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL;
 	struct config_group *lu_gp_cg = NULL;
@@ -3176,7 +3263,7 @@
 	return -1;
 }
 
-static void target_core_exit_configfs(void)
+static void __exit target_core_exit_configfs(void)
 {
 	struct configfs_subsystem *subsys;
 	struct config_group *hba_cg, *alua_cg, *lu_gp_cg;

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 350ed40..3fb8e32 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c

@@ -589,6 +589,7 @@
  *	Called with struct se_device->se_port_lock spinlock held.
  */
 static void core_release_port(struct se_device *dev, struct se_port *port)
+	__releases(&dev->se_port_lock) __acquires(&dev->se_port_lock)
 {
 	/*
 	 * Wait for any port reference for PR ALL_TG_PT=1 operation
@@ -779,49 +780,14 @@
 	return;
 }
 
-/*
- * Called with struct se_hba->device_lock held.
- */
-void se_clear_dev_ports(struct se_device *dev)
-{
-	struct se_hba *hba = dev->se_hba;
-	struct se_lun *lun;
-	struct se_portal_group *tpg;
-	struct se_port *sep, *sep_tmp;
-
-	spin_lock(&dev->se_port_lock);
-	list_for_each_entry_safe(sep, sep_tmp, &dev->dev_sep_list, sep_list) {
-		spin_unlock(&dev->se_port_lock);
-		spin_unlock(&hba->device_lock);
-
-		lun = sep->sep_lun;
-		tpg = sep->sep_tpg;
-		spin_lock(&lun->lun_sep_lock);
-		if (lun->lun_se_dev == NULL) {
-			spin_unlock(&lun->lun_sep_lock);
-			continue;
-		}
-		spin_unlock(&lun->lun_sep_lock);
-
-		core_dev_del_lun(tpg, lun->unpacked_lun);
-
-		spin_lock(&hba->device_lock);
-		spin_lock(&dev->se_port_lock);
-	}
-	spin_unlock(&dev->se_port_lock);
-
-	return;
-}
-
 /*	se_free_virtual_device():
  *
  *	Used for IBLOCK, RAMDISK, and FILEIO Transport Drivers.
  */
 int se_free_virtual_device(struct se_device *dev, struct se_hba *hba)
 {
-	spin_lock(&hba->device_lock);
-	se_clear_dev_ports(dev);
-	spin_unlock(&hba->device_lock);
+	if (!list_empty(&dev->dev_sep_list))
+		dump_stack();
 
 	core_alua_free_lu_gp_mem(dev);
 	se_release_device_for_hba(dev);

diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index b65d1c8..07ab5a3 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c

@@ -4,10 +4,10 @@
  * This file contains generic fabric module configfs infrastructure for
  * TCM v4.x code
  *
- * Copyright (c) 2010 Rising Tide Systems
- * Copyright (c) 2010 Linux-iSCSI.org
+ * Copyright (c) 2010,2011 Rising Tide Systems
+ * Copyright (c) 2010,2011 Linux-iSCSI.org
  *
- * Copyright (c) 2010 Nicholas A. Bellinger <nab@linux-iscsi.org>
+ * Copyright (c) Nicholas A. Bellinger <nab@linux-iscsi.org>
 *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -48,6 +48,7 @@
 #include "target_core_alua.h"
 #include "target_core_hba.h"
 #include "target_core_pr.h"
+#include "target_core_stat.h"
 
 #define TF_CIT_SETUP(_name, _item_ops, _group_ops, _attrs)		\
 static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf) \
@@ -241,6 +242,32 @@
 
 /* End of tfc_tpg_mappedlun_cit */
 
+/* Start of tfc_tpg_mappedlun_port_cit */
+
+static struct config_group *target_core_mappedlun_stat_mkdir(
+	struct config_group *group,
+	const char *name)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static void target_core_mappedlun_stat_rmdir(
+	struct config_group *group,
+	struct config_item *item)
+{
+	return;
+}
+
+static struct configfs_group_operations target_fabric_mappedlun_stat_group_ops = {
+	.make_group		= target_core_mappedlun_stat_mkdir,
+	.drop_item		= target_core_mappedlun_stat_rmdir,
+};
+
+TF_CIT_SETUP(tpg_mappedlun_stat, NULL, &target_fabric_mappedlun_stat_group_ops,
+		NULL);
+
+/* End of tfc_tpg_mappedlun_port_cit */
+
 /* Start of tfc_tpg_nacl_attrib_cit */
 
 CONFIGFS_EATTR_OPS(target_fabric_nacl_attrib, se_node_acl, acl_attrib_group);
@@ -294,6 +321,7 @@
 	struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf;
 	struct se_lun_acl *lacl;
 	struct config_item *acl_ci;
+	struct config_group *lacl_cg = NULL, *ml_stat_grp = NULL;
 	char *buf;
 	unsigned long mapped_lun;
 	int ret = 0;
@@ -330,15 +358,42 @@
 
 	lacl = core_dev_init_initiator_node_lun_acl(se_tpg, mapped_lun,
 			config_item_name(acl_ci), &ret);
-	if (!(lacl))
+	if (!(lacl)) {
+		ret = -EINVAL;
 		goto out;
+	}
+
+	lacl_cg = &lacl->se_lun_group;
+	lacl_cg->default_groups = kzalloc(sizeof(struct config_group) * 2,
+				GFP_KERNEL);
+	if (!lacl_cg->default_groups) {
+		printk(KERN_ERR "Unable to allocate lacl_cg->default_groups\n");
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	config_group_init_type_name(&lacl->se_lun_group, name,
 			&TF_CIT_TMPL(tf)->tfc_tpg_mappedlun_cit);
+	config_group_init_type_name(&lacl->ml_stat_grps.stat_group,
+			"statistics", &TF_CIT_TMPL(tf)->tfc_tpg_mappedlun_stat_cit);
+	lacl_cg->default_groups[0] = &lacl->ml_stat_grps.stat_group;
+	lacl_cg->default_groups[1] = NULL;
+
+	ml_stat_grp = &ML_STAT_GRPS(lacl)->stat_group;
+	ml_stat_grp->default_groups = kzalloc(sizeof(struct config_group) * 3,
+				GFP_KERNEL);
+	if (!ml_stat_grp->default_groups) {
+		printk(KERN_ERR "Unable to allocate ml_stat_grp->default_groups\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+	target_stat_setup_mappedlun_default_groups(lacl);
 
 	kfree(buf);
 	return &lacl->se_lun_group;
 out:
+	if (lacl_cg)
+		kfree(lacl_cg->default_groups);
 	kfree(buf);
 	return ERR_PTR(ret);
 }
@@ -347,6 +402,28 @@
 	struct config_group *group,
 	struct config_item *item)
 {
+	struct se_lun_acl *lacl = container_of(to_config_group(item),
+			struct se_lun_acl, se_lun_group);
+	struct config_item *df_item;
+	struct config_group *lacl_cg = NULL, *ml_stat_grp = NULL;
+	int i;
+
+	ml_stat_grp = &ML_STAT_GRPS(lacl)->stat_group;
+	for (i = 0; ml_stat_grp->default_groups[i]; i++) {
+		df_item = &ml_stat_grp->default_groups[i]->cg_item;
+		ml_stat_grp->default_groups[i] = NULL;
+		config_item_put(df_item);
+	}
+	kfree(ml_stat_grp->default_groups);
+
+	lacl_cg = &lacl->se_lun_group;
+	for (i = 0; lacl_cg->default_groups[i]; i++) {
+		df_item = &lacl_cg->default_groups[i]->cg_item;
+		lacl_cg->default_groups[i] = NULL;
+		config_item_put(df_item);
+	}
+	kfree(lacl_cg->default_groups);
+
 	config_item_put(item);
 }
 
@@ -376,6 +453,15 @@
 
 /* End of tfc_tpg_nacl_base_cit */
 
+/* Start of tfc_node_fabric_stats_cit */
+/*
+ * This is used as a placeholder for struct se_node_acl->acl_fabric_stat_group
+ * to allow fabrics access to ->acl_fabric_stat_group->default_groups[]
+ */
+TF_CIT_SETUP(tpg_nacl_stat, NULL, NULL, NULL);
+
+/* End of tfc_wwn_fabric_stats_cit */
+
 /* Start of tfc_tpg_nacl_cit */
 
 static struct config_group *target_fabric_make_nodeacl(
@@ -402,7 +488,8 @@
 	nacl_cg->default_groups[0] = &se_nacl->acl_attrib_group;
 	nacl_cg->default_groups[1] = &se_nacl->acl_auth_group;
 	nacl_cg->default_groups[2] = &se_nacl->acl_param_group;
-	nacl_cg->default_groups[3] = NULL;
+	nacl_cg->default_groups[3] = &se_nacl->acl_fabric_stat_group;
+	nacl_cg->default_groups[4] = NULL;
 
 	config_group_init_type_name(&se_nacl->acl_group, name,
 			&TF_CIT_TMPL(tf)->tfc_tpg_nacl_base_cit);
@@ -412,6 +499,9 @@
 			&TF_CIT_TMPL(tf)->tfc_tpg_nacl_auth_cit);
 	config_group_init_type_name(&se_nacl->acl_param_group, "param",
 			&TF_CIT_TMPL(tf)->tfc_tpg_nacl_param_cit);
+	config_group_init_type_name(&se_nacl->acl_fabric_stat_group,
+			"fabric_statistics",
+			&TF_CIT_TMPL(tf)->tfc_tpg_nacl_stat_cit);
 
 	return &se_nacl->acl_group;
 }
@@ -758,6 +848,31 @@
 
 /* End of tfc_tpg_port_cit */
 
+/* Start of tfc_tpg_port_stat_cit */
+
+static struct config_group *target_core_port_stat_mkdir(
+	struct config_group *group,
+	const char *name)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static void target_core_port_stat_rmdir(
+	struct config_group *group,
+	struct config_item *item)
+{
+	return;
+}
+
+static struct configfs_group_operations target_fabric_port_stat_group_ops = {
+	.make_group		= target_core_port_stat_mkdir,
+	.drop_item		= target_core_port_stat_rmdir,
+};
+
+TF_CIT_SETUP(tpg_port_stat, NULL, &target_fabric_port_stat_group_ops, NULL);
+
+/* End of tfc_tpg_port_stat_cit */
+
 /* Start of tfc_tpg_lun_cit */
 
 static struct config_group *target_fabric_make_lun(
@@ -768,7 +883,9 @@
 	struct se_portal_group *se_tpg = container_of(group,
 			struct se_portal_group, tpg_lun_group);
 	struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf;
+	struct config_group *lun_cg = NULL, *port_stat_grp = NULL;
 	unsigned long unpacked_lun;
+	int errno;
 
 	if (strstr(name, "lun_") != name) {
 		printk(KERN_ERR "Unable to locate \'_\" in"
@@ -782,16 +899,64 @@
 	if (!(lun))
 		return ERR_PTR(-EINVAL);
 
+	lun_cg = &lun->lun_group;
+	lun_cg->default_groups = kzalloc(sizeof(struct config_group) * 2,
+				GFP_KERNEL);
+	if (!lun_cg->default_groups) {
+		printk(KERN_ERR "Unable to allocate lun_cg->default_groups\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
 	config_group_init_type_name(&lun->lun_group, name,
 			&TF_CIT_TMPL(tf)->tfc_tpg_port_cit);
+	config_group_init_type_name(&lun->port_stat_grps.stat_group,
+			"statistics", &TF_CIT_TMPL(tf)->tfc_tpg_port_stat_cit);
+	lun_cg->default_groups[0] = &lun->port_stat_grps.stat_group;
+	lun_cg->default_groups[1] = NULL;
+
+	port_stat_grp = &PORT_STAT_GRP(lun)->stat_group;
+	port_stat_grp->default_groups =  kzalloc(sizeof(struct config_group) * 3,
+				GFP_KERNEL);
+	if (!port_stat_grp->default_groups) {
+		printk(KERN_ERR "Unable to allocate port_stat_grp->default_groups\n");
+		errno = -ENOMEM;
+		goto out;
+	}
+	target_stat_setup_port_default_groups(lun);
 
 	return &lun->lun_group;
+out:
+	if (lun_cg)
+		kfree(lun_cg->default_groups);
+	return ERR_PTR(errno);
 }
 
 static void target_fabric_drop_lun(
 	struct config_group *group,
 	struct config_item *item)
 {
+	struct se_lun *lun = container_of(to_config_group(item),
+				struct se_lun, lun_group);
+	struct config_item *df_item;
+	struct config_group *lun_cg, *port_stat_grp;
+	int i;
+
+	port_stat_grp = &PORT_STAT_GRP(lun)->stat_group;
+	for (i = 0; port_stat_grp->default_groups[i]; i++) {
+		df_item = &port_stat_grp->default_groups[i]->cg_item;
+		port_stat_grp->default_groups[i] = NULL;
+		config_item_put(df_item);
+	}
+	kfree(port_stat_grp->default_groups);
+
+	lun_cg = &lun->lun_group;
+	for (i = 0; lun_cg->default_groups[i]; i++) {
+		df_item = &lun_cg->default_groups[i]->cg_item;
+		lun_cg->default_groups[i] = NULL;
+		config_item_put(df_item);
+	}
+	kfree(lun_cg->default_groups);
+
 	config_item_put(item);
 }
 
@@ -946,6 +1111,15 @@
 
 /* End of tfc_tpg_cit */
 
+/* Start of tfc_wwn_fabric_stats_cit */
+/*
+ * This is used as a placeholder for struct se_wwn->fabric_stat_group
+ * to allow fabrics access to ->fabric_stat_group->default_groups[]
+ */
+TF_CIT_SETUP(wwn_fabric_stats, NULL, NULL, NULL);
+
+/* End of tfc_wwn_fabric_stats_cit */
+
 /* Start of tfc_wwn_cit */
 
 static struct config_group *target_fabric_make_wwn(
@@ -966,8 +1140,17 @@
 		return ERR_PTR(-EINVAL);
 
 	wwn->wwn_tf = tf;
+	/*
+	 * Setup default groups from pre-allocated wwn->wwn_default_groups
+	 */
+	wwn->wwn_group.default_groups = wwn->wwn_default_groups;
+	wwn->wwn_group.default_groups[0] = &wwn->fabric_stat_group;
+	wwn->wwn_group.default_groups[1] = NULL;
+
 	config_group_init_type_name(&wwn->wwn_group, name,
 			&TF_CIT_TMPL(tf)->tfc_tpg_cit);
+	config_group_init_type_name(&wwn->fabric_stat_group, "fabric_statistics",
+			&TF_CIT_TMPL(tf)->tfc_wwn_fabric_stats_cit);
 
 	return &wwn->wwn_group;
 }
@@ -976,6 +1159,18 @@
 	struct config_group *group,
 	struct config_item *item)
 {
+	struct se_wwn *wwn = container_of(to_config_group(item),
+				struct se_wwn, wwn_group);
+	struct config_item *df_item;
+	struct config_group *cg = &wwn->wwn_group;
+	int i;
+
+	for (i = 0; cg->default_groups[i]; i++) {
+		df_item = &cg->default_groups[i]->cg_item;
+		cg->default_groups[i] = NULL;
+		config_item_put(df_item);
+	}
+
 	config_item_put(item);
 }
 
@@ -1015,9 +1210,11 @@
 {
 	target_fabric_setup_discovery_cit(tf);
 	target_fabric_setup_wwn_cit(tf);
+	target_fabric_setup_wwn_fabric_stats_cit(tf);
 	target_fabric_setup_tpg_cit(tf);
 	target_fabric_setup_tpg_base_cit(tf);
 	target_fabric_setup_tpg_port_cit(tf);
+	target_fabric_setup_tpg_port_stat_cit(tf);
 	target_fabric_setup_tpg_lun_cit(tf);
 	target_fabric_setup_tpg_np_cit(tf);
 	target_fabric_setup_tpg_np_base_cit(tf);
@@ -1028,7 +1225,9 @@
 	target_fabric_setup_tpg_nacl_attrib_cit(tf);
 	target_fabric_setup_tpg_nacl_auth_cit(tf);
 	target_fabric_setup_tpg_nacl_param_cit(tf);
+	target_fabric_setup_tpg_nacl_stat_cit(tf);
 	target_fabric_setup_tpg_mappedlun_cit(tf);
+	target_fabric_setup_tpg_mappedlun_stat_cit(tf);
 
 	return 0;
 }

diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c
index a3c695a..d57ad67 100644
--- a/drivers/target/target_core_fabric_lib.c
+++ b/drivers/target/target_core_fabric_lib.c

@@ -34,6 +34,7 @@
 #include <target/target_core_base.h>
 #include <target/target_core_device.h>
 #include <target/target_core_transport.h>
+#include <target/target_core_fabric_lib.h>
 #include <target/target_core_fabric_ops.h>
 #include <target/target_core_configfs.h>
 

diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 190ca8a..02f553a 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c

@@ -134,7 +134,7 @@
 	mm_segment_t old_fs;
 	struct file *file;
 	struct inode *inode = NULL;
-	int dev_flags = 0, flags;
+	int dev_flags = 0, flags, ret = -EINVAL;
 
 	memset(&dev_limits, 0, sizeof(struct se_dev_limits));
 
@@ -146,6 +146,7 @@
 	if (IS_ERR(dev_p)) {
 		printk(KERN_ERR "getname(%s) failed: %lu\n",
 			fd_dev->fd_dev_name, IS_ERR(dev_p));
+		ret = PTR_ERR(dev_p);
 		goto fail;
 	}
 #if 0
@@ -165,8 +166,12 @@
 		flags |= O_SYNC;
 
 	file = filp_open(dev_p, flags, 0600);
-
-	if (IS_ERR(file) || !file || !file->f_dentry) {
+	if (IS_ERR(file)) {
+		printk(KERN_ERR "filp_open(%s) failed\n", dev_p);
+		ret = PTR_ERR(file);
+		goto fail;
+	}
+	if (!file || !file->f_dentry) {
 		printk(KERN_ERR "filp_open(%s) failed\n", dev_p);
 		goto fail;
 	}
@@ -241,7 +246,7 @@
 		fd_dev->fd_file = NULL;
 	}
 	putname(dev_p);
-	return NULL;
+	return ERR_PTR(ret);
 }
 
 /*	fd_free_device(): (Part of se_subsystem_api_t template)
@@ -509,7 +514,7 @@
 static match_table_t tokens = {
 	{Opt_fd_dev_name, "fd_dev_name=%s"},
 	{Opt_fd_dev_size, "fd_dev_size=%s"},
-	{Opt_fd_buffered_io, "fd_buffered_id=%d"},
+	{Opt_fd_buffered_io, "fd_buffered_io=%d"},
 	{Opt_err, NULL}
 };
 
@@ -536,15 +541,26 @@
 		token = match_token(ptr, tokens, args);
 		switch (token) {
 		case Opt_fd_dev_name:
+			arg_p = match_strdup(&args[0]);
+			if (!arg_p) {
+				ret = -ENOMEM;
+				break;
+			}
 			snprintf(fd_dev->fd_dev_name, FD_MAX_DEV_NAME,
-					"%s", match_strdup(&args[0]));
+					"%s", arg_p);
+			kfree(arg_p);
 			printk(KERN_INFO "FILEIO: Referencing Path: %s\n",
 					fd_dev->fd_dev_name);
 			fd_dev->fbd_flags |= FBDF_HAS_PATH;
 			break;
 		case Opt_fd_dev_size:
 			arg_p = match_strdup(&args[0]);
+			if (!arg_p) {
+				ret = -ENOMEM;
+				break;
+			}
 			ret = strict_strtoull(arg_p, 0, &fd_dev->fd_dev_size);
+			kfree(arg_p);
 			if (ret < 0) {
 				printk(KERN_ERR "strict_strtoull() failed for"
 						" fd_dev_size=\n");

diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c
index 6ec51cb..0b8f8da 100644
--- a/drivers/target/target_core_hba.c
+++ b/drivers/target/target_core_hba.c

@@ -151,19 +151,8 @@
 int
 core_delete_hba(struct se_hba *hba)
 {
-	struct se_device *dev, *dev_tmp;
-
-	spin_lock(&hba->device_lock);
-	list_for_each_entry_safe(dev, dev_tmp, &hba->hba_dev_list, dev_list) {
-
-		se_clear_dev_ports(dev);
-		spin_unlock(&hba->device_lock);
-
-		se_release_device_for_hba(dev);
-
-		spin_lock(&hba->device_lock);
-	}
-	spin_unlock(&hba->device_lock);
+	if (!list_empty(&hba->hba_dev_list))
+		dump_stack();
 
 	hba->transport->detach_hba(hba);
 

diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index eb0afec..8663900 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c

@@ -129,10 +129,11 @@
 	struct request_queue *q;
 	struct queue_limits *limits;
 	u32 dev_flags = 0;
+	int ret = -EINVAL;
 
 	if (!(ib_dev)) {
 		printk(KERN_ERR "Unable to locate struct iblock_dev parameter\n");
-		return 0;
+		return ERR_PTR(ret);
 	}
 	memset(&dev_limits, 0, sizeof(struct se_dev_limits));
 	/*
@@ -141,7 +142,7 @@
 	ib_dev->ibd_bio_set = bioset_create(32, 64);
 	if (!(ib_dev->ibd_bio_set)) {
 		printk(KERN_ERR "IBLOCK: Unable to create bioset()\n");
-		return 0;
+		return ERR_PTR(-ENOMEM);
 	}
 	printk(KERN_INFO "IBLOCK: Created bio_set()\n");
 	/*
@@ -153,8 +154,10 @@
 
 	bd = blkdev_get_by_path(ib_dev->ibd_udev_path,
 				FMODE_WRITE|FMODE_READ|FMODE_EXCL, ib_dev);
-	if (IS_ERR(bd))
+	if (IS_ERR(bd)) {
+		ret = PTR_ERR(bd);
 		goto failed;
+	}
 	/*
 	 * Setup the local scope queue_limits from struct request_queue->limits
 	 * to pass into transport_add_device_to_core_hba() as struct se_dev_limits.
@@ -184,9 +187,7 @@
 	 * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
 	 * in ATA and we need to set TPE=1
 	 */
-	if (blk_queue_discard(bdev_get_queue(bd))) {
-		struct request_queue *q = bdev_get_queue(bd);
-
+	if (blk_queue_discard(q)) {
 		DEV_ATTRIB(dev)->max_unmap_lba_count =
 				q->limits.max_discard_sectors;
 		/*
@@ -212,7 +213,7 @@
 	ib_dev->ibd_bd = NULL;
 	ib_dev->ibd_major = 0;
 	ib_dev->ibd_minor = 0;
-	return NULL;
+	return ERR_PTR(ret);
 }
 
 static void iblock_free_device(void *p)
@@ -467,7 +468,7 @@
 					       const char *page, ssize_t count)
 {
 	struct iblock_dev *ib_dev = se_dev->se_dev_su_ptr;
-	char *orig, *ptr, *opts;
+	char *orig, *ptr, *arg_p, *opts;
 	substring_t args[MAX_OPT_ARGS];
 	int ret = 0, arg, token;
 
@@ -490,9 +491,14 @@
 				ret = -EEXIST;
 				goto out;
 			}
-
-			ret = snprintf(ib_dev->ibd_udev_path, SE_UDEV_PATH_LEN,
-				"%s", match_strdup(&args[0]));
+			arg_p = match_strdup(&args[0]);
+			if (!arg_p) {
+				ret = -ENOMEM;
+				break;
+			}
+			snprintf(ib_dev->ibd_udev_path, SE_UDEV_PATH_LEN,
+					"%s", arg_p);
+			kfree(arg_p);
 			printk(KERN_INFO "IBLOCK: Referencing UDEV path: %s\n",
 					ib_dev->ibd_udev_path);
 			ib_dev->ibd_flags |= IBDF_HAS_UDEV_PATH;

diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 5a9d2ba..7ff6a35 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c

@@ -441,6 +441,7 @@
 	struct pscsi_dev_virt *pdv,
 	struct se_subsystem_dev *se_dev,
 	struct se_hba *hba)
+	__releases(sh->host_lock)
 {
 	struct se_device *dev;
 	struct pscsi_hba_virt *phv = (struct pscsi_hba_virt *)pdv->pdv_se_hba->hba_ptr;
@@ -488,6 +489,7 @@
 	struct pscsi_dev_virt *pdv,
 	struct se_subsystem_dev *se_dev,
 	struct se_hba *hba)
+	__releases(sh->host_lock)
 {
 	struct se_device *dev;
 	struct pscsi_hba_virt *phv = (struct pscsi_hba_virt *)pdv->pdv_se_hba->hba_ptr;
@@ -522,6 +524,7 @@
 	struct pscsi_dev_virt *pdv,
 	struct se_subsystem_dev *se_dev,
 	struct se_hba *hba)
+	__releases(sh->host_lock)
 {
 	struct se_device *dev;
 	struct pscsi_hba_virt *phv = (struct pscsi_hba_virt *)pdv->pdv_se_hba->hba_ptr;
@@ -555,7 +558,7 @@
 	if (!(pdv)) {
 		printk(KERN_ERR "Unable to locate struct pscsi_dev_virt"
 				" parameter\n");
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	}
 	/*
 	 * If not running in PHV_LLD_SCSI_HOST_NO mode, locate the
@@ -565,7 +568,7 @@
 		if (phv->phv_mode == PHV_LLD_SCSI_HOST_NO) {
 			printk(KERN_ERR "pSCSI: Unable to locate struct"
 				" Scsi_Host for PHV_LLD_SCSI_HOST_NO\n");
-			return NULL;
+			return ERR_PTR(-ENODEV);
 		}
 		/*
 		 * For the newer PHV_VIRUTAL_HOST_ID struct scsi_device
@@ -574,7 +577,7 @@
 		if (!(se_dev->su_dev_flags & SDF_USING_UDEV_PATH)) {
 			printk(KERN_ERR "pSCSI: udev_path attribute has not"
 				" been set before ENABLE=1\n");
-			return NULL;
+			return ERR_PTR(-EINVAL);
 		}
 		/*
 		 * If no scsi_host_id= was passed for PHV_VIRUTAL_HOST_ID,
@@ -587,12 +590,12 @@
 				printk(KERN_ERR "pSCSI: Unable to set hba_mode"
 					" with active devices\n");
 				spin_unlock(&hba->device_lock);
-				return NULL;
+				return ERR_PTR(-EEXIST);
 			}
 			spin_unlock(&hba->device_lock);
 
 			if (pscsi_pmode_enable_hba(hba, 1) != 1)
-				return NULL;
+				return ERR_PTR(-ENODEV);
 
 			legacy_mode_enable = 1;
 			hba->hba_flags |= HBA_FLAGS_PSCSI_MODE;
@@ -602,14 +605,14 @@
 			if (!(sh)) {
 				printk(KERN_ERR "pSCSI: Unable to locate"
 					" pdv_host_id: %d\n", pdv->pdv_host_id);
-				return NULL;
+				return ERR_PTR(-ENODEV);
 			}
 		}
 	} else {
 		if (phv->phv_mode == PHV_VIRUTAL_HOST_ID) {
 			printk(KERN_ERR "pSCSI: PHV_VIRUTAL_HOST_ID set while"
 				" struct Scsi_Host exists\n");
-			return NULL;
+			return ERR_PTR(-EEXIST);
 		}
 	}
 
@@ -644,7 +647,7 @@
 				hba->hba_flags &= ~HBA_FLAGS_PSCSI_MODE;
 			}
 			pdv->pdv_sd = NULL;
-			return NULL;
+			return ERR_PTR(-ENODEV);
 		}
 		return dev;
 	}
@@ -660,7 +663,7 @@
 		hba->hba_flags &= ~HBA_FLAGS_PSCSI_MODE;
 	}
 
-	return NULL;
+	return ERR_PTR(-ENODEV);
 }
 
 /*	pscsi_free_device(): (Part of se_subsystem_api_t template)
@@ -816,6 +819,7 @@
 		if (!(pt->pscsi_cdb)) {
 			printk(KERN_ERR "pSCSI: Unable to allocate extended"
 					" pt->pscsi_cdb\n");
+			kfree(pt);
 			return NULL;
 		}
 	} else

diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index 8dc6d74..7837dd3 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c

@@ -150,7 +150,7 @@
 	if (rd_dev->rd_page_count <= 0) {
 		printk(KERN_ERR "Illegal page count: %u for Ramdisk device\n",
 			rd_dev->rd_page_count);
-		return -1;
+		return -EINVAL;
 	}
 	total_sg_needed = rd_dev->rd_page_count;
 
@@ -160,7 +160,7 @@
 	if (!(sg_table)) {
 		printk(KERN_ERR "Unable to allocate memory for Ramdisk"
 			" scatterlist tables\n");
-		return -1;
+		return -ENOMEM;
 	}
 
 	rd_dev->sg_table_array = sg_table;
@@ -175,7 +175,7 @@
 		if (!(sg)) {
 			printk(KERN_ERR "Unable to allocate scatterlist array"
 				" for struct rd_dev\n");
-			return -1;
+			return -ENOMEM;
 		}
 
 		sg_init_table((struct scatterlist *)&sg[0], sg_per_table);
@@ -191,7 +191,7 @@
 			if (!(pg)) {
 				printk(KERN_ERR "Unable to allocate scatterlist"
 					" pages for struct rd_dev_sg_table\n");
-				return -1;
+				return -ENOMEM;
 			}
 			sg_assign_page(&sg[j], pg);
 			sg[j].length = PAGE_SIZE;
@@ -253,12 +253,13 @@
 	struct se_dev_limits dev_limits;
 	struct rd_dev *rd_dev = p;
 	struct rd_host *rd_host = hba->hba_ptr;
-	int dev_flags = 0;
+	int dev_flags = 0, ret;
 	char prod[16], rev[4];
 
 	memset(&dev_limits, 0, sizeof(struct se_dev_limits));
 
-	if (rd_build_device_space(rd_dev) < 0)
+	ret = rd_build_device_space(rd_dev);
+	if (ret < 0)
 		goto fail;
 
 	snprintf(prod, 16, "RAMDISK-%s", (rd_dev->rd_direct) ? "DR" : "MCP");
@@ -292,7 +293,7 @@
 
 fail:
 	rd_release_device_space(rd_dev);
-	return NULL;
+	return ERR_PTR(ret);
 }
 
 static struct se_device *rd_DIRECT_create_virtdevice(

diff --git a/drivers/target/target_core_rd.h b/drivers/target/target_core_rd.h
index 13badfb..3ea19e2 100644
--- a/drivers/target/target_core_rd.h
+++ b/drivers/target/target_core_rd.h

@@ -14,8 +14,6 @@
 #define RD_BLOCKSIZE		512
 #define RD_MAX_SECTORS		1024
 
-extern struct kmem_cache *se_mem_cache;
-
 /* Used in target_core_init_configfs() for virtual LUN 0 access */
 int __init rd_module_init(void);
 void rd_module_exit(void);

diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c
new file mode 100644
index 0000000..5e3a067
--- /dev/null
+++ b/drivers/target/target_core_stat.c

@@ -0,0 +1,1810 @@
+/*******************************************************************************
+ * Filename:  target_core_stat.c
+ *
+ * Copyright (c) 2011 Rising Tide Systems
+ * Copyright (c) 2011 Linux-iSCSI.org
+ *
+ * Modern ConfigFS group context specific statistics based on original
+ * target_core_mib.c code
+ *
+ * Copyright (c) 2006-2007 SBE, Inc.  All Rights Reserved.
+ *
+ * Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ ******************************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/version.h>
+#include <generated/utsrelease.h>
+#include <linux/utsname.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/blkdev.h>
+#include <linux/configfs.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_configfs.h>
+#include <target/configfs_macros.h>
+
+#include "target_core_hba.h"
+
+#ifndef INITIAL_JIFFIES
+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
+#endif
+
+#define NONE		"None"
+#define ISPRINT(a)   ((a >= ' ') && (a <= '~'))
+
+#define SCSI_LU_INDEX			1
+#define LU_COUNT			1
+
+/*
+ * SCSI Device Table
+ */
+
+CONFIGFS_EATTR_STRUCT(target_stat_scsi_dev, se_dev_stat_grps);
+#define DEV_STAT_SCSI_DEV_ATTR(_name, _mode)				\
+static struct target_stat_scsi_dev_attribute				\
+			target_stat_scsi_dev_##_name =			\
+	__CONFIGFS_EATTR(_name, _mode,					\
+	target_stat_scsi_dev_show_attr_##_name,				\
+	target_stat_scsi_dev_store_attr_##_name);
+
+#define DEV_STAT_SCSI_DEV_ATTR_RO(_name)				\
+static struct target_stat_scsi_dev_attribute				\
+			target_stat_scsi_dev_##_name =			\
+	__CONFIGFS_EATTR_RO(_name,					\
+	target_stat_scsi_dev_show_attr_##_name);
+
+static ssize_t target_stat_scsi_dev_show_attr_inst(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_hba *hba = se_subdev->se_dev_hba;
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", hba->hba_index);
+}
+DEV_STAT_SCSI_DEV_ATTR_RO(inst);
+
+static ssize_t target_stat_scsi_dev_show_attr_indx(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index);
+}
+DEV_STAT_SCSI_DEV_ATTR_RO(indx);
+
+static ssize_t target_stat_scsi_dev_show_attr_role(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	return snprintf(page, PAGE_SIZE, "Target\n");
+}
+DEV_STAT_SCSI_DEV_ATTR_RO(role);
+
+static ssize_t target_stat_scsi_dev_show_attr_ports(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", dev->dev_port_count);
+}
+DEV_STAT_SCSI_DEV_ATTR_RO(ports);
+
+CONFIGFS_EATTR_OPS(target_stat_scsi_dev, se_dev_stat_grps, scsi_dev_group);
+
+static struct configfs_attribute *target_stat_scsi_dev_attrs[] = {
+	&target_stat_scsi_dev_inst.attr,
+	&target_stat_scsi_dev_indx.attr,
+	&target_stat_scsi_dev_role.attr,
+	&target_stat_scsi_dev_ports.attr,
+	NULL,
+};
+
+static struct configfs_item_operations target_stat_scsi_dev_attrib_ops = {
+	.show_attribute		= target_stat_scsi_dev_attr_show,
+	.store_attribute	= target_stat_scsi_dev_attr_store,
+};
+
+static struct config_item_type target_stat_scsi_dev_cit = {
+	.ct_item_ops		= &target_stat_scsi_dev_attrib_ops,
+	.ct_attrs		= target_stat_scsi_dev_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * SCSI Target Device Table
+ */
+
+CONFIGFS_EATTR_STRUCT(target_stat_scsi_tgt_dev, se_dev_stat_grps);
+#define DEV_STAT_SCSI_TGT_DEV_ATTR(_name, _mode)			\
+static struct target_stat_scsi_tgt_dev_attribute			\
+			target_stat_scsi_tgt_dev_##_name =		\
+	__CONFIGFS_EATTR(_name, _mode,					\
+	target_stat_scsi_tgt_dev_show_attr_##_name,			\
+	target_stat_scsi_tgt_dev_store_attr_##_name);
+
+#define DEV_STAT_SCSI_TGT_DEV_ATTR_RO(_name)				\
+static struct target_stat_scsi_tgt_dev_attribute			\
+			target_stat_scsi_tgt_dev_##_name =		\
+	__CONFIGFS_EATTR_RO(_name,					\
+	target_stat_scsi_tgt_dev_show_attr_##_name);
+
+static ssize_t target_stat_scsi_tgt_dev_show_attr_inst(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_hba *hba = se_subdev->se_dev_hba;
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", hba->hba_index);
+}
+DEV_STAT_SCSI_TGT_DEV_ATTR_RO(inst);
+
+static ssize_t target_stat_scsi_tgt_dev_show_attr_indx(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index);
+}
+DEV_STAT_SCSI_TGT_DEV_ATTR_RO(indx);
+
+static ssize_t target_stat_scsi_tgt_dev_show_attr_num_lus(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", LU_COUNT);
+}
+DEV_STAT_SCSI_TGT_DEV_ATTR_RO(num_lus);
+
+static ssize_t target_stat_scsi_tgt_dev_show_attr_status(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+	char status[16];
+
+	if (!dev)
+		return -ENODEV;
+
+	switch (dev->dev_status) {
+	case TRANSPORT_DEVICE_ACTIVATED:
+		strcpy(status, "activated");
+		break;
+	case TRANSPORT_DEVICE_DEACTIVATED:
+		strcpy(status, "deactivated");
+		break;
+	case TRANSPORT_DEVICE_SHUTDOWN:
+		strcpy(status, "shutdown");
+		break;
+	case TRANSPORT_DEVICE_OFFLINE_ACTIVATED:
+	case TRANSPORT_DEVICE_OFFLINE_DEACTIVATED:
+		strcpy(status, "offline");
+		break;
+	default:
+		sprintf(status, "unknown(%d)", dev->dev_status);
+		break;
+	}
+
+	return snprintf(page, PAGE_SIZE, "%s\n", status);
+}
+DEV_STAT_SCSI_TGT_DEV_ATTR_RO(status);
+
+static ssize_t target_stat_scsi_tgt_dev_show_attr_non_access_lus(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+	int non_accessible_lus;
+
+	if (!dev)
+		return -ENODEV;
+
+	switch (dev->dev_status) {
+	case TRANSPORT_DEVICE_ACTIVATED:
+		non_accessible_lus = 0;
+		break;
+	case TRANSPORT_DEVICE_DEACTIVATED:
+	case TRANSPORT_DEVICE_SHUTDOWN:
+	case TRANSPORT_DEVICE_OFFLINE_ACTIVATED:
+	case TRANSPORT_DEVICE_OFFLINE_DEACTIVATED:
+	default:
+		non_accessible_lus = 1;
+		break;
+	}
+
+	return snprintf(page, PAGE_SIZE, "%u\n", non_accessible_lus);
+}
+DEV_STAT_SCSI_TGT_DEV_ATTR_RO(non_access_lus);
+
+static ssize_t target_stat_scsi_tgt_dev_show_attr_resets(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", dev->num_resets);
+}
+DEV_STAT_SCSI_TGT_DEV_ATTR_RO(resets);
+
+
+CONFIGFS_EATTR_OPS(target_stat_scsi_tgt_dev, se_dev_stat_grps, scsi_tgt_dev_group);
+
+static struct configfs_attribute *target_stat_scsi_tgt_dev_attrs[] = {
+	&target_stat_scsi_tgt_dev_inst.attr,
+	&target_stat_scsi_tgt_dev_indx.attr,
+	&target_stat_scsi_tgt_dev_num_lus.attr,
+	&target_stat_scsi_tgt_dev_status.attr,
+	&target_stat_scsi_tgt_dev_non_access_lus.attr,
+	&target_stat_scsi_tgt_dev_resets.attr,
+	NULL,
+};
+
+static struct configfs_item_operations target_stat_scsi_tgt_dev_attrib_ops = {
+	.show_attribute		= target_stat_scsi_tgt_dev_attr_show,
+	.store_attribute	= target_stat_scsi_tgt_dev_attr_store,
+};
+
+static struct config_item_type target_stat_scsi_tgt_dev_cit = {
+	.ct_item_ops		= &target_stat_scsi_tgt_dev_attrib_ops,
+	.ct_attrs		= target_stat_scsi_tgt_dev_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * SCSI Logical Unit Table
+ */
+
+CONFIGFS_EATTR_STRUCT(target_stat_scsi_lu, se_dev_stat_grps);
+#define DEV_STAT_SCSI_LU_ATTR(_name, _mode)				\
+static struct target_stat_scsi_lu_attribute target_stat_scsi_lu_##_name = \
+	__CONFIGFS_EATTR(_name, _mode,					\
+	target_stat_scsi_lu_show_attr_##_name,				\
+	target_stat_scsi_lu_store_attr_##_name);
+
+#define DEV_STAT_SCSI_LU_ATTR_RO(_name)					\
+static struct target_stat_scsi_lu_attribute target_stat_scsi_lu_##_name = \
+	__CONFIGFS_EATTR_RO(_name,					\
+	target_stat_scsi_lu_show_attr_##_name);
+
+static ssize_t target_stat_scsi_lu_show_attr_inst(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_hba *hba = se_subdev->se_dev_hba;
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", hba->hba_index);
+}
+DEV_STAT_SCSI_LU_ATTR_RO(inst);
+
+static ssize_t target_stat_scsi_lu_show_attr_dev(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index);
+}
+DEV_STAT_SCSI_LU_ATTR_RO(dev);
+
+static ssize_t target_stat_scsi_lu_show_attr_indx(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	return snprintf(page, PAGE_SIZE, "%u\n", SCSI_LU_INDEX);
+}
+DEV_STAT_SCSI_LU_ATTR_RO(indx);
+
+static ssize_t target_stat_scsi_lu_show_attr_lun(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+	/* FIXME: scsiLuDefaultLun */
+	return snprintf(page, PAGE_SIZE, "%llu\n", (unsigned long long)0);
+}
+DEV_STAT_SCSI_LU_ATTR_RO(lun);
+
+static ssize_t target_stat_scsi_lu_show_attr_lu_name(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+	/* scsiLuWwnName */
+	return snprintf(page, PAGE_SIZE, "%s\n",
+			(strlen(DEV_T10_WWN(dev)->unit_serial)) ?
+			(char *)&DEV_T10_WWN(dev)->unit_serial[0] : "None");
+}
+DEV_STAT_SCSI_LU_ATTR_RO(lu_name);
+
+static ssize_t target_stat_scsi_lu_show_attr_vend(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+	int j;
+	char str[28];
+
+	if (!dev)
+		return -ENODEV;
+	/* scsiLuVendorId */
+	memcpy(&str[0], (void *)DEV_T10_WWN(dev), 28);
+	for (j = 0; j < 8; j++)
+		str[j] = ISPRINT(DEV_T10_WWN(dev)->vendor[j]) ?
+				DEV_T10_WWN(dev)->vendor[j] : 0x20;
+	str[8] = 0;
+	return snprintf(page, PAGE_SIZE, "%s\n", str);
+}
+DEV_STAT_SCSI_LU_ATTR_RO(vend);
+
+static ssize_t target_stat_scsi_lu_show_attr_prod(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+	int j;
+	char str[28];
+
+	if (!dev)
+		return -ENODEV;
+
+	/* scsiLuProductId */
+	memcpy(&str[0], (void *)DEV_T10_WWN(dev), 28);
+	for (j = 0; j < 16; j++)
+		str[j] = ISPRINT(DEV_T10_WWN(dev)->model[j]) ?
+				DEV_T10_WWN(dev)->model[j] : 0x20;
+	str[16] = 0;
+	return snprintf(page, PAGE_SIZE, "%s\n", str);
+}
+DEV_STAT_SCSI_LU_ATTR_RO(prod);
+
+static ssize_t target_stat_scsi_lu_show_attr_rev(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+	int j;
+	char str[28];
+
+	if (!dev)
+		return -ENODEV;
+
+	/* scsiLuRevisionId */
+	memcpy(&str[0], (void *)DEV_T10_WWN(dev), 28);
+	for (j = 0; j < 4; j++)
+		str[j] = ISPRINT(DEV_T10_WWN(dev)->revision[j]) ?
+				DEV_T10_WWN(dev)->revision[j] : 0x20;
+	str[4] = 0;
+	return snprintf(page, PAGE_SIZE, "%s\n", str);
+}
+DEV_STAT_SCSI_LU_ATTR_RO(rev);
+
+static ssize_t target_stat_scsi_lu_show_attr_dev_type(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* scsiLuPeripheralType */
+	return snprintf(page, PAGE_SIZE, "%u\n",
+			TRANSPORT(dev)->get_device_type(dev));
+}
+DEV_STAT_SCSI_LU_ATTR_RO(dev_type);
+
+static ssize_t target_stat_scsi_lu_show_attr_status(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* scsiLuStatus */
+	return snprintf(page, PAGE_SIZE, "%s\n",
+		(dev->dev_status == TRANSPORT_DEVICE_ACTIVATED) ?
+		"available" : "notavailable");
+}
+DEV_STAT_SCSI_LU_ATTR_RO(status);
+
+static ssize_t target_stat_scsi_lu_show_attr_state_bit(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* scsiLuState */
+	return snprintf(page, PAGE_SIZE, "exposed\n");
+}
+DEV_STAT_SCSI_LU_ATTR_RO(state_bit);
+
+static ssize_t target_stat_scsi_lu_show_attr_num_cmds(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* scsiLuNumCommands */
+	return snprintf(page, PAGE_SIZE, "%llu\n",
+			(unsigned long long)dev->num_cmds);
+}
+DEV_STAT_SCSI_LU_ATTR_RO(num_cmds);
+
+static ssize_t target_stat_scsi_lu_show_attr_read_mbytes(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* scsiLuReadMegaBytes */
+	return snprintf(page, PAGE_SIZE, "%u\n", (u32)(dev->read_bytes >> 20));
+}
+DEV_STAT_SCSI_LU_ATTR_RO(read_mbytes);
+
+static ssize_t target_stat_scsi_lu_show_attr_write_mbytes(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* scsiLuWrittenMegaBytes */
+	return snprintf(page, PAGE_SIZE, "%u\n", (u32)(dev->write_bytes >> 20));
+}
+DEV_STAT_SCSI_LU_ATTR_RO(write_mbytes);
+
+static ssize_t target_stat_scsi_lu_show_attr_resets(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* scsiLuInResets */
+	return snprintf(page, PAGE_SIZE, "%u\n", dev->num_resets);
+}
+DEV_STAT_SCSI_LU_ATTR_RO(resets);
+
+static ssize_t target_stat_scsi_lu_show_attr_full_stat(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* FIXME: scsiLuOutTaskSetFullStatus */
+	return snprintf(page, PAGE_SIZE, "%u\n", 0);
+}
+DEV_STAT_SCSI_LU_ATTR_RO(full_stat);
+
+static ssize_t target_stat_scsi_lu_show_attr_hs_num_cmds(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* FIXME: scsiLuHSInCommands */
+	return snprintf(page, PAGE_SIZE, "%u\n", 0);
+}
+DEV_STAT_SCSI_LU_ATTR_RO(hs_num_cmds);
+
+static ssize_t target_stat_scsi_lu_show_attr_creation_time(
+	struct se_dev_stat_grps *sgrps, char *page)
+{
+	struct se_subsystem_dev *se_subdev = container_of(sgrps,
+			struct se_subsystem_dev, dev_stat_grps);
+	struct se_device *dev = se_subdev->se_dev_ptr;
+
+	if (!dev)
+		return -ENODEV;
+
+	/* scsiLuCreationTime */
+	return snprintf(page, PAGE_SIZE, "%u\n", (u32)(((u32)dev->creation_time -
+				INITIAL_JIFFIES) * 100 / HZ));
+}
+DEV_STAT_SCSI_LU_ATTR_RO(creation_time);
+
+CONFIGFS_EATTR_OPS(target_stat_scsi_lu, se_dev_stat_grps, scsi_lu_group);
+
+static struct configfs_attribute *target_stat_scsi_lu_attrs[] = {
+	&target_stat_scsi_lu_inst.attr,
+	&target_stat_scsi_lu_dev.attr,
+	&target_stat_scsi_lu_indx.attr,
+	&target_stat_scsi_lu_lun.attr,
+	&target_stat_scsi_lu_lu_name.attr,
+	&target_stat_scsi_lu_vend.attr,
+	&target_stat_scsi_lu_prod.attr,
+	&target_stat_scsi_lu_rev.attr,
+	&target_stat_scsi_lu_dev_type.attr,
+	&target_stat_scsi_lu_status.attr,
+	&target_stat_scsi_lu_state_bit.attr,
+	&target_stat_scsi_lu_num_cmds.attr,
+	&target_stat_scsi_lu_read_mbytes.attr,
+	&target_stat_scsi_lu_write_mbytes.attr,
+	&target_stat_scsi_lu_resets.attr,
+	&target_stat_scsi_lu_full_stat.attr,
+	&target_stat_scsi_lu_hs_num_cmds.attr,
+	&target_stat_scsi_lu_creation_time.attr,
+	NULL,
+};
+
+static struct configfs_item_operations target_stat_scsi_lu_attrib_ops = {
+	.show_attribute		= target_stat_scsi_lu_attr_show,
+	.store_attribute	= target_stat_scsi_lu_attr_store,
+};
+
+static struct config_item_type target_stat_scsi_lu_cit = {
+	.ct_item_ops		= &target_stat_scsi_lu_attrib_ops,
+	.ct_attrs		= target_stat_scsi_lu_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * Called from target_core_configfs.c:target_core_make_subdev() to setup
+ * the target statistics groups + configfs CITs located in target_core_stat.c
+ */
+void target_stat_setup_dev_default_groups(struct se_subsystem_dev *se_subdev)
+{
+	struct config_group *dev_stat_grp = &DEV_STAT_GRP(se_subdev)->stat_group;
+
+	config_group_init_type_name(&DEV_STAT_GRP(se_subdev)->scsi_dev_group,
+			"scsi_dev", &target_stat_scsi_dev_cit);
+	config_group_init_type_name(&DEV_STAT_GRP(se_subdev)->scsi_tgt_dev_group,
+			"scsi_tgt_dev", &target_stat_scsi_tgt_dev_cit);
+	config_group_init_type_name(&DEV_STAT_GRP(se_subdev)->scsi_lu_group,
+			"scsi_lu", &target_stat_scsi_lu_cit);
+
+	dev_stat_grp->default_groups[0] = &DEV_STAT_GRP(se_subdev)->scsi_dev_group;
+	dev_stat_grp->default_groups[1] = &DEV_STAT_GRP(se_subdev)->scsi_tgt_dev_group;
+	dev_stat_grp->default_groups[2] = &DEV_STAT_GRP(se_subdev)->scsi_lu_group;
+	dev_stat_grp->default_groups[3] = NULL;
+}
+
+/*
+ * SCSI Port Table
+ */
+
+CONFIGFS_EATTR_STRUCT(target_stat_scsi_port, se_port_stat_grps);
+#define DEV_STAT_SCSI_PORT_ATTR(_name, _mode)				\
+static struct target_stat_scsi_port_attribute				\
+			target_stat_scsi_port_##_name =			\
+	__CONFIGFS_EATTR(_name, _mode,					\
+	target_stat_scsi_port_show_attr_##_name,			\
+	target_stat_scsi_port_store_attr_##_name);
+
+#define DEV_STAT_SCSI_PORT_ATTR_RO(_name)				\
+static struct target_stat_scsi_port_attribute				\
+			target_stat_scsi_port_##_name =			\
+	__CONFIGFS_EATTR_RO(_name,					\
+	target_stat_scsi_port_show_attr_##_name);
+
+static ssize_t target_stat_scsi_port_show_attr_inst(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	struct se_device *dev = lun->lun_se_dev;
+	struct se_hba *hba;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	hba = dev->se_hba;
+	ret = snprintf(page, PAGE_SIZE, "%u\n", hba->hba_index);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_PORT_ATTR_RO(inst);
+
+static ssize_t target_stat_scsi_port_show_attr_dev(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	struct se_device *dev = lun->lun_se_dev;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	ret = snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_PORT_ATTR_RO(dev);
+
+static ssize_t target_stat_scsi_port_show_attr_indx(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	ret = snprintf(page, PAGE_SIZE, "%u\n", sep->sep_index);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_PORT_ATTR_RO(indx);
+
+static ssize_t target_stat_scsi_port_show_attr_role(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_device *dev = lun->lun_se_dev;
+	struct se_port *sep;
+	ssize_t ret;
+
+	if (!dev)
+		return -ENODEV;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	ret = snprintf(page, PAGE_SIZE, "%s%u\n", "Device", dev->dev_index);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_PORT_ATTR_RO(role);
+
+static ssize_t target_stat_scsi_port_show_attr_busy_count(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	/* FIXME: scsiPortBusyStatuses  */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", 0);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_PORT_ATTR_RO(busy_count);
+
+CONFIGFS_EATTR_OPS(target_stat_scsi_port, se_port_stat_grps, scsi_port_group);
+
+static struct configfs_attribute *target_stat_scsi_port_attrs[] = {
+	&target_stat_scsi_port_inst.attr,
+	&target_stat_scsi_port_dev.attr,
+	&target_stat_scsi_port_indx.attr,
+	&target_stat_scsi_port_role.attr,
+	&target_stat_scsi_port_busy_count.attr,
+	NULL,
+};
+
+static struct configfs_item_operations target_stat_scsi_port_attrib_ops = {
+	.show_attribute		= target_stat_scsi_port_attr_show,
+	.store_attribute	= target_stat_scsi_port_attr_store,
+};
+
+static struct config_item_type target_stat_scsi_port_cit = {
+	.ct_item_ops		= &target_stat_scsi_port_attrib_ops,
+	.ct_attrs		= target_stat_scsi_port_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * SCSI Target Port Table
+ */
+CONFIGFS_EATTR_STRUCT(target_stat_scsi_tgt_port, se_port_stat_grps);
+#define DEV_STAT_SCSI_TGT_PORT_ATTR(_name, _mode)			\
+static struct target_stat_scsi_tgt_port_attribute			\
+			target_stat_scsi_tgt_port_##_name =		\
+	__CONFIGFS_EATTR(_name, _mode,					\
+	target_stat_scsi_tgt_port_show_attr_##_name,			\
+	target_stat_scsi_tgt_port_store_attr_##_name);
+
+#define DEV_STAT_SCSI_TGT_PORT_ATTR_RO(_name)				\
+static struct target_stat_scsi_tgt_port_attribute			\
+			target_stat_scsi_tgt_port_##_name =		\
+	__CONFIGFS_EATTR_RO(_name,					\
+	target_stat_scsi_tgt_port_show_attr_##_name);
+
+static ssize_t target_stat_scsi_tgt_port_show_attr_inst(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_device *dev = lun->lun_se_dev;
+	struct se_port *sep;
+	struct se_hba *hba;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	hba = dev->se_hba;
+	ret = snprintf(page, PAGE_SIZE, "%u\n", hba->hba_index);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TGT_PORT_ATTR_RO(inst);
+
+static ssize_t target_stat_scsi_tgt_port_show_attr_dev(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_device *dev = lun->lun_se_dev;
+	struct se_port *sep;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	ret = snprintf(page, PAGE_SIZE, "%u\n", dev->dev_index);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TGT_PORT_ATTR_RO(dev);
+
+static ssize_t target_stat_scsi_tgt_port_show_attr_indx(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	ret = snprintf(page, PAGE_SIZE, "%u\n", sep->sep_index);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TGT_PORT_ATTR_RO(indx);
+
+static ssize_t target_stat_scsi_tgt_port_show_attr_name(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	tpg = sep->sep_tpg;
+
+	ret = snprintf(page, PAGE_SIZE, "%sPort#%u\n",
+		TPG_TFO(tpg)->get_fabric_name(), sep->sep_index);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TGT_PORT_ATTR_RO(name);
+
+static ssize_t target_stat_scsi_tgt_port_show_attr_port_index(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	tpg = sep->sep_tpg;
+
+	ret = snprintf(page, PAGE_SIZE, "%s%s%d\n",
+		TPG_TFO(tpg)->tpg_get_wwn(tpg), "+t+",
+		TPG_TFO(tpg)->tpg_get_tag(tpg));
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TGT_PORT_ATTR_RO(port_index);
+
+static ssize_t target_stat_scsi_tgt_port_show_attr_in_cmds(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	tpg = sep->sep_tpg;
+
+	ret = snprintf(page, PAGE_SIZE, "%llu\n", sep->sep_stats.cmd_pdus);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TGT_PORT_ATTR_RO(in_cmds);
+
+static ssize_t target_stat_scsi_tgt_port_show_attr_write_mbytes(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	tpg = sep->sep_tpg;
+
+	ret = snprintf(page, PAGE_SIZE, "%u\n",
+			(u32)(sep->sep_stats.rx_data_octets >> 20));
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TGT_PORT_ATTR_RO(write_mbytes);
+
+static ssize_t target_stat_scsi_tgt_port_show_attr_read_mbytes(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	tpg = sep->sep_tpg;
+
+	ret = snprintf(page, PAGE_SIZE, "%u\n",
+			(u32)(sep->sep_stats.tx_data_octets >> 20));
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TGT_PORT_ATTR_RO(read_mbytes);
+
+static ssize_t target_stat_scsi_tgt_port_show_attr_hs_in_cmds(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	tpg = sep->sep_tpg;
+
+	/* FIXME: scsiTgtPortHsInCommands */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", 0);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TGT_PORT_ATTR_RO(hs_in_cmds);
+
+CONFIGFS_EATTR_OPS(target_stat_scsi_tgt_port, se_port_stat_grps,
+		scsi_tgt_port_group);
+
+static struct configfs_attribute *target_stat_scsi_tgt_port_attrs[] = {
+	&target_stat_scsi_tgt_port_inst.attr,
+	&target_stat_scsi_tgt_port_dev.attr,
+	&target_stat_scsi_tgt_port_indx.attr,
+	&target_stat_scsi_tgt_port_name.attr,
+	&target_stat_scsi_tgt_port_port_index.attr,
+	&target_stat_scsi_tgt_port_in_cmds.attr,
+	&target_stat_scsi_tgt_port_write_mbytes.attr,
+	&target_stat_scsi_tgt_port_read_mbytes.attr,
+	&target_stat_scsi_tgt_port_hs_in_cmds.attr,
+	NULL,
+};
+
+static struct configfs_item_operations target_stat_scsi_tgt_port_attrib_ops = {
+	.show_attribute		= target_stat_scsi_tgt_port_attr_show,
+	.store_attribute	= target_stat_scsi_tgt_port_attr_store,
+};
+
+static struct config_item_type target_stat_scsi_tgt_port_cit = {
+	.ct_item_ops		= &target_stat_scsi_tgt_port_attrib_ops,
+	.ct_attrs		= target_stat_scsi_tgt_port_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * SCSI Transport Table
+o */
+
+CONFIGFS_EATTR_STRUCT(target_stat_scsi_transport, se_port_stat_grps);
+#define DEV_STAT_SCSI_TRANSPORT_ATTR(_name, _mode)			\
+static struct target_stat_scsi_transport_attribute			\
+			target_stat_scsi_transport_##_name =		\
+	__CONFIGFS_EATTR(_name, _mode,					\
+	target_stat_scsi_transport_show_attr_##_name,			\
+	target_stat_scsi_transport_store_attr_##_name);
+
+#define DEV_STAT_SCSI_TRANSPORT_ATTR_RO(_name)				\
+static struct target_stat_scsi_transport_attribute			\
+			target_stat_scsi_transport_##_name =		\
+	__CONFIGFS_EATTR_RO(_name,					\
+	target_stat_scsi_transport_show_attr_##_name);
+
+static ssize_t target_stat_scsi_transport_show_attr_inst(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_device *dev = lun->lun_se_dev;
+	struct se_port *sep;
+	struct se_hba *hba;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+
+	hba = dev->se_hba;
+	ret = snprintf(page, PAGE_SIZE, "%u\n", hba->hba_index);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TRANSPORT_ATTR_RO(inst);
+
+static ssize_t target_stat_scsi_transport_show_attr_device(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	tpg = sep->sep_tpg;
+	/* scsiTransportType */
+	ret = snprintf(page, PAGE_SIZE, "scsiTransport%s\n",
+			TPG_TFO(tpg)->get_fabric_name());
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TRANSPORT_ATTR_RO(device);
+
+static ssize_t target_stat_scsi_transport_show_attr_indx(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_port *sep;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	tpg = sep->sep_tpg;
+	ret = snprintf(page, PAGE_SIZE, "%u\n",
+			TPG_TFO(tpg)->tpg_get_inst_index(tpg));
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TRANSPORT_ATTR_RO(indx);
+
+static ssize_t target_stat_scsi_transport_show_attr_dev_name(
+	struct se_port_stat_grps *pgrps, char *page)
+{
+	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
+	struct se_device *dev = lun->lun_se_dev;
+	struct se_port *sep;
+	struct se_portal_group *tpg;
+	struct t10_wwn *wwn;
+	ssize_t ret;
+
+	spin_lock(&lun->lun_sep_lock);
+	sep = lun->lun_sep;
+	if (!sep) {
+		spin_unlock(&lun->lun_sep_lock);
+		return -ENODEV;
+	}
+	tpg = sep->sep_tpg;
+	wwn = DEV_T10_WWN(dev);
+	/* scsiTransportDevName */
+	ret = snprintf(page, PAGE_SIZE, "%s+%s\n",
+			TPG_TFO(tpg)->tpg_get_wwn(tpg),
+			(strlen(wwn->unit_serial)) ? wwn->unit_serial :
+			wwn->vendor);
+	spin_unlock(&lun->lun_sep_lock);
+	return ret;
+}
+DEV_STAT_SCSI_TRANSPORT_ATTR_RO(dev_name);
+
+CONFIGFS_EATTR_OPS(target_stat_scsi_transport, se_port_stat_grps,
+		scsi_transport_group);
+
+static struct configfs_attribute *target_stat_scsi_transport_attrs[] = {
+	&target_stat_scsi_transport_inst.attr,
+	&target_stat_scsi_transport_device.attr,
+	&target_stat_scsi_transport_indx.attr,
+	&target_stat_scsi_transport_dev_name.attr,
+	NULL,
+};
+
+static struct configfs_item_operations target_stat_scsi_transport_attrib_ops = {
+	.show_attribute		= target_stat_scsi_transport_attr_show,
+	.store_attribute	= target_stat_scsi_transport_attr_store,
+};
+
+static struct config_item_type target_stat_scsi_transport_cit = {
+	.ct_item_ops		= &target_stat_scsi_transport_attrib_ops,
+	.ct_attrs		= target_stat_scsi_transport_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * Called from target_core_fabric_configfs.c:target_fabric_make_lun() to setup
+ * the target port statistics groups + configfs CITs located in target_core_stat.c
+ */
+void target_stat_setup_port_default_groups(struct se_lun *lun)
+{
+	struct config_group *port_stat_grp = &PORT_STAT_GRP(lun)->stat_group;
+
+	config_group_init_type_name(&PORT_STAT_GRP(lun)->scsi_port_group,
+			"scsi_port", &target_stat_scsi_port_cit);
+	config_group_init_type_name(&PORT_STAT_GRP(lun)->scsi_tgt_port_group,
+			"scsi_tgt_port", &target_stat_scsi_tgt_port_cit);
+	config_group_init_type_name(&PORT_STAT_GRP(lun)->scsi_transport_group,
+			"scsi_transport", &target_stat_scsi_transport_cit);
+
+	port_stat_grp->default_groups[0] = &PORT_STAT_GRP(lun)->scsi_port_group;
+	port_stat_grp->default_groups[1] = &PORT_STAT_GRP(lun)->scsi_tgt_port_group;
+	port_stat_grp->default_groups[2] = &PORT_STAT_GRP(lun)->scsi_transport_group;
+	port_stat_grp->default_groups[3] = NULL;
+}
+
+/*
+ * SCSI Authorized Initiator Table
+ */
+
+CONFIGFS_EATTR_STRUCT(target_stat_scsi_auth_intr, se_ml_stat_grps);
+#define DEV_STAT_SCSI_AUTH_INTR_ATTR(_name, _mode)			\
+static struct target_stat_scsi_auth_intr_attribute			\
+			target_stat_scsi_auth_intr_##_name =		\
+	__CONFIGFS_EATTR(_name, _mode,					\
+	target_stat_scsi_auth_intr_show_attr_##_name,			\
+	target_stat_scsi_auth_intr_store_attr_##_name);
+
+#define DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(_name)				\
+static struct target_stat_scsi_auth_intr_attribute			\
+			target_stat_scsi_auth_intr_##_name =		\
+	__CONFIGFS_EATTR_RO(_name,					\
+	target_stat_scsi_auth_intr_show_attr_##_name);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_inst(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	tpg = nacl->se_tpg;
+	/* scsiInstIndex */
+	ret = snprintf(page, PAGE_SIZE, "%u\n",
+			TPG_TFO(tpg)->tpg_get_inst_index(tpg));
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(inst);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_dev(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	struct se_lun *lun;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	tpg = nacl->se_tpg;
+	lun = deve->se_lun;
+	/* scsiDeviceIndex */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_se_dev->dev_index);
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(dev);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_port(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	tpg = nacl->se_tpg;
+	/* scsiAuthIntrTgtPortIndex */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", TPG_TFO(tpg)->tpg_get_tag(tpg));
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(port);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_indx(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	/* scsiAuthIntrIndex */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", nacl->acl_index);
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(indx);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_dev_or_port(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	/* scsiAuthIntrDevOrPort */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", 1);
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(dev_or_port);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_intr_name(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	/* scsiAuthIntrName */
+	ret = snprintf(page, PAGE_SIZE, "%s\n", nacl->initiatorname);
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(intr_name);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_map_indx(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	/* FIXME: scsiAuthIntrLunMapIndex */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", 0);
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(map_indx);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_att_count(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	/* scsiAuthIntrAttachedTimes */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", deve->attach_count);
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(att_count);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_num_cmds(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	/* scsiAuthIntrOutCommands */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", deve->total_cmds);
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(num_cmds);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_read_mbytes(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	/* scsiAuthIntrReadMegaBytes */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", (u32)(deve->read_bytes >> 20));
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(read_mbytes);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_write_mbytes(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	/* scsiAuthIntrWrittenMegaBytes */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", (u32)(deve->write_bytes >> 20));
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(write_mbytes);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_hs_num_cmds(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	/* FIXME: scsiAuthIntrHSOutCommands */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", 0);
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(hs_num_cmds);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_creation_time(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	/* scsiAuthIntrLastCreation */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", (u32)(((u32)deve->creation_time -
+				INITIAL_JIFFIES) * 100 / HZ));
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(creation_time);
+
+static ssize_t target_stat_scsi_auth_intr_show_attr_row_status(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	/* FIXME: scsiAuthIntrRowStatus */
+	ret = snprintf(page, PAGE_SIZE, "Ready\n");
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_AUTH_INTR_ATTR_RO(row_status);
+
+CONFIGFS_EATTR_OPS(target_stat_scsi_auth_intr, se_ml_stat_grps,
+		scsi_auth_intr_group);
+
+static struct configfs_attribute *target_stat_scsi_auth_intr_attrs[] = {
+	&target_stat_scsi_auth_intr_inst.attr,
+	&target_stat_scsi_auth_intr_dev.attr,
+	&target_stat_scsi_auth_intr_port.attr,
+	&target_stat_scsi_auth_intr_indx.attr,
+	&target_stat_scsi_auth_intr_dev_or_port.attr,
+	&target_stat_scsi_auth_intr_intr_name.attr,
+	&target_stat_scsi_auth_intr_map_indx.attr,
+	&target_stat_scsi_auth_intr_att_count.attr,
+	&target_stat_scsi_auth_intr_num_cmds.attr,
+	&target_stat_scsi_auth_intr_read_mbytes.attr,
+	&target_stat_scsi_auth_intr_write_mbytes.attr,
+	&target_stat_scsi_auth_intr_hs_num_cmds.attr,
+	&target_stat_scsi_auth_intr_creation_time.attr,
+	&target_stat_scsi_auth_intr_row_status.attr,
+	NULL,
+};
+
+static struct configfs_item_operations target_stat_scsi_auth_intr_attrib_ops = {
+	.show_attribute		= target_stat_scsi_auth_intr_attr_show,
+	.store_attribute	= target_stat_scsi_auth_intr_attr_store,
+};
+
+static struct config_item_type target_stat_scsi_auth_intr_cit = {
+	.ct_item_ops		= &target_stat_scsi_auth_intr_attrib_ops,
+	.ct_attrs		= target_stat_scsi_auth_intr_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * SCSI Attached Initiator Port Table
+ */
+
+CONFIGFS_EATTR_STRUCT(target_stat_scsi_att_intr_port, se_ml_stat_grps);
+#define DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR(_name, _mode)			\
+static struct target_stat_scsi_att_intr_port_attribute			\
+		target_stat_scsi_att_intr_port_##_name =		\
+	__CONFIGFS_EATTR(_name, _mode,					\
+	target_stat_scsi_att_intr_port_show_attr_##_name,		\
+	target_stat_scsi_att_intr_port_store_attr_##_name);
+
+#define DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(_name)			\
+static struct target_stat_scsi_att_intr_port_attribute			\
+		target_stat_scsi_att_intr_port_##_name =		\
+	__CONFIGFS_EATTR_RO(_name,					\
+	target_stat_scsi_att_intr_port_show_attr_##_name);
+
+static ssize_t target_stat_scsi_att_intr_port_show_attr_inst(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	tpg = nacl->se_tpg;
+	/* scsiInstIndex */
+	ret = snprintf(page, PAGE_SIZE, "%u\n",
+			TPG_TFO(tpg)->tpg_get_inst_index(tpg));
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(inst);
+
+static ssize_t target_stat_scsi_att_intr_port_show_attr_dev(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	struct se_lun *lun;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	tpg = nacl->se_tpg;
+	lun = deve->se_lun;
+	/* scsiDeviceIndex */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_se_dev->dev_index);
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(dev);
+
+static ssize_t target_stat_scsi_att_intr_port_show_attr_port(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	tpg = nacl->se_tpg;
+	/* scsiPortIndex */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", TPG_TFO(tpg)->tpg_get_tag(tpg));
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(port);
+
+static ssize_t target_stat_scsi_att_intr_port_show_attr_indx(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_session *se_sess;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->nacl_sess_lock);
+	se_sess = nacl->nacl_sess;
+	if (!se_sess) {
+		spin_unlock_irq(&nacl->nacl_sess_lock);
+		return -ENODEV;
+	}
+
+	tpg = nacl->se_tpg;
+	/* scsiAttIntrPortIndex */
+	ret = snprintf(page, PAGE_SIZE, "%u\n",
+			TPG_TFO(tpg)->sess_get_index(se_sess));
+	spin_unlock_irq(&nacl->nacl_sess_lock);
+	return ret;
+}
+DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(indx);
+
+static ssize_t target_stat_scsi_att_intr_port_show_attr_port_auth_indx(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_dev_entry *deve;
+	ssize_t ret;
+
+	spin_lock_irq(&nacl->device_list_lock);
+	deve = &nacl->device_list[lacl->mapped_lun];
+	if (!deve->se_lun || !deve->se_lun_acl) {
+		spin_unlock_irq(&nacl->device_list_lock);
+		return -ENODEV;
+	}
+	/* scsiAttIntrPortAuthIntrIdx */
+	ret = snprintf(page, PAGE_SIZE, "%u\n", nacl->acl_index);
+	spin_unlock_irq(&nacl->device_list_lock);
+	return ret;
+}
+DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(port_auth_indx);
+
+static ssize_t target_stat_scsi_att_intr_port_show_attr_port_ident(
+	struct se_ml_stat_grps *lgrps, char *page)
+{
+	struct se_lun_acl *lacl = container_of(lgrps,
+			struct se_lun_acl, ml_stat_grps);
+	struct se_node_acl *nacl = lacl->se_lun_nacl;
+	struct se_session *se_sess;
+	struct se_portal_group *tpg;
+	ssize_t ret;
+	unsigned char buf[64];
+
+	spin_lock_irq(&nacl->nacl_sess_lock);
+	se_sess = nacl->nacl_sess;
+	if (!se_sess) {
+		spin_unlock_irq(&nacl->nacl_sess_lock);
+		return -ENODEV;
+	}
+
+	tpg = nacl->se_tpg;
+	/* scsiAttIntrPortName+scsiAttIntrPortIdentifier */
+	memset(buf, 0, 64);
+	if (TPG_TFO(tpg)->sess_get_initiator_sid != NULL)
+		TPG_TFO(tpg)->sess_get_initiator_sid(se_sess,
+				(unsigned char *)&buf[0], 64);
+
+	ret = snprintf(page, PAGE_SIZE, "%s+i+%s\n", nacl->initiatorname, buf);
+	spin_unlock_irq(&nacl->nacl_sess_lock);
+	return ret;
+}
+DEV_STAT_SCSI_ATTR_INTR_PORT_ATTR_RO(port_ident);
+
+CONFIGFS_EATTR_OPS(target_stat_scsi_att_intr_port, se_ml_stat_grps,
+		scsi_att_intr_port_group);
+
+static struct configfs_attribute *target_stat_scsi_ath_intr_port_attrs[] = {
+	&target_stat_scsi_att_intr_port_inst.attr,
+	&target_stat_scsi_att_intr_port_dev.attr,
+	&target_stat_scsi_att_intr_port_port.attr,
+	&target_stat_scsi_att_intr_port_indx.attr,
+	&target_stat_scsi_att_intr_port_port_auth_indx.attr,
+	&target_stat_scsi_att_intr_port_port_ident.attr,
+	NULL,
+};
+
+static struct configfs_item_operations target_stat_scsi_att_intr_port_attrib_ops = {
+	.show_attribute		= target_stat_scsi_att_intr_port_attr_show,
+	.store_attribute	= target_stat_scsi_att_intr_port_attr_store,
+};
+
+static struct config_item_type target_stat_scsi_att_intr_port_cit = {
+	.ct_item_ops		= &target_stat_scsi_att_intr_port_attrib_ops,
+	.ct_attrs		= target_stat_scsi_ath_intr_port_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+/*
+ * Called from target_core_fabric_configfs.c:target_fabric_make_mappedlun() to setup
+ * the target MappedLUN statistics groups + configfs CITs located in target_core_stat.c
+ */
+void target_stat_setup_mappedlun_default_groups(struct se_lun_acl *lacl)
+{
+	struct config_group *ml_stat_grp = &ML_STAT_GRPS(lacl)->stat_group;
+
+	config_group_init_type_name(&ML_STAT_GRPS(lacl)->scsi_auth_intr_group,
+			"scsi_auth_intr", &target_stat_scsi_auth_intr_cit);
+	config_group_init_type_name(&ML_STAT_GRPS(lacl)->scsi_att_intr_port_group,
+			"scsi_att_intr_port", &target_stat_scsi_att_intr_port_cit);
+
+	ml_stat_grp->default_groups[0] = &ML_STAT_GRPS(lacl)->scsi_auth_intr_group;
+	ml_stat_grp->default_groups[1] = &ML_STAT_GRPS(lacl)->scsi_att_intr_port_group;
+	ml_stat_grp->default_groups[2] = NULL;
+}

diff --git a/drivers/target/target_core_stat.h b/drivers/target/target_core_stat.h
new file mode 100644
index 0000000..86c252f
--- /dev/null
+++ b/drivers/target/target_core_stat.h

@@ -0,0 +1,8 @@
+#ifndef TARGET_CORE_STAT_H
+#define TARGET_CORE_STAT_H
+
+extern void target_stat_setup_dev_default_groups(struct se_subsystem_dev *);
+extern void target_stat_setup_port_default_groups(struct se_lun *);
+extern void target_stat_setup_mappedlun_default_groups(struct se_lun_acl *);
+
+#endif   /*** TARGET_CORE_STAT_H ***/

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index ff9ace0..bf6aa8a 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c

@@ -227,8 +227,6 @@
 static int transport_set_sense_codes(struct se_cmd *cmd, u8 asc, u8 ascq);
 static void transport_stop_all_task_timers(struct se_cmd *cmd);
 
-int transport_emulate_control_cdb(struct se_task *task);
-
 int init_se_global(void)
 {
 	struct se_global *global;
@@ -1622,7 +1620,7 @@
 	const char *inquiry_prod,
 	const char *inquiry_rev)
 {
-	int ret = 0, force_pt;
+	int force_pt;
 	struct se_device  *dev;
 
 	dev = kzalloc(sizeof(struct se_device), GFP_KERNEL);
@@ -1739,9 +1737,8 @@
 	}
 	scsi_dump_inquiry(dev);
 
+	return dev;
 out:
-	if (!ret)
-		return dev;
 	kthread_stop(dev->process_thread);
 
 	spin_lock(&hba->device_lock);
@@ -4359,11 +4356,9 @@
 			printk(KERN_ERR "Unable to allocate struct se_mem\n");
 			goto out;
 		}
-		INIT_LIST_HEAD(&se_mem->se_list);
-		se_mem->se_len = (length > dma_size) ? dma_size : length;
 
 /* #warning FIXME Allocate contigous pages for struct se_mem elements */
-		se_mem->se_page = (struct page *) alloc_pages(GFP_KERNEL, 0);
+		se_mem->se_page = alloc_pages(GFP_KERNEL, 0);
 		if (!(se_mem->se_page)) {
 			printk(KERN_ERR "alloc_pages() failed\n");
 			goto out;
@@ -4374,6 +4369,8 @@
 			printk(KERN_ERR "kmap_atomic() failed\n");
 			goto out;
 		}
+		INIT_LIST_HEAD(&se_mem->se_list);
+		se_mem->se_len = (length > dma_size) ? dma_size : length;
 		memset(buf, 0, se_mem->se_len);
 		kunmap_atomic(buf, KM_IRQ0);
 
@@ -4392,10 +4389,13 @@
 
 	return 0;
 out:
+	if (se_mem)
+		__free_pages(se_mem->se_page, 0);
+	kmem_cache_free(se_mem_cache, se_mem);
 	return -1;
 }
 
-extern u32 transport_calc_sg_num(
+u32 transport_calc_sg_num(
 	struct se_task *task,
 	struct se_mem *in_se_mem,
 	u32 task_offset)
@@ -5834,31 +5834,26 @@
 	int ret;
 
 	switch (tmr->function) {
-	case ABORT_TASK:
+	case TMR_ABORT_TASK:
 		ref_cmd = tmr->ref_cmd;
 		tmr->response = TMR_FUNCTION_REJECTED;
 		break;
-	case ABORT_TASK_SET:
-	case CLEAR_ACA:
-	case CLEAR_TASK_SET:
+	case TMR_ABORT_TASK_SET:
+	case TMR_CLEAR_ACA:
+	case TMR_CLEAR_TASK_SET:
 		tmr->response = TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED;
 		break;
-	case LUN_RESET:
+	case TMR_LUN_RESET:
 		ret = core_tmr_lun_reset(dev, tmr, NULL, NULL);
 		tmr->response = (!ret) ? TMR_FUNCTION_COMPLETE :
 					 TMR_FUNCTION_REJECTED;
 		break;
-#if 0
-	case TARGET_WARM_RESET:
-		transport_generic_host_reset(dev->se_hba);
+	case TMR_TARGET_WARM_RESET:
 		tmr->response = TMR_FUNCTION_REJECTED;
 		break;
-	case TARGET_COLD_RESET:
-		transport_generic_host_reset(dev->se_hba);
-		transport_generic_cold_reset(dev->se_hba);
+	case TMR_TARGET_COLD_RESET:
 		tmr->response = TMR_FUNCTION_REJECTED;
 		break;
-#endif
 	default:
 		printk(KERN_ERR "Uknown TMR function: 0x%02x.\n",
 				tmr->function);

diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c
index 25a8bc5..87e7e6c 100644
--- a/drivers/tty/serial/kgdboc.c
+++ b/drivers/tty/serial/kgdboc.c

@@ -131,7 +131,7 @@
 
 static int kgdboc_option_setup(char *opt)
 {
-	if (strlen(opt) > MAX_CONFIG_LEN) {
+	if (strlen(opt) >= MAX_CONFIG_LEN) {
 		printk(KERN_ERR "kgdboc: config string too long\n");
 		return -ENOSPC;
 	}

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 81f1395..43db715 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c

@@ -306,7 +306,7 @@
 
 static void sysrq_handle_showmem(int key)
 {
-	show_mem();
+	show_mem(0);
 }
 static struct sysrq_key_op sysrq_showmem_op = {
 	.handler	= sysrq_handle_showmem,

diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index 6dd3c68..d6b342b 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c

@@ -600,7 +600,7 @@
 
 static void fn_show_mem(struct vc_data *vc)
 {
-	show_mem();
+	show_mem(0);
 }
 
 static void fn_show_state(struct vc_data *vc)

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 54f9237..475f9c5 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h

@@ -61,8 +61,6 @@
 		current->pid, __func__, ##args);	\
 } while (0)
 
-extern spinlock_t autofs4_lock;
-
 /* Unified info structure.  This is pointed to by both the dentry and
    inode structures.  Each file in the filesystem has an instance of this
    structure.  It holds a reference to the dentry, so dentries are never

diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 1442da4..509fe1e 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c

@@ -372,6 +372,10 @@
 		return -EBUSY;
 	} else {
 		struct file *pipe = fget(pipefd);
+		if (!pipe) {
+			err = -EBADF;
+			goto out;
+		}
 		if (!pipe->f_op || !pipe->f_op->write) {
 			err = -EPIPE;
 			fput(pipe);

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index f43100b..450f529 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c

@@ -87,18 +87,70 @@
 }
 
 /*
+ * Calculate and dget next entry in the subdirs list under root.
+ */
+static struct dentry *get_next_positive_subdir(struct dentry *prev,
+						struct dentry *root)
+{
+	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
+	struct list_head *next;
+	struct dentry *p, *q;
+
+	spin_lock(&sbi->lookup_lock);
+
+	if (prev == NULL) {
+		spin_lock(&root->d_lock);
+		prev = dget_dlock(root);
+		next = prev->d_subdirs.next;
+		p = prev;
+		goto start;
+	}
+
+	p = prev;
+	spin_lock(&p->d_lock);
+again:
+	next = p->d_u.d_child.next;
+start:
+	if (next == &root->d_subdirs) {
+		spin_unlock(&p->d_lock);
+		spin_unlock(&sbi->lookup_lock);
+		dput(prev);
+		return NULL;
+	}
+
+	q = list_entry(next, struct dentry, d_u.d_child);
+
+	spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
+	/* Negative dentry - try next */
+	if (!simple_positive(q)) {
+		spin_unlock(&p->d_lock);
+		p = q;
+		goto again;
+	}
+	dget_dlock(q);
+	spin_unlock(&q->d_lock);
+	spin_unlock(&p->d_lock);
+	spin_unlock(&sbi->lookup_lock);
+
+	dput(prev);
+
+	return q;
+}
+
+/*
  * Calculate and dget next entry in top down tree traversal.
  */
 static struct dentry *get_next_positive_dentry(struct dentry *prev,
 						struct dentry *root)
 {
+	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
 	struct list_head *next;
 	struct dentry *p, *ret;
 
 	if (prev == NULL)
 		return dget(root);
 
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
 relock:
 	p = prev;
 	spin_lock(&p->d_lock);
@@ -110,7 +162,7 @@
 
 			if (p == root) {
 				spin_unlock(&p->d_lock);
-				spin_unlock(&autofs4_lock);
+				spin_unlock(&sbi->lookup_lock);
 				dput(prev);
 				return NULL;
 			}
@@ -140,7 +192,7 @@
 	dget_dlock(ret);
 	spin_unlock(&ret->d_lock);
 	spin_unlock(&p->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 	dput(prev);
 
@@ -290,11 +342,8 @@
 	spin_lock(&sbi->fs_lock);
 	ino = autofs4_dentry_ino(root);
 	/* No point expiring a pending mount */
-	if (ino->flags & AUTOFS_INF_PENDING) {
-		spin_unlock(&sbi->fs_lock);
-		return NULL;
-	}
-	managed_dentry_set_transit(root);
+	if (ino->flags & AUTOFS_INF_PENDING)
+		goto out;
 	if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
 		struct autofs_info *ino = autofs4_dentry_ino(root);
 		ino->flags |= AUTOFS_INF_EXPIRING;
@@ -302,7 +351,7 @@
 		spin_unlock(&sbi->fs_lock);
 		return root;
 	}
-	managed_dentry_clear_transit(root);
+out:
 	spin_unlock(&sbi->fs_lock);
 	dput(root);
 
@@ -336,13 +385,12 @@
 	timeout = sbi->exp_timeout;
 
 	dentry = NULL;
-	while ((dentry = get_next_positive_dentry(dentry, root))) {
+	while ((dentry = get_next_positive_subdir(dentry, root))) {
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
 		/* No point expiring a pending mount */
 		if (ino->flags & AUTOFS_INF_PENDING)
-			goto cont;
-		managed_dentry_set_transit(dentry);
+			goto next;
 
 		/*
 		 * Case 1: (i) indirect mount or top level pseudo direct mount
@@ -402,8 +450,6 @@
 			}
 		}
 next:
-		managed_dentry_clear_transit(dentry);
-cont:
 		spin_unlock(&sbi->fs_lock);
 	}
 	return NULL;
@@ -415,13 +461,13 @@
 	ino->flags |= AUTOFS_INF_EXPIRING;
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
 	spin_lock(&expired->d_parent->d_lock);
 	spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
 	list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
 	spin_unlock(&expired->d_lock);
 	spin_unlock(&expired->d_parent->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 	return expired;
 }
 
@@ -484,8 +530,6 @@
 	spin_lock(&sbi->fs_lock);
 	ino = autofs4_dentry_ino(dentry);
 	ino->flags &= ~AUTOFS_INF_EXPIRING;
-	if (!d_unhashed(dentry))
-		managed_dentry_clear_transit(dentry);
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 
@@ -513,9 +557,7 @@
 		spin_lock(&sbi->fs_lock);
 		ino->flags &= ~AUTOFS_INF_EXPIRING;
 		spin_lock(&dentry->d_lock);
-		if (ret)
-			__managed_dentry_clear_transit(dentry);
-		else {
+		if (!ret) {
 			if ((IS_ROOT(dentry) ||
 			    (autofs_type_indirect(sbi->type) &&
 			     IS_ROOT(dentry->d_parent))) &&

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e6f84d2..96804a1 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c

@@ -23,8 +23,6 @@
 
 #include "autofs_i.h"
 
-DEFINE_SPINLOCK(autofs4_lock);
-
 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
 static int autofs4_dir_unlink(struct inode *,struct dentry *);
 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -125,15 +123,15 @@
 	 * autofs file system so just let the libfs routines handle
 	 * it.
 	 */
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&autofs4_lock);
+		spin_unlock(&sbi->lookup_lock);
 		return -ENOENT;
 	}
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 out:
 	return dcache_dir_open(inode, file);
@@ -171,7 +169,6 @@
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->active_list;
 	list_for_each(p, head) {
@@ -204,14 +201,12 @@
 			dget_dlock(active);
 			spin_unlock(&active->d_lock);
 			spin_unlock(&sbi->lookup_lock);
-			spin_unlock(&autofs4_lock);
 			return active;
 		}
 next:
 		spin_unlock(&active->d_lock);
 	}
 	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&autofs4_lock);
 
 	return NULL;
 }
@@ -226,7 +221,6 @@
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->expiring_list;
 	list_for_each(p, head) {
@@ -259,14 +253,12 @@
 			dget_dlock(expiring);
 			spin_unlock(&expiring->d_lock);
 			spin_unlock(&sbi->lookup_lock);
-			spin_unlock(&autofs4_lock);
 			return expiring;
 		}
 next:
 		spin_unlock(&expiring->d_lock);
 	}
 	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&autofs4_lock);
 
 	return NULL;
 }
@@ -275,17 +267,16 @@
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
-	int status;
+	int status = 0;
 
 	if (ino->flags & AUTOFS_INF_PENDING) {
 		DPRINTK("waiting for mount name=%.*s",
 			dentry->d_name.len, dentry->d_name.name);
 		status = autofs4_wait(sbi, dentry, NFY_MOUNT);
 		DPRINTK("mount wait done status=%d", status);
-		ino->last_used = jiffies;
-		return status;
 	}
-	return 0;
+	ino->last_used = jiffies;
+	return status;
 }
 
 static int do_expire_wait(struct dentry *dentry)
@@ -319,9 +310,12 @@
 	 */
 	if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
 		struct dentry *parent = dentry->d_parent;
+		struct autofs_info *ino;
 		struct dentry *new = d_lookup(parent, &dentry->d_name);
 		if (!new)
 			return NULL;
+		ino = autofs4_dentry_ino(new);
+		ino->last_used = jiffies;
 		dput(path->dentry);
 		path->dentry = new;
 	}
@@ -338,18 +332,6 @@
 	DPRINTK("dentry=%p %.*s",
 		dentry, dentry->d_name.len, dentry->d_name.name);
 
-	/*
-	 * Someone may have manually umounted this or it was a submount
-	 * that has gone away.
-	 */
-	spin_lock(&dentry->d_lock);
-	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
-		if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
-		     (dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
-			__managed_dentry_set_transit(path->dentry);
-	}
-	spin_unlock(&dentry->d_lock);
-
 	/* The daemon never triggers a mount. */
 	if (autofs4_oz_mode(sbi))
 		return NULL;
@@ -418,18 +400,17 @@
 done:
 	if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
 		/*
-		 * Any needed mounting has been completed and the path updated
-		 * so turn this into a normal dentry so we don't continually
-		 * call ->d_automount() and ->d_manage().
-		 */
-		spin_lock(&dentry->d_lock);
-		__managed_dentry_clear_transit(dentry);
-		/*
+		 * Any needed mounting has been completed and the path
+		 * updated so clear DCACHE_NEED_AUTOMOUNT so we don't
+		 * call ->d_automount() on rootless multi-mounts since
+		 * it can lead to an incorrect ELOOP error return.
+		 *
 		 * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and
 		 * symlinks as in all other cases the dentry will be covered by
 		 * an actual mount so ->d_automount() won't be called during
 		 * the follow.
 		 */
+		spin_lock(&dentry->d_lock);
 		if ((!d_mountpoint(dentry) &&
 		    !list_empty(&dentry->d_subdirs)) ||
 		    (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)))
@@ -455,6 +436,8 @@
 
 	/* The daemon never waits. */
 	if (autofs4_oz_mode(sbi)) {
+		if (rcu_walk)
+			return 0;
 		if (!d_mountpoint(dentry))
 			return -EISDIR;
 		return 0;
@@ -612,12 +595,12 @@
 
 	dir->i_mtime = CURRENT_TIME;
 
-	spin_lock(&autofs4_lock);
-	autofs4_add_expiring(dentry);
+	spin_lock(&sbi->lookup_lock);
+	__autofs4_add_expiring(dentry);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 	return 0;
 }
@@ -686,20 +669,17 @@
 	if (!autofs4_oz_mode(sbi))
 		return -EACCES;
 
-	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	if (!list_empty(&dentry->d_subdirs)) {
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&sbi->lookup_lock);
-		spin_unlock(&autofs4_lock);
 		return -ENOTEMPTY;
 	}
 	__autofs4_add_expiring(dentry);
-	spin_unlock(&sbi->lookup_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 	if (sbi->version < 5)
 		autofs_clear_leaf_automount_flags(dentry);

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 5601005..2543598 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c

@@ -197,12 +197,12 @@
 
 	seq = read_seqbegin(&rename_lock);
 	rcu_read_lock();
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->fs_lock);
 	for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
 		len += tmp->d_name.len + 1;
 
 	if (!len || --len > NAME_MAX) {
-		spin_unlock(&autofs4_lock);
+		spin_unlock(&sbi->fs_lock);
 		rcu_read_unlock();
 		if (read_seqretry(&rename_lock, seq))
 			goto rename_retry;
@@ -218,7 +218,7 @@
 		p -= tmp->d_name.len;
 		strncpy(p, tmp->d_name.name, tmp->d_name.len);
 	}
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->fs_lock);
 	rcu_read_unlock();
 	if (read_seqretry(&rename_lock, seq))
 		goto rename_retry;

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7d02afb..c1511c6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c

@@ -55,11 +55,13 @@
 static void bdev_inode_switch_bdi(struct inode *inode,
 			struct backing_dev_info *dst)
 {
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	inode->i_data.backing_dev_info = dst;
 	if (inode->i_state & I_DIRTY)
 		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 }
 
 static sector_t max_block(struct block_device *bdev)

diff --git a/fs/buffer.c b/fs/buffer.c
index 2e6b1a3..a08bb8e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c

@@ -1138,7 +1138,7 @@
  * inode list.
  *
  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and the global inode_lock.
+ * mapping->tree_lock and mapping->host->i_lock.
  */
 void mark_buffer_dirty(struct buffer_head *bh)
 {

diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index 06d27a4..af56ad5 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c

@@ -61,4 +61,13 @@
 		fs_table_header = NULL;
 	}
 }
+
+#else
+void coda_sysctl_init(void)
+{
+}
+
+void coda_sysctl_clean(void)
+{
+}
 #endif

diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 816f88e6b..98b77c8 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c

@@ -8,6 +8,7 @@
 #include <linux/writeback.h>
 #include <linux/sysctl.h>
 #include <linux/gfp.h>
+#include "internal.h"
 
 /* A global variable is a bit ugly, but it keeps the code simple */
 int sysctl_drop_caches;
@@ -16,20 +17,23 @@
 {
 	struct inode *inode, *toput_inode = NULL;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+		spin_lock(&inode->i_lock);
+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+		    (inode->i_mapping->nrpages == 0)) {
+			spin_unlock(&inode->i_lock);
 			continue;
-		if (inode->i_mapping->nrpages == 0)
-			continue;
+		}
 		__iget(inode);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_sb_list_lock);
 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
 		iput(toput_inode);
 		toput_inode = inode;
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(toput_inode);
 }
 

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index adf96b8..97b970e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c

@@ -21,6 +21,8 @@
 #include "ext4_jbd2.h"
 #include "mballoc.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
@@ -342,6 +344,7 @@
 	 * We do it here so the bitmap uptodate bit
 	 * get set with buffer lock held.
 	 */
+	trace_ext4_read_block_bitmap_load(sb, block_group);
 	set_bitmap_uptodate(bh);
 	if (bh_submit_read(bh) < 0) {
 		put_bh(bh);

diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index d8b992e..e25e99b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h

@@ -202,13 +202,6 @@
 	return 1;
 }
 
-static inline void ext4_journal_release_buffer(handle_t *handle,
-						struct buffer_head *bh)
-{
-	if (ext4_handle_valid(handle))
-		jbd2_journal_release_buffer(handle, bh);
-}
-
 static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
 {
 	return ext4_journal_start_sb(inode->i_sb, nblocks);

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7516fb9..dd2cb50 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c

@@ -44,6 +44,8 @@
 #include "ext4_jbd2.h"
 #include "ext4_extents.h"
 
+#include <trace/events/ext4.h>
+
 static int ext4_ext_truncate_extend_restart(handle_t *handle,
 					    struct inode *inode,
 					    int needed)
@@ -664,6 +666,8 @@
 		if (unlikely(!bh))
 			goto err;
 		if (!bh_uptodate_or_lock(bh)) {
+			trace_ext4_ext_load_extent(inode, block,
+						path[ppos].p_block);
 			if (bh_submit_read(bh) < 0) {
 				put_bh(bh);
 				goto err;
@@ -1034,7 +1038,7 @@
 		for (i = 0; i < depth; i++) {
 			if (!ablocks[i])
 				continue;
-			ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+			ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
 					 EXT4_FREE_BLOCKS_METADATA);
 		}
 	}
@@ -2059,7 +2063,7 @@
 	if (err)
 		return err;
 	ext_debug("index is empty, remove it, free block %llu\n", leaf);
-	ext4_free_blocks(handle, inode, 0, leaf, 1,
+	ext4_free_blocks(handle, inode, NULL, leaf, 1,
 			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
 	return err;
 }
@@ -2156,7 +2160,7 @@
 		num = le32_to_cpu(ex->ee_block) + ee_len - from;
 		start = ext4_ext_pblock(ex) + ee_len - num;
 		ext_debug("free last %u blocks starting %llu\n", num, start);
-		ext4_free_blocks(handle, inode, 0, start, num, flags);
+		ext4_free_blocks(handle, inode, NULL, start, num, flags);
 	} else if (from == le32_to_cpu(ex->ee_block)
 		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
 		printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -3108,14 +3112,13 @@
 {
 	int i, depth;
 	struct ext4_extent_header *eh;
-	struct ext4_extent *ex, *last_ex;
+	struct ext4_extent *last_ex;
 
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
 		return 0;
 
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
-	ex = path[depth].p_ext;
 
 	if (unlikely(!eh->eh_entries)) {
 		EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
@@ -3295,9 +3298,8 @@
 			struct ext4_map_blocks *map, int flags)
 {
 	struct ext4_ext_path *path = NULL;
-	struct ext4_extent_header *eh;
 	struct ext4_extent newex, *ex;
-	ext4_fsblk_t newblock;
+	ext4_fsblk_t newblock = 0;
 	int err = 0, depth, ret;
 	unsigned int allocated = 0;
 	struct ext4_allocation_request ar;
@@ -3305,6 +3307,7 @@
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
 		  map->m_lblk, map->m_len, inode->i_ino);
+	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
 	/* check in cache */
 	if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
@@ -3352,7 +3355,6 @@
 		err = -EIO;
 		goto out2;
 	}
-	eh = path[depth].p_hdr;
 
 	ex = path[depth].p_ext;
 	if (ex) {
@@ -3485,7 +3487,7 @@
 		/* not a good idea to call discard here directly,
 		 * but otherwise we'd need to call it every free() */
 		ext4_discard_preallocations(inode);
-		ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
+		ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
 				 ext4_ext_get_actual_len(&newex), 0);
 		goto out2;
 	}
@@ -3525,6 +3527,8 @@
 		ext4_ext_drop_refs(path);
 		kfree(path);
 	}
+	trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
+		newblock, map->m_len, err ? err : allocated);
 	return err ? err : allocated;
 }
 
@@ -3658,6 +3662,7 @@
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return -EOPNOTSUPP;
 
+	trace_ext4_fallocate_enter(inode, offset, len, mode);
 	map.m_lblk = offset >> blkbits;
 	/*
 	 * We can't just convert len to max_blocks because
@@ -3673,6 +3678,7 @@
 	ret = inode_newsize_ok(inode, (len + offset));
 	if (ret) {
 		mutex_unlock(&inode->i_mutex);
+		trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
 		return ret;
 	}
 retry:
@@ -3717,6 +3723,8 @@
 		goto retry;
 	}
 	mutex_unlock(&inode->i_mutex);
+	trace_ext4_fallocate_exit(inode, offset, max_blocks,
+				ret > 0 ? ret2 : ret);
 	return ret > 0 ? ret2 : ret;
 }
 
@@ -3775,6 +3783,7 @@
 	}
 	return ret > 0 ? ret2 : ret;
 }
+
 /*
  * Callback function called for each extent to gather FIEMAP information.
  */
@@ -3782,38 +3791,162 @@
 		       struct ext4_ext_cache *newex, struct ext4_extent *ex,
 		       void *data)
 {
-	struct fiemap_extent_info *fieinfo = data;
-	unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
 	__u64	logical;
 	__u64	physical;
 	__u64	length;
+	loff_t	size;
 	__u32	flags = 0;
-	int	error;
+	int		ret = 0;
+	struct fiemap_extent_info *fieinfo = data;
+	unsigned char blksize_bits;
 
-	logical =  (__u64)newex->ec_block << blksize_bits;
+	blksize_bits = inode->i_sb->s_blocksize_bits;
+	logical = (__u64)newex->ec_block << blksize_bits;
 
 	if (newex->ec_start == 0) {
-		pgoff_t offset;
-		struct page *page;
+		/*
+		 * No extent in extent-tree contains block @newex->ec_start,
+		 * then the block may stay in 1)a hole or 2)delayed-extent.
+		 *
+		 * Holes or delayed-extents are processed as follows.
+		 * 1. lookup dirty pages with specified range in pagecache.
+		 *    If no page is got, then there is no delayed-extent and
+		 *    return with EXT_CONTINUE.
+		 * 2. find the 1st mapped buffer,
+		 * 3. check if the mapped buffer is both in the request range
+		 *    and a delayed buffer. If not, there is no delayed-extent,
+		 *    then return.
+		 * 4. a delayed-extent is found, the extent will be collected.
+		 */
+		ext4_lblk_t	end = 0;
+		pgoff_t		last_offset;
+		pgoff_t		offset;
+		pgoff_t		index;
+		struct page	**pages = NULL;
 		struct buffer_head *bh = NULL;
+		struct buffer_head *head = NULL;
+		unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
+
+		pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (pages == NULL)
+			return -ENOMEM;
 
 		offset = logical >> PAGE_SHIFT;
-		page = find_get_page(inode->i_mapping, offset);
-		if (!page || !page_has_buffers(page))
-			return EXT_CONTINUE;
+repeat:
+		last_offset = offset;
+		head = NULL;
+		ret = find_get_pages_tag(inode->i_mapping, &offset,
+					PAGECACHE_TAG_DIRTY, nr_pages, pages);
 
-		bh = page_buffers(page);
+		if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+			/* First time, try to find a mapped buffer. */
+			if (ret == 0) {
+out:
+				for (index = 0; index < ret; index++)
+					page_cache_release(pages[index]);
+				/* just a hole. */
+				kfree(pages);
+				return EXT_CONTINUE;
+			}
 
-		if (!bh)
-			return EXT_CONTINUE;
+			/* Try to find the 1st mapped buffer. */
+			end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
+				  blksize_bits;
+			if (!page_has_buffers(pages[0]))
+				goto out;
+			head = page_buffers(pages[0]);
+			if (!head)
+				goto out;
 
-		if (buffer_delay(bh)) {
-			flags |= FIEMAP_EXTENT_DELALLOC;
-			page_cache_release(page);
+			bh = head;
+			do {
+				if (buffer_mapped(bh)) {
+					/* get the 1st mapped buffer. */
+					if (end > newex->ec_block +
+						newex->ec_len)
+						/* The buffer is out of
+						 * the request range.
+						 */
+						goto out;
+					goto found_mapped_buffer;
+				}
+				bh = bh->b_this_page;
+				end++;
+			} while (bh != head);
+
+			/* No mapped buffer found. */
+			goto out;
 		} else {
-			page_cache_release(page);
-			return EXT_CONTINUE;
+			/*Find contiguous delayed buffers. */
+			if (ret > 0 && pages[0]->index == last_offset)
+				head = page_buffers(pages[0]);
+			bh = head;
 		}
+
+found_mapped_buffer:
+		if (bh != NULL && buffer_delay(bh)) {
+			/* 1st or contiguous delayed buffer found. */
+			if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+				/*
+				 * 1st delayed buffer found, record
+				 * the start of extent.
+				 */
+				flags |= FIEMAP_EXTENT_DELALLOC;
+				newex->ec_block = end;
+				logical = (__u64)end << blksize_bits;
+			}
+			/* Find contiguous delayed buffers. */
+			do {
+				if (!buffer_delay(bh))
+					goto found_delayed_extent;
+				bh = bh->b_this_page;
+				end++;
+			} while (bh != head);
+
+			for (index = 1; index < ret; index++) {
+				if (!page_has_buffers(pages[index])) {
+					bh = NULL;
+					break;
+				}
+				head = page_buffers(pages[index]);
+				if (!head) {
+					bh = NULL;
+					break;
+				}
+				if (pages[index]->index !=
+					pages[0]->index + index) {
+					/* Blocks are not contiguous. */
+					bh = NULL;
+					break;
+				}
+				bh = head;
+				do {
+					if (!buffer_delay(bh))
+						/* Delayed-extent ends. */
+						goto found_delayed_extent;
+					bh = bh->b_this_page;
+					end++;
+				} while (bh != head);
+			}
+		} else if (!(flags & FIEMAP_EXTENT_DELALLOC))
+			/* a hole found. */
+			goto out;
+
+found_delayed_extent:
+		newex->ec_len = min(end - newex->ec_block,
+						(ext4_lblk_t)EXT_INIT_MAX_LEN);
+		if (ret == nr_pages && bh != NULL &&
+			newex->ec_len < EXT_INIT_MAX_LEN &&
+			buffer_delay(bh)) {
+			/* Have not collected an extent and continue. */
+			for (index = 0; index < ret; index++)
+				page_cache_release(pages[index]);
+			goto repeat;
+		}
+
+		for (index = 0; index < ret; index++)
+			page_cache_release(pages[index]);
+		kfree(pages);
 	}
 
 	physical = (__u64)newex->ec_start << blksize_bits;
@@ -3822,32 +3955,16 @@
 	if (ex && ext4_ext_is_uninitialized(ex))
 		flags |= FIEMAP_EXTENT_UNWRITTEN;
 
-	/*
-	 * If this extent reaches EXT_MAX_BLOCK, it must be last.
-	 *
-	 * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
-	 * this also indicates no more allocated blocks.
-	 *
-	 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
-	 */
-	if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
-	    newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
-		loff_t size = i_size_read(inode);
-		loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
-
+	size = i_size_read(inode);
+	if (logical + length >= size)
 		flags |= FIEMAP_EXTENT_LAST;
-		if ((flags & FIEMAP_EXTENT_DELALLOC) &&
-		    logical+length > size)
-			length = (size - logical + bs - 1) & ~(bs-1);
-	}
 
-	error = fiemap_fill_next_extent(fieinfo, logical, physical,
+	ret = fiemap_fill_next_extent(fieinfo, logical, physical,
 					length, flags);
-	if (error < 0)
-		return error;
-	if (error == 1)
+	if (ret < 0)
+		return ret;
+	if (ret == 1)
 		return EXT_BREAK;
-
 	return EXT_CONTINUE;
 }
 

diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 7829b28..7f74019 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c

@@ -164,20 +164,20 @@
 
 	J_ASSERT(ext4_journal_current_handle() == NULL);
 
-	trace_ext4_sync_file(file, datasync);
+	trace_ext4_sync_file_enter(file, datasync);
 
 	if (inode->i_sb->s_flags & MS_RDONLY)
 		return 0;
 
 	ret = ext4_flush_completed_IO(inode);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	if (!journal) {
 		ret = generic_file_fsync(file, datasync);
 		if (!ret && !list_empty(&inode->i_dentry))
 			ext4_sync_parent(inode);
-		return ret;
+		goto out;
 	}
 
 	/*
@@ -194,8 +194,10 @@
 	 *  (they were dirtied by commit).  But that's OK - the blocks are
 	 *  safe in-journal, which is all fsync() needs to ensure.
 	 */
-	if (ext4_should_journal_data(inode))
-		return ext4_force_commit(inode->i_sb);
+	if (ext4_should_journal_data(inode)) {
+		ret = ext4_force_commit(inode->i_sb);
+		goto out;
+	}
 
 	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
 	if (jbd2_log_start_commit(journal, commit_tid)) {
@@ -215,5 +217,7 @@
 		ret = jbd2_log_wait_commit(journal, commit_tid);
 	} else if (journal->j_flags & JBD2_BARRIER)
 		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ out:
+	trace_ext4_sync_file_exit(inode, ret);
 	return ret;
 }

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 78b79e1..21bb2f6 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c

@@ -152,6 +152,7 @@
 	 * We do it here so the bitmap uptodate bit
 	 * get set with buffer lock held.
 	 */
+	trace_ext4_load_inode_bitmap(sb, block_group);
 	set_bitmap_uptodate(bh);
 	if (bh_submit_read(bh) < 0) {
 		put_bh(bh);
@@ -649,7 +650,7 @@
 		*group = parent_group + flex_size;
 		if (*group > ngroups)
 			*group = 0;
-		return find_group_orlov(sb, parent, group, mode, 0);
+		return find_group_orlov(sb, parent, group, mode, NULL);
 	}
 
 	/*
@@ -1054,6 +1055,11 @@
 		}
 	}
 
+	if (ext4_handle_valid(handle)) {
+		ei->i_sync_tid = handle->h_transaction->t_tid;
+		ei->i_datasync_tid = handle->h_transaction->t_tid;
+	}
+
 	err = ext4_mark_inode_dirty(handle, inode);
 	if (err) {
 		ext4_std_error(sb, err);

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9297ad4..1a86282 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c

@@ -173,7 +173,7 @@
 	BUG_ON(EXT4_JOURNAL(inode) == NULL);
 	jbd_debug(2, "restarting handle %p\n", handle);
 	up_write(&EXT4_I(inode)->i_data_sem);
-	ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
+	ret = ext4_journal_restart(handle, nblocks);
 	down_write(&EXT4_I(inode)->i_data_sem);
 	ext4_discard_preallocations(inode);
 
@@ -720,7 +720,7 @@
 	return ret;
 failed_out:
 	for (i = 0; i < index; i++)
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
 	return ret;
 }
 
@@ -823,20 +823,20 @@
 	return err;
 failed:
 	/* Allocation failed, free what we already allocated */
-	ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
+	ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
 	for (i = 1; i <= n ; i++) {
 		/*
 		 * branch[i].bh is newly allocated, so there is no
 		 * need to revoke the block, which is why we don't
 		 * need to set EXT4_FREE_BLOCKS_METADATA.
 		 */
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
 				 EXT4_FREE_BLOCKS_FORGET);
 	}
 	for (i = n+1; i < indirect_blks; i++)
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
 
-	ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
+	ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
 
 	return err;
 }
@@ -924,7 +924,7 @@
 		ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
 				 EXT4_FREE_BLOCKS_FORGET);
 	}
-	ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
 			 blks, 0);
 
 	return err;
@@ -973,6 +973,7 @@
 	int count = 0;
 	ext4_fsblk_t first_block = 0;
 
+	trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 	J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
 	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
 	depth = ext4_block_to_path(inode, map->m_lblk, offsets,
@@ -1058,6 +1059,8 @@
 		partial--;
 	}
 out:
+	trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
+				map->m_pblk, map->m_len, err);
 	return err;
 }
 
@@ -2060,7 +2063,7 @@
 		if (nr_pages == 0)
 			break;
 		for (i = 0; i < nr_pages; i++) {
-			int commit_write = 0, redirty_page = 0;
+			int commit_write = 0, skip_page = 0;
 			struct page *page = pvec.pages[i];
 
 			index = page->index;
@@ -2086,14 +2089,12 @@
 			 * If the page does not have buffers (for
 			 * whatever reason), try to create them using
 			 * __block_write_begin.  If this fails,
-			 * redirty the page and move on.
+			 * skip the page and move on.
 			 */
 			if (!page_has_buffers(page)) {
 				if (__block_write_begin(page, 0, len,
 						noalloc_get_block_write)) {
-				redirty_page:
-					redirty_page_for_writepage(mpd->wbc,
-								   page);
+				skip_page:
 					unlock_page(page);
 					continue;
 				}
@@ -2104,7 +2105,7 @@
 			block_start = 0;
 			do {
 				if (!bh)
-					goto redirty_page;
+					goto skip_page;
 				if (map && (cur_logical >= map->m_lblk) &&
 				    (cur_logical <= (map->m_lblk +
 						     (map->m_len - 1)))) {
@@ -2120,22 +2121,23 @@
 					clear_buffer_unwritten(bh);
 				}
 
-				/* redirty page if block allocation undone */
+				/* skip page if block allocation undone */
 				if (buffer_delay(bh) || buffer_unwritten(bh))
-					redirty_page = 1;
+					skip_page = 1;
 				bh = bh->b_this_page;
 				block_start += bh->b_size;
 				cur_logical++;
 				pblock++;
 			} while (bh != page_bufs);
 
-			if (redirty_page)
-				goto redirty_page;
+			if (skip_page)
+				goto skip_page;
 
 			if (commit_write)
 				/* mark the buffer_heads as dirty & uptodate */
 				block_commit_write(page, 0, len);
 
+			clear_page_dirty_for_io(page);
 			/*
 			 * Delalloc doesn't support data journalling,
 			 * but eventually maybe we'll lift this
@@ -2165,8 +2167,7 @@
 	return ret;
 }
 
-static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
-					sector_t logical, long blk_cnt)
+static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
 {
 	int nr_pages, i;
 	pgoff_t index, end;
@@ -2174,9 +2175,8 @@
 	struct inode *inode = mpd->inode;
 	struct address_space *mapping = inode->i_mapping;
 
-	index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	end   = (logical + blk_cnt - 1) >>
-				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+	index = mpd->first_page;
+	end   = mpd->next_page - 1;
 	while (index <= end) {
 		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
 		if (nr_pages == 0)
@@ -2279,9 +2279,8 @@
 		err = blks;
 		/*
 		 * If get block returns EAGAIN or ENOSPC and there
-		 * appears to be free blocks we will call
-		 * ext4_writepage() for all of the pages which will
-		 * just redirty the pages.
+		 * appears to be free blocks we will just let
+		 * mpage_da_submit_io() unlock all of the pages.
 		 */
 		if (err == -EAGAIN)
 			goto submit_io;
@@ -2312,8 +2311,10 @@
 				ext4_print_free_blocks(mpd->inode);
 		}
 		/* invalidate all the pages */
-		ext4_da_block_invalidatepages(mpd, next,
-				mpd->b_size >> mpd->inode->i_blkbits);
+		ext4_da_block_invalidatepages(mpd);
+
+		/* Mark this page range as having been completed */
+		mpd->io_done = 1;
 		return;
 	}
 	BUG_ON(blks == 0);
@@ -2438,102 +2439,6 @@
 }
 
 /*
- * __mpage_da_writepage - finds extent of pages and blocks
- *
- * @page: page to consider
- * @wbc: not used, we just follow rules
- * @data: context
- *
- * The function finds extents of pages and scan them for all blocks.
- */
-static int __mpage_da_writepage(struct page *page,
-				struct writeback_control *wbc,
-				struct mpage_da_data *mpd)
-{
-	struct inode *inode = mpd->inode;
-	struct buffer_head *bh, *head;
-	sector_t logical;
-
-	/*
-	 * Can we merge this page to current extent?
-	 */
-	if (mpd->next_page != page->index) {
-		/*
-		 * Nope, we can't. So, we map non-allocated blocks
-		 * and start IO on them
-		 */
-		if (mpd->next_page != mpd->first_page) {
-			mpage_da_map_and_submit(mpd);
-			/*
-			 * skip rest of the page in the page_vec
-			 */
-			redirty_page_for_writepage(wbc, page);
-			unlock_page(page);
-			return MPAGE_DA_EXTENT_TAIL;
-		}
-
-		/*
-		 * Start next extent of pages ...
-		 */
-		mpd->first_page = page->index;
-
-		/*
-		 * ... and blocks
-		 */
-		mpd->b_size = 0;
-		mpd->b_state = 0;
-		mpd->b_blocknr = 0;
-	}
-
-	mpd->next_page = page->index + 1;
-	logical = (sector_t) page->index <<
-		  (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	if (!page_has_buffers(page)) {
-		mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
-				       (1 << BH_Dirty) | (1 << BH_Uptodate));
-		if (mpd->io_done)
-			return MPAGE_DA_EXTENT_TAIL;
-	} else {
-		/*
-		 * Page with regular buffer heads, just add all dirty ones
-		 */
-		head = page_buffers(page);
-		bh = head;
-		do {
-			BUG_ON(buffer_locked(bh));
-			/*
-			 * We need to try to allocate
-			 * unmapped blocks in the same page.
-			 * Otherwise we won't make progress
-			 * with the page in ext4_writepage
-			 */
-			if (ext4_bh_delay_or_unwritten(NULL, bh)) {
-				mpage_add_bh_to_extent(mpd, logical,
-						       bh->b_size,
-						       bh->b_state);
-				if (mpd->io_done)
-					return MPAGE_DA_EXTENT_TAIL;
-			} else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
-				/*
-				 * mapped dirty buffer. We need to update
-				 * the b_state because we look at
-				 * b_state in mpage_da_map_blocks. We don't
-				 * update b_size because if we find an
-				 * unmapped buffer_head later we need to
-				 * use the b_state flag of that buffer_head.
-				 */
-				if (mpd->b_size == 0)
-					mpd->b_state = bh->b_state & BH_FLAGS;
-			}
-			logical++;
-		} while ((bh = bh->b_this_page) != head);
-	}
-
-	return 0;
-}
-
-/*
  * This is a special get_blocks_t callback which is used by
  * ext4_da_write_begin().  It will either return mapped block or
  * reserve space for a single block.
@@ -2597,7 +2502,6 @@
 		 * for partial write.
 		 */
 		set_buffer_new(bh);
-		set_buffer_mapped(bh);
 	}
 	return 0;
 }
@@ -2811,27 +2715,27 @@
 
 /*
  * write_cache_pages_da - walk the list of dirty pages of the given
- * address space and call the callback function (which usually writes
- * the pages).
- *
- * This is a forked version of write_cache_pages().  Differences:
- *	Range cyclic is ignored.
- *	no_nrwrite_index_update is always presumed true
+ * address space and accumulate pages that need writing, and call
+ * mpage_da_map_and_submit to map a single contiguous memory region
+ * and then write them.
  */
 static int write_cache_pages_da(struct address_space *mapping,
 				struct writeback_control *wbc,
 				struct mpage_da_data *mpd,
 				pgoff_t *done_index)
 {
-	int ret = 0;
-	int done = 0;
-	struct pagevec pvec;
-	unsigned nr_pages;
-	pgoff_t index;
-	pgoff_t end;		/* Inclusive */
-	long nr_to_write = wbc->nr_to_write;
-	int tag;
+	struct buffer_head	*bh, *head;
+	struct inode		*inode = mapping->host;
+	struct pagevec		pvec;
+	unsigned int		nr_pages;
+	sector_t		logical;
+	pgoff_t			index, end;
+	long			nr_to_write = wbc->nr_to_write;
+	int			i, tag, ret = 0;
 
+	memset(mpd, 0, sizeof(struct mpage_da_data));
+	mpd->wbc = wbc;
+	mpd->inode = inode;
 	pagevec_init(&pvec, 0);
 	index = wbc->range_start >> PAGE_CACHE_SHIFT;
 	end = wbc->range_end >> PAGE_CACHE_SHIFT;
@@ -2842,13 +2746,11 @@
 		tag = PAGECACHE_TAG_DIRTY;
 
 	*done_index = index;
-	while (!done && (index <= end)) {
-		int i;
-
+	while (index <= end) {
 		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
 			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
 		if (nr_pages == 0)
-			break;
+			return 0;
 
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
@@ -2860,60 +2762,100 @@
 			 * mapping. However, page->index will not change
 			 * because we have a reference on the page.
 			 */
-			if (page->index > end) {
-				done = 1;
-				break;
-			}
+			if (page->index > end)
+				goto out;
 
 			*done_index = page->index + 1;
 
+			/*
+			 * If we can't merge this page, and we have
+			 * accumulated an contiguous region, write it
+			 */
+			if ((mpd->next_page != page->index) &&
+			    (mpd->next_page != mpd->first_page)) {
+				mpage_da_map_and_submit(mpd);
+				goto ret_extent_tail;
+			}
+
 			lock_page(page);
 
 			/*
-			 * Page truncated or invalidated. We can freely skip it
-			 * then, even for data integrity operations: the page
-			 * has disappeared concurrently, so there could be no
-			 * real expectation of this data interity operation
-			 * even if there is now a new, dirty page at the same
-			 * pagecache address.
+			 * If the page is no longer dirty, or its
+			 * mapping no longer corresponds to inode we
+			 * are writing (which means it has been
+			 * truncated or invalidated), or the page is
+			 * already under writeback and we are not
+			 * doing a data integrity writeback, skip the page
 			 */
-			if (unlikely(page->mapping != mapping)) {
-continue_unlock:
+			if (!PageDirty(page) ||
+			    (PageWriteback(page) &&
+			     (wbc->sync_mode == WB_SYNC_NONE)) ||
+			    unlikely(page->mapping != mapping)) {
 				unlock_page(page);
 				continue;
 			}
 
-			if (!PageDirty(page)) {
-				/* someone wrote it for us */
-				goto continue_unlock;
-			}
-
-			if (PageWriteback(page)) {
-				if (wbc->sync_mode != WB_SYNC_NONE)
-					wait_on_page_writeback(page);
-				else
-					goto continue_unlock;
-			}
+			if (PageWriteback(page))
+				wait_on_page_writeback(page);
 
 			BUG_ON(PageWriteback(page));
-			if (!clear_page_dirty_for_io(page))
-				goto continue_unlock;
 
-			ret = __mpage_da_writepage(page, wbc, mpd);
-			if (unlikely(ret)) {
-				if (ret == AOP_WRITEPAGE_ACTIVATE) {
-					unlock_page(page);
-					ret = 0;
-				} else {
-					done = 1;
-					break;
-				}
+			if (mpd->next_page != page->index)
+				mpd->first_page = page->index;
+			mpd->next_page = page->index + 1;
+			logical = (sector_t) page->index <<
+				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+			if (!page_has_buffers(page)) {
+				mpage_add_bh_to_extent(mpd, logical,
+						       PAGE_CACHE_SIZE,
+						       (1 << BH_Dirty) | (1 << BH_Uptodate));
+				if (mpd->io_done)
+					goto ret_extent_tail;
+			} else {
+				/*
+				 * Page with regular buffer heads,
+				 * just add all dirty ones
+				 */
+				head = page_buffers(page);
+				bh = head;
+				do {
+					BUG_ON(buffer_locked(bh));
+					/*
+					 * We need to try to allocate
+					 * unmapped blocks in the same page.
+					 * Otherwise we won't make progress
+					 * with the page in ext4_writepage
+					 */
+					if (ext4_bh_delay_or_unwritten(NULL, bh)) {
+						mpage_add_bh_to_extent(mpd, logical,
+								       bh->b_size,
+								       bh->b_state);
+						if (mpd->io_done)
+							goto ret_extent_tail;
+					} else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
+						/*
+						 * mapped dirty buffer. We need
+						 * to update the b_state
+						 * because we look at b_state
+						 * in mpage_da_map_blocks.  We
+						 * don't update b_size because
+						 * if we find an unmapped
+						 * buffer_head later we need to
+						 * use the b_state flag of that
+						 * buffer_head.
+						 */
+						if (mpd->b_size == 0)
+							mpd->b_state = bh->b_state & BH_FLAGS;
+					}
+					logical++;
+				} while ((bh = bh->b_this_page) != head);
 			}
 
 			if (nr_to_write > 0) {
 				nr_to_write--;
 				if (nr_to_write == 0 &&
-				    wbc->sync_mode == WB_SYNC_NONE) {
+				    wbc->sync_mode == WB_SYNC_NONE)
 					/*
 					 * We stop writing back only if we are
 					 * not doing integrity sync. In case of
@@ -2924,14 +2866,18 @@
 					 * pages, but have not synced all of the
 					 * old dirty pages.
 					 */
-					done = 1;
-					break;
-				}
+					goto out;
 			}
 		}
 		pagevec_release(&pvec);
 		cond_resched();
 	}
+	return 0;
+ret_extent_tail:
+	ret = MPAGE_DA_EXTENT_TAIL;
+out:
+	pagevec_release(&pvec);
+	cond_resched();
 	return ret;
 }
 
@@ -2945,7 +2891,6 @@
 	struct mpage_da_data mpd;
 	struct inode *inode = mapping->host;
 	int pages_written = 0;
-	long pages_skipped;
 	unsigned int max_pages;
 	int range_cyclic, cycled = 1, io_done = 0;
 	int needed_blocks, ret = 0;
@@ -3028,11 +2973,6 @@
 		wbc->nr_to_write = desired_nr_to_write;
 	}
 
-	mpd.wbc = wbc;
-	mpd.inode = mapping->host;
-
-	pages_skipped = wbc->pages_skipped;
-
 retry:
 	if (wbc->sync_mode == WB_SYNC_ALL)
 		tag_pages_for_writeback(mapping, index, end);
@@ -3059,22 +2999,10 @@
 		}
 
 		/*
-		 * Now call __mpage_da_writepage to find the next
+		 * Now call write_cache_pages_da() to find the next
 		 * contiguous region of logical blocks that need
-		 * blocks to be allocated by ext4.  We don't actually
-		 * submit the blocks for I/O here, even though
-		 * write_cache_pages thinks it will, and will set the
-		 * pages as clean for write before calling
-		 * __mpage_da_writepage().
+		 * blocks to be allocated by ext4 and submit them.
 		 */
-		mpd.b_size = 0;
-		mpd.b_state = 0;
-		mpd.b_blocknr = 0;
-		mpd.first_page = 0;
-		mpd.next_page = 0;
-		mpd.io_done = 0;
-		mpd.pages_written = 0;
-		mpd.retval = 0;
 		ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
 		/*
 		 * If we have a contiguous extent of pages and we
@@ -3096,7 +3024,6 @@
 			 * and try again
 			 */
 			jbd2_journal_force_commit_nested(sbi->s_journal);
-			wbc->pages_skipped = pages_skipped;
 			ret = 0;
 		} else if (ret == MPAGE_DA_EXTENT_TAIL) {
 			/*
@@ -3104,7 +3031,6 @@
 			 * rest of the pages
 			 */
 			pages_written += mpd.pages_written;
-			wbc->pages_skipped = pages_skipped;
 			ret = 0;
 			io_done = 1;
 		} else if (wbc->nr_to_write)
@@ -3122,11 +3048,6 @@
 		wbc->range_end  = mapping->writeback_index - 1;
 		goto retry;
 	}
-	if (pages_skipped != wbc->pages_skipped)
-		ext4_msg(inode->i_sb, KERN_CRIT,
-			 "This should not happen leaving %s "
-			 "with nr_to_write = %ld ret = %d",
-			 __func__, wbc->nr_to_write, ret);
 
 	/* Update index */
 	wbc->range_cyclic = range_cyclic;
@@ -3460,6 +3381,7 @@
 
 static int ext4_readpage(struct file *file, struct page *page)
 {
+	trace_ext4_readpage(page);
 	return mpage_readpage(page, ext4_get_block);
 }
 
@@ -3494,6 +3416,8 @@
 {
 	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
+	trace_ext4_invalidatepage(page, offset);
+
 	/*
 	 * free any io_end structure allocated for buffers to be discarded
 	 */
@@ -3515,6 +3439,8 @@
 {
 	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
+	trace_ext4_releasepage(page);
+
 	WARN_ON(PageChecked(page));
 	if (!page_has_buffers(page))
 		return 0;
@@ -3873,11 +3799,16 @@
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
 
+	trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
-
-	return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+		ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+	else
+		ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+	trace_ext4_direct_IO_exit(inode, offset,
+				iov_length(iov, nr_segs), rw, ret);
+	return ret;
 }
 
 /*
@@ -4173,6 +4104,9 @@
  *
  * We release `count' blocks on disk, but (last - first) may be greater
  * than `count' because there can be holes in there.
+ *
+ * Return 0 on success, 1 on invalid block range
+ * and < 0 on fatal error.
  */
 static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
 			     struct buffer_head *bh,
@@ -4199,33 +4133,32 @@
 		if (bh) {
 			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
 			err = ext4_handle_dirty_metadata(handle, inode, bh);
-			if (unlikely(err)) {
-				ext4_std_error(inode->i_sb, err);
-				return 1;
-			}
+			if (unlikely(err))
+				goto out_err;
 		}
 		err = ext4_mark_inode_dirty(handle, inode);
-		if (unlikely(err)) {
-			ext4_std_error(inode->i_sb, err);
-			return 1;
-		}
+		if (unlikely(err))
+			goto out_err;
 		err = ext4_truncate_restart_trans(handle, inode,
 						  blocks_for_truncate(inode));
-		if (unlikely(err)) {
-			ext4_std_error(inode->i_sb, err);
-			return 1;
-		}
+		if (unlikely(err))
+			goto out_err;
 		if (bh) {
 			BUFFER_TRACE(bh, "retaking write access");
-			ext4_journal_get_write_access(handle, bh);
+			err = ext4_journal_get_write_access(handle, bh);
+			if (unlikely(err))
+				goto out_err;
 		}
 	}
 
 	for (p = first; p < last; p++)
 		*p = 0;
 
-	ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
+	ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
 	return 0;
+out_err:
+	ext4_std_error(inode->i_sb, err);
+	return err;
 }
 
 /**
@@ -4259,7 +4192,7 @@
 	ext4_fsblk_t nr;		    /* Current block # */
 	__le32 *p;			    /* Pointer into inode/ind
 					       for current block */
-	int err;
+	int err = 0;
 
 	if (this_bh) {				/* For indirect block */
 		BUFFER_TRACE(this_bh, "get_write_access");
@@ -4281,9 +4214,10 @@
 			} else if (nr == block_to_free + count) {
 				count++;
 			} else {
-				if (ext4_clear_blocks(handle, inode, this_bh,
-						      block_to_free, count,
-						      block_to_free_p, p))
+				err = ext4_clear_blocks(handle, inode, this_bh,
+						        block_to_free, count,
+						        block_to_free_p, p);
+				if (err)
 					break;
 				block_to_free = nr;
 				block_to_free_p = p;
@@ -4292,9 +4226,12 @@
 		}
 	}
 
-	if (count > 0)
-		ext4_clear_blocks(handle, inode, this_bh, block_to_free,
-				  count, block_to_free_p, p);
+	if (!err && count > 0)
+		err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
+					count, block_to_free_p, p);
+	if (err < 0)
+		/* fatal error */
+		return;
 
 	if (this_bh) {
 		BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
@@ -4412,7 +4349,7 @@
 			 * transaction where the data blocks are
 			 * actually freed.
 			 */
-			ext4_free_blocks(handle, inode, 0, nr, 1,
+			ext4_free_blocks(handle, inode, NULL, nr, 1,
 					 EXT4_FREE_BLOCKS_METADATA|
 					 EXT4_FREE_BLOCKS_FORGET);
 
@@ -4496,6 +4433,8 @@
 	ext4_lblk_t last_block;
 	unsigned blocksize = inode->i_sb->s_blocksize;
 
+	trace_ext4_truncate_enter(inode);
+
 	if (!ext4_can_truncate(inode))
 		return;
 
@@ -4506,6 +4445,7 @@
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 		ext4_ext_truncate(inode);
+		trace_ext4_truncate_exit(inode);
 		return;
 	}
 
@@ -4635,6 +4575,7 @@
 		ext4_orphan_del(handle, inode);
 
 	ext4_journal_stop(handle);
+	trace_ext4_truncate_exit(inode);
 }
 
 /*
@@ -4766,6 +4707,7 @@
 		 * has in-inode xattrs, or we don't have this inode in memory.
 		 * Read the block from disk.
 		 */
+		trace_ext4_load_inode(inode);
 		get_bh(bh);
 		bh->b_end_io = end_buffer_read_sync;
 		submit_bh(READ_META, bh);
@@ -4871,7 +4813,7 @@
 		return inode;
 
 	ei = EXT4_I(inode);
-	iloc.bh = 0;
+	iloc.bh = NULL;
 
 	ret = __ext4_get_inode_loc(inode, &iloc, 0);
 	if (ret < 0)

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a84faa1..808c554 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c

@@ -334,16 +334,22 @@
 	case FITRIM:
 	{
 		struct super_block *sb = inode->i_sb;
+		struct request_queue *q = bdev_get_queue(sb->s_bdev);
 		struct fstrim_range range;
 		int ret = 0;
 
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
+		if (!blk_queue_discard(q))
+			return -EOPNOTSUPP;
+
 		if (copy_from_user(&range, (struct fstrim_range *)arg,
 		    sizeof(range)))
 			return -EFAULT;
 
+		range.minlen = max((unsigned int)range.minlen,
+				   q->limits.discard_granularity);
 		ret = ext4_trim_fs(sb, &range);
 		if (ret < 0)
 			return ret;
@@ -421,6 +427,7 @@
 		return err;
 	}
 	case EXT4_IOC_MOVE_EXT:
+	case FITRIM:
 		break;
 	default:
 		return -ENOIOCTLCMD;

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d1fe09a..a5837a8 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c

@@ -432,9 +432,10 @@
 	}
 
 	/* at order 0 we see each particular block */
-	*max = 1 << (e4b->bd_blkbits + 3);
-	if (order == 0)
+	if (order == 0) {
+		*max = 1 << (e4b->bd_blkbits + 3);
 		return EXT4_MB_BITMAP(e4b);
+	}
 
 	bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
 	*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
@@ -616,7 +617,6 @@
 	MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
 
 	grp = ext4_get_group_info(sb, e4b->bd_group);
-	buddy = mb_find_buddy(e4b, 0, &max);
 	list_for_each(cur, &grp->bb_prealloc_list) {
 		ext4_group_t groupnr;
 		struct ext4_prealloc_space *pa;
@@ -635,7 +635,12 @@
 #define mb_check_buddy(e4b)
 #endif
 
-/* FIXME!! need more doc */
+/*
+ * Divide blocks started from @first with length @len into
+ * smaller chunks with power of 2 blocks.
+ * Clear the bits in bitmap which the blocks of the chunk(s) covered,
+ * then increase bb_counters[] for corresponded chunk size.
+ */
 static void ext4_mb_mark_free_simple(struct super_block *sb,
 				void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
 					struct ext4_group_info *grp)
@@ -2381,7 +2386,7 @@
 	/* An 8TB filesystem with 64-bit pointers requires a 4096 byte
 	 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
 	 * So a two level scheme suffices for now. */
-	sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
+	sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
 	if (sbi->s_group_info == NULL) {
 		printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
 		return -ENOMEM;
@@ -3208,7 +3213,7 @@
 	cur_distance = abs(goal_block - cpa->pa_pstart);
 	new_distance = abs(goal_block - pa->pa_pstart);
 
-	if (cur_distance < new_distance)
+	if (cur_distance <= new_distance)
 		return cpa;
 
 	/* drop the previous reference */
@@ -3907,7 +3912,8 @@
 	struct super_block *sb = ac->ac_sb;
 	ext4_group_t ngroups, i;
 
-	if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+	if (!mb_enable_debug ||
+	    (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
 		return;
 
 	printk(KERN_ERR "EXT4-fs: Can't allocate:"
@@ -4753,7 +4759,8 @@
  * bitmap. Then issue a TRIM command on this extent and free the extent in
  * the group buddy bitmap. This is done until whole group is scanned.
  */
-ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
+static ext4_grpblk_t
+ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
 		ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
 {
 	void *bitmap;
@@ -4863,10 +4870,15 @@
 			break;
 		}
 
-		if (len >= EXT4_BLOCKS_PER_GROUP(sb))
-			len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
-		else
+		/*
+		 * For all the groups except the last one, last block will
+		 * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to
+		 * change it for the last group in which case start +
+		 * len < EXT4_BLOCKS_PER_GROUP(sb).
+		 */
+		if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
 			last_block = first_block + len;
+		len -= last_block - first_block;
 
 		if (e4b.bd_info->bb_free >= minlen) {
 			cnt = ext4_trim_all_free(sb, &e4b, first_block,

diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index b619322..22bd4d7 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h

@@ -169,7 +169,7 @@
 	/* original request */
 	struct ext4_free_extent ac_o_ex;
 
-	/* goal request (after normalization) */
+	/* goal request (normalized ac_o_ex) */
 	struct ext4_free_extent ac_g_ex;
 
 	/* the best found extent */

diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b0a126f..d1bafa5 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c

@@ -263,7 +263,7 @@
 	for (i = 0; i < max_entries; i++) {
 		if (tmp_idata[i]) {
 			extend_credit_for_blkdel(handle, inode);
-			ext4_free_blocks(handle, inode, 0,
+			ext4_free_blocks(handle, inode, NULL,
 					 le32_to_cpu(tmp_idata[i]), 1,
 					 EXT4_FREE_BLOCKS_METADATA |
 					 EXT4_FREE_BLOCKS_FORGET);
@@ -271,7 +271,7 @@
 	}
 	put_bh(bh);
 	extend_credit_for_blkdel(handle, inode);
-	ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
 			 EXT4_FREE_BLOCKS_METADATA |
 			 EXT4_FREE_BLOCKS_FORGET);
 	return 0;
@@ -302,7 +302,7 @@
 	}
 	put_bh(bh);
 	extend_credit_for_blkdel(handle, inode);
-	ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
 			 EXT4_FREE_BLOCKS_METADATA |
 			 EXT4_FREE_BLOCKS_FORGET);
 	return 0;
@@ -315,7 +315,7 @@
 	/* ei->i_data[EXT4_IND_BLOCK] */
 	if (i_data[0]) {
 		extend_credit_for_blkdel(handle, inode);
-		ext4_free_blocks(handle, inode, 0,
+		ext4_free_blocks(handle, inode, NULL,
 				le32_to_cpu(i_data[0]), 1,
 				 EXT4_FREE_BLOCKS_METADATA |
 				 EXT4_FREE_BLOCKS_FORGET);
@@ -428,7 +428,7 @@
 	}
 	put_bh(bh);
 	extend_credit_for_blkdel(handle, inode);
-	ext4_free_blocks(handle, inode, 0, block, 1,
+	ext4_free_blocks(handle, inode, NULL, block, 1,
 			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
 	return retval;
 }

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index e781b7e..67fd0b0 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c

@@ -40,6 +40,7 @@
 #include "xattr.h"
 #include "acl.h"
 
+#include <trace/events/ext4.h>
 /*
  * define how far ahead to read directories while searching them.
  */
@@ -2183,6 +2184,7 @@
 	struct ext4_dir_entry_2 *de;
 	handle_t *handle;
 
+	trace_ext4_unlink_enter(dir, dentry);
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
 	dquot_initialize(dir);
@@ -2228,6 +2230,7 @@
 end_unlink:
 	ext4_journal_stop(handle);
 	brelse(bh);
+	trace_ext4_unlink_exit(dentry, retval);
 	return retval;
 }
 
@@ -2402,6 +2405,10 @@
 		if (!new_inode && new_dir != old_dir &&
 		    EXT4_DIR_LINK_MAX(new_dir))
 			goto end_rename;
+		BUFFER_TRACE(dir_bh, "get_write_access");
+		retval = ext4_journal_get_write_access(handle, dir_bh);
+		if (retval)
+			goto end_rename;
 	}
 	if (!new_bh) {
 		retval = ext4_add_entry(handle, new_dentry, old_inode);
@@ -2409,7 +2416,9 @@
 			goto end_rename;
 	} else {
 		BUFFER_TRACE(new_bh, "get write access");
-		ext4_journal_get_write_access(handle, new_bh);
+		retval = ext4_journal_get_write_access(handle, new_bh);
+		if (retval)
+			goto end_rename;
 		new_de->inode = cpu_to_le32(old_inode->i_ino);
 		if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
 					      EXT4_FEATURE_INCOMPAT_FILETYPE))
@@ -2470,8 +2479,6 @@
 	old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
 	ext4_update_dx_flag(old_dir);
 	if (dir_bh) {
-		BUFFER_TRACE(dir_bh, "get_write_access");
-		ext4_journal_get_write_access(handle, dir_bh);
 		PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
 						cpu_to_le32(new_dir->i_ino);
 		BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");

diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index e2cd90e..b6dbd05 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c

@@ -259,6 +259,11 @@
 			     bi_sector >> (inode->i_blkbits - 9));
 	}
 
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		ext4_free_io_end(io_end);
+		return;
+	}
+
 	/* Add the io_end to per-inode completed io list*/
 	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
 	list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
@@ -279,9 +284,9 @@
 		BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
 		bio_put(io->io_bio);
 	}
-	io->io_bio = 0;
+	io->io_bio = NULL;
 	io->io_op = 0;
-	io->io_end = 0;
+	io->io_end = NULL;
 }
 
 static int io_submit_init(struct ext4_io_submit *io,
@@ -380,8 +385,6 @@
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageWriteback(page));
-	set_page_writeback(page);
-	ClearPageError(page);
 
 	io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
 	if (!io_page) {
@@ -392,6 +395,8 @@
 	io_page->p_page = page;
 	atomic_set(&io_page->p_count, 1);
 	get_page(page);
+	set_page_writeback(page);
+	ClearPageError(page);
 
 	for (bh = head = page_buffers(page), block_start = 0;
 	     bh != head || !block_start;

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3ecc6e4..80bbc9c 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c

@@ -230,7 +230,7 @@
 	}
 
 	/* Zero out all of the reserved backup group descriptor table blocks */
-	ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+	ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
 			block, sbi->s_itb_per_group);
 	err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
 			       GFP_NOFS);
@@ -248,7 +248,7 @@
 
 	/* Zero out all of the inode table blocks */
 	block = input->inode_table;
-	ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+	ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
 			block, sbi->s_itb_per_group);
 	err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
 	if (err)
@@ -499,12 +499,12 @@
 	return err;
 
 exit_inode:
-	/* ext4_journal_release_buffer(handle, iloc.bh); */
+	/* ext4_handle_release_buffer(handle, iloc.bh); */
 	brelse(iloc.bh);
 exit_dindj:
-	/* ext4_journal_release_buffer(handle, dind); */
+	/* ext4_handle_release_buffer(handle, dind); */
 exit_sbh:
-	/* ext4_journal_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
+	/* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
 exit_dind:
 	brelse(dind);
 exit_bh:
@@ -586,7 +586,7 @@
 			/*
 			int j;
 			for (j = 0; j < i; j++)
-				ext4_journal_release_buffer(handle, primary[j]);
+				ext4_handle_release_buffer(handle, primary[j]);
 			 */
 			goto exit_bh;
 		}

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 203f9e4..22546ad 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c

@@ -54,9 +54,9 @@
 
 static struct proc_dir_entry *ext4_proc_root;
 static struct kset *ext4_kset;
-struct ext4_lazy_init *ext4_li_info;
-struct mutex ext4_li_mtx;
-struct ext4_features *ext4_feat;
+static struct ext4_lazy_init *ext4_li_info;
+static struct mutex ext4_li_mtx;
+static struct ext4_features *ext4_feat;
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
@@ -75,6 +75,7 @@
 static int ext4_freeze(struct super_block *sb);
 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
 		       const char *dev_name, void *data);
+static int ext4_feature_set_ok(struct super_block *sb, int readonly);
 static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
 static void ext4_clear_request_list(void);
@@ -594,7 +595,7 @@
 
 	vaf.fmt = fmt;
 	vaf.va = &args;
-	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
+	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
 	       sb->s_id, function, line, grp);
 	if (ino)
 		printk(KERN_CONT "inode %lu: ", ino);
@@ -997,13 +998,10 @@
 	if (test_opt(sb, OLDALLOC))
 		seq_puts(seq, ",oldalloc");
 #ifdef CONFIG_EXT4_FS_XATTR
-	if (test_opt(sb, XATTR_USER) &&
-		!(def_mount_opts & EXT4_DEFM_XATTR_USER))
+	if (test_opt(sb, XATTR_USER))
 		seq_puts(seq, ",user_xattr");
-	if (!test_opt(sb, XATTR_USER) &&
-	    (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
+	if (!test_opt(sb, XATTR_USER))
 		seq_puts(seq, ",nouser_xattr");
-	}
 #endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 	if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
@@ -1041,8 +1039,8 @@
 	    !(def_mount_opts & EXT4_DEFM_NODELALLOC))
 		seq_puts(seq, ",nodelalloc");
 
-	if (test_opt(sb, MBLK_IO_SUBMIT))
-		seq_puts(seq, ",mblk_io_submit");
+	if (!test_opt(sb, MBLK_IO_SUBMIT))
+		seq_puts(seq, ",nomblk_io_submit");
 	if (sbi->s_stripe)
 		seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
 	/*
@@ -1451,7 +1449,7 @@
 		 * Initialize args struct so we know whether arg was
 		 * found; some options take optional arguments.
 		 */
-		args[0].to = args[0].from = 0;
+		args[0].to = args[0].from = NULL;
 		token = match_token(p, tokens, args);
 		switch (token) {
 		case Opt_bsd_df:
@@ -1771,7 +1769,7 @@
 				return 0;
 			if (option < 0 || option > (1 << 30))
 				return 0;
-			if (!is_power_of_2(option)) {
+			if (option && !is_power_of_2(option)) {
 				ext4_msg(sb, KERN_ERR,
 					 "EXT4-fs: inode_readahead_blks"
 					 " must be a power of 2");
@@ -2120,6 +2118,13 @@
 		return;
 	}
 
+	/* Check if feature set would not allow a r/w mount */
+	if (!ext4_feature_set_ok(sb, 0)) {
+		ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
+			 "unknown ROCOMPAT features");
+		return;
+	}
+
 	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
 		if (es->s_last_orphan)
 			jbd_debug(1, "Errors on filesystem, "
@@ -2412,7 +2417,7 @@
 	if (parse_strtoul(buf, 0x40000000, &t))
 		return -EINVAL;
 
-	if (!is_power_of_2(t))
+	if (t && !is_power_of_2(t))
 		return -EINVAL;
 
 	sbi->s_inode_readahead_blks = t;
@@ -3095,14 +3100,14 @@
 	}
 	if (def_mount_opts & EXT4_DEFM_UID16)
 		set_opt(sb, NO_UID32);
+	/* xattr user namespace & acls are now defaulted on */
 #ifdef CONFIG_EXT4_FS_XATTR
-	if (def_mount_opts & EXT4_DEFM_XATTR_USER)
-		set_opt(sb, XATTR_USER);
+	set_opt(sb, XATTR_USER);
 #endif
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
-	if (def_mount_opts & EXT4_DEFM_ACL)
-		set_opt(sb, POSIX_ACL);
+	set_opt(sb, POSIX_ACL);
 #endif
+	set_opt(sb, MBLK_IO_SUBMIT);
 	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
 		set_opt(sb, JOURNAL_DATA);
 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3516,7 +3521,7 @@
 	 * concurrency isn't really necessary.  Limit it to 1.
 	 */
 	EXT4_SB(sb)->dio_unwritten_wq =
-		alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
+		alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
 	if (!EXT4_SB(sb)->dio_unwritten_wq) {
 		printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
 		goto failed_mount_wq;
@@ -3531,17 +3536,16 @@
 	if (IS_ERR(root)) {
 		ext4_msg(sb, KERN_ERR, "get root inode failed");
 		ret = PTR_ERR(root);
+		root = NULL;
 		goto failed_mount4;
 	}
 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
-		iput(root);
 		ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
 		goto failed_mount4;
 	}
 	sb->s_root = d_alloc_root(root);
 	if (!sb->s_root) {
 		ext4_msg(sb, KERN_ERR, "get root dentry failed");
-		iput(root);
 		ret = -ENOMEM;
 		goto failed_mount4;
 	}
@@ -3657,6 +3661,8 @@
 	goto failed_mount;
 
 failed_mount4:
+	iput(root);
+	sb->s_root = NULL;
 	ext4_msg(sb, KERN_ERR, "mount failed");
 	destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
 failed_mount_wq:

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fc32176..b545ca1 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c

@@ -735,7 +735,7 @@
 			int offset = (char *)s->here - bs->bh->b_data;
 
 			unlock_buffer(bs->bh);
-			jbd2_journal_release_buffer(handle, bs->bh);
+			ext4_handle_release_buffer(handle, bs->bh);
 			if (ce) {
 				mb_cache_entry_release(ce);
 				ce = NULL;
@@ -833,7 +833,7 @@
 			new_bh = sb_getblk(sb, block);
 			if (!new_bh) {
 getblk_failed:
-				ext4_free_blocks(handle, inode, 0, block, 1,
+				ext4_free_blocks(handle, inode, NULL, block, 1,
 						 EXT4_FREE_BLOCKS_METADATA);
 				error = -EIO;
 				goto cleanup;

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 59c6e49..b5ed541 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c

@@ -176,6 +176,17 @@
 }
 
 /*
+ * Remove the inode from the writeback list it is on.
+ */
+void inode_wb_list_del(struct inode *inode)
+{
+	spin_lock(&inode_wb_list_lock);
+	list_del_init(&inode->i_wb_list);
+	spin_unlock(&inode_wb_list_lock);
+}
+
+
+/*
  * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
  * furthest end of its superblock's dirty-inode list.
  *
@@ -188,6 +199,7 @@
 {
 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 
+	assert_spin_locked(&inode_wb_list_lock);
 	if (!list_empty(&wb->b_dirty)) {
 		struct inode *tail;
 
@@ -205,14 +217,17 @@
 {
 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 
+	assert_spin_locked(&inode_wb_list_lock);
 	list_move(&inode->i_wb_list, &wb->b_more_io);
 }
 
 static void inode_sync_complete(struct inode *inode)
 {
 	/*
-	 * Prevent speculative execution through spin_unlock(&inode_lock);
+	 * Prevent speculative execution through
+	 * spin_unlock(&inode_wb_list_lock);
 	 */
+
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_SYNC);
 }
@@ -286,6 +301,7 @@
  */
 static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
 {
+	assert_spin_locked(&inode_wb_list_lock);
 	list_splice_init(&wb->b_more_io, &wb->b_io);
 	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
 }
@@ -306,25 +322,25 @@
 	wait_queue_head_t *wqh;
 
 	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
-	 while (inode->i_state & I_SYNC) {
-		spin_unlock(&inode_lock);
+	while (inode->i_state & I_SYNC) {
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_wb_list_lock);
 		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
+		spin_lock(&inode->i_lock);
 	}
 }
 
 /*
- * Write out an inode's dirty pages.  Called under inode_lock.  Either the
- * caller has ref on the inode (either via __iget or via syscall against an fd)
- * or the inode has I_WILL_FREE set (via generic_forget_inode)
+ * Write out an inode's dirty pages.  Called under inode_wb_list_lock and
+ * inode->i_lock.  Either the caller has an active reference on the inode or
+ * the inode has I_WILL_FREE set.
  *
  * If `wait' is set, wait on the writeout.
  *
  * The whole writeout design is quite complex and fragile.  We want to avoid
  * starvation of particular inodes when others are being redirtied, prevent
  * livelocks, etc.
- *
- * Called under inode_lock.
  */
 static int
 writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
@@ -333,6 +349,9 @@
 	unsigned dirty;
 	int ret;
 
+	assert_spin_locked(&inode_wb_list_lock);
+	assert_spin_locked(&inode->i_lock);
+
 	if (!atomic_read(&inode->i_count))
 		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
 	else
@@ -363,7 +382,8 @@
 	/* Set I_SYNC, reset I_DIRTY_PAGES */
 	inode->i_state |= I_SYNC;
 	inode->i_state &= ~I_DIRTY_PAGES;
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 
 	ret = do_writepages(mapping, wbc);
 
@@ -383,10 +403,10 @@
 	 * due to delalloc, clear dirty metadata flags right before
 	 * write_inode()
 	 */
-	spin_lock(&inode_lock);
+	spin_lock(&inode->i_lock);
 	dirty = inode->i_state & I_DIRTY;
 	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
 	/* Don't write the inode if only I_DIRTY_PAGES was set */
 	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
 		int err = write_inode(inode, wbc);
@@ -394,7 +414,8 @@
 			ret = err;
 	}
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	inode->i_state &= ~I_SYNC;
 	if (!(inode->i_state & I_FREEING)) {
 		if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
@@ -506,7 +527,9 @@
 		 * kind does not need peridic writeout yet, and for the latter
 		 * kind writeout is handled by the freer.
 		 */
+		spin_lock(&inode->i_lock);
 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+			spin_unlock(&inode->i_lock);
 			requeue_io(inode);
 			continue;
 		}
@@ -515,10 +538,13 @@
 		 * Was this inode dirtied after sync_sb_inodes was called?
 		 * This keeps sync from extra jobs and livelock.
 		 */
-		if (inode_dirtied_after(inode, wbc->wb_start))
+		if (inode_dirtied_after(inode, wbc->wb_start)) {
+			spin_unlock(&inode->i_lock);
 			return 1;
+		}
 
 		__iget(inode);
+
 		pages_skipped = wbc->pages_skipped;
 		writeback_single_inode(inode, wbc);
 		if (wbc->pages_skipped != pages_skipped) {
@@ -528,10 +554,11 @@
 			 */
 			redirty_tail(inode);
 		}
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_wb_list_lock);
 		iput(inode);
 		cond_resched();
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
 		if (wbc->nr_to_write <= 0) {
 			wbc->more_io = 1;
 			return 1;
@@ -550,7 +577,7 @@
 
 	if (!wbc->wb_start)
 		wbc->wb_start = jiffies; /* livelock avoidance */
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
 		queue_io(wb, wbc->older_than_this);
 
@@ -568,7 +595,7 @@
 		if (ret)
 			break;
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 	/* Leave any unwritten inodes on b_io */
 }
 
@@ -577,11 +604,11 @@
 {
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	if (!wbc->for_kupdate || list_empty(&wb->b_io))
 		queue_io(wb, wbc->older_than_this);
 	writeback_sb_inodes(sb, wb, wbc, true);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 }
 
 /*
@@ -720,13 +747,15 @@
 		 * become available for writeback. Otherwise
 		 * we'll just busyloop.
 		 */
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
 		if (!list_empty(&wb->b_more_io))  {
 			inode = wb_inode(wb->b_more_io.prev);
 			trace_wbc_writeback_wait(&wbc, wb->bdi);
+			spin_lock(&inode->i_lock);
 			inode_wait_for_writeback(inode);
+			spin_unlock(&inode->i_lock);
 		}
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_wb_list_lock);
 	}
 
 	return wrote;
@@ -992,7 +1021,6 @@
 {
 	struct super_block *sb = inode->i_sb;
 	struct backing_dev_info *bdi = NULL;
-	bool wakeup_bdi = false;
 
 	/*
 	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -1016,7 +1044,7 @@
 	if (unlikely(block_dump))
 		block_dump___mark_inode_dirty(inode);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode->i_lock);
 	if ((inode->i_state & flags) != flags) {
 		const int was_dirty = inode->i_state & I_DIRTY;
 
@@ -1028,7 +1056,7 @@
 		 * superblock list, based upon its state.
 		 */
 		if (inode->i_state & I_SYNC)
-			goto out;
+			goto out_unlock_inode;
 
 		/*
 		 * Only add valid (hashed) inodes to the superblock's
@@ -1036,16 +1064,17 @@
 		 */
 		if (!S_ISBLK(inode->i_mode)) {
 			if (inode_unhashed(inode))
-				goto out;
+				goto out_unlock_inode;
 		}
 		if (inode->i_state & I_FREEING)
-			goto out;
+			goto out_unlock_inode;
 
 		/*
 		 * If the inode was already on b_dirty/b_io/b_more_io, don't
 		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
+			bool wakeup_bdi = false;
 			bdi = inode_to_bdi(inode);
 
 			if (bdi_cap_writeback_dirty(bdi)) {
@@ -1062,15 +1091,20 @@
 					wakeup_bdi = true;
 			}
 
+			spin_unlock(&inode->i_lock);
+			spin_lock(&inode_wb_list_lock);
 			inode->dirtied_when = jiffies;
 			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+			spin_unlock(&inode_wb_list_lock);
+
+			if (wakeup_bdi)
+				bdi_wakeup_thread_delayed(bdi);
+			return;
 		}
 	}
-out:
-	spin_unlock(&inode_lock);
+out_unlock_inode:
+	spin_unlock(&inode->i_lock);
 
-	if (wakeup_bdi)
-		bdi_wakeup_thread_delayed(bdi);
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
 
@@ -1101,7 +1135,7 @@
 	 */
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 
 	/*
 	 * Data integrity sync. Must wait for all pages under writeback,
@@ -1111,22 +1145,25 @@
 	 * we still have to wait for that writeout.
 	 */
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		struct address_space *mapping;
+		struct address_space *mapping = inode->i_mapping;
 
-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+		spin_lock(&inode->i_lock);
+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+		    (mapping->nrpages == 0)) {
+			spin_unlock(&inode->i_lock);
 			continue;
-		mapping = inode->i_mapping;
-		if (mapping->nrpages == 0)
-			continue;
+		}
 		__iget(inode);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_sb_list_lock);
+
 		/*
-		 * We hold a reference to 'inode' so it couldn't have
-		 * been removed from s_inodes list while we dropped the
-		 * inode_lock.  We cannot iput the inode now as we can
-		 * be holding the last reference and we cannot iput it
-		 * under inode_lock. So we keep the reference and iput
-		 * it later.
+		 * We hold a reference to 'inode' so it couldn't have been
+		 * removed from s_inodes list while we dropped the
+		 * inode_sb_list_lock.  We cannot iput the inode now as we can
+		 * be holding the last reference and we cannot iput it under
+		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * later.
 		 */
 		iput(old_inode);
 		old_inode = inode;
@@ -1135,9 +1172,9 @@
 
 		cond_resched();
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(old_inode);
 }
 
@@ -1271,9 +1308,11 @@
 		wbc.nr_to_write = 0;
 
 	might_sleep();
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	ret = writeback_single_inode(inode, &wbc);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 	if (sync)
 		inode_sync_wait(inode);
 	return ret;
@@ -1295,9 +1334,11 @@
 {
 	int ret;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	ret = writeback_single_inode(inode, wbc);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 	return ret;
 }
 EXPORT_SYMBOL(sync_inode);

diff --git a/fs/inode.c b/fs/inode.c
index 0b3da4a..5f4e11a 100644
--- a/fs/inode.c
+++ b/fs/inode.c

@@ -26,6 +26,38 @@
 #include <linux/posix_acl.h>
 #include <linux/ima.h>
 #include <linux/cred.h>
+#include "internal.h"
+
+/*
+ * inode locking rules.
+ *
+ * inode->i_lock protects:
+ *   inode->i_state, inode->i_hash, __iget()
+ * inode_lru_lock protects:
+ *   inode_lru, inode->i_lru
+ * inode_sb_list_lock protects:
+ *   sb->s_inodes, inode->i_sb_list
+ * inode_wb_list_lock protects:
+ *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
+ * inode_hash_lock protects:
+ *   inode_hashtable, inode->i_hash
+ *
+ * Lock ordering:
+ *
+ * inode_sb_list_lock
+ *   inode->i_lock
+ *     inode_lru_lock
+ *
+ * inode_wb_list_lock
+ *   inode->i_lock
+ *
+ * inode_hash_lock
+ *   inode_sb_list_lock
+ *   inode->i_lock
+ *
+ * iunique_lock
+ *   inode_hash_lock
+ */
 
 /*
  * This is needed for the following functions:
@@ -60,6 +92,8 @@
 
 static unsigned int i_hash_mask __read_mostly;
 static unsigned int i_hash_shift __read_mostly;
+static struct hlist_head *inode_hashtable __read_mostly;
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
 
 /*
  * Each inode can be on two separate lists. One is
@@ -74,15 +108,10 @@
  */
 
 static LIST_HEAD(inode_lru);
-static struct hlist_head *inode_hashtable __read_mostly;
+static DEFINE_SPINLOCK(inode_lru_lock);
 
-/*
- * A simple spinlock to protect the list manipulations.
- *
- * NOTE! You also have to own the lock if you change
- * the i_state of an inode while it is in use..
- */
-DEFINE_SPINLOCK(inode_lock);
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
 
 /*
  * iprune_sem provides exclusion between the icache shrinking and the
@@ -137,15 +166,6 @@
 }
 #endif
 
-static void wake_up_inode(struct inode *inode)
-{
-	/*
-	 * Prevent speculative execution through spin_unlock(&inode_lock);
-	 */
-	smp_mb();
-	wake_up_bit(&inode->i_state, __I_NEW);
-}
-
 /**
  * inode_init_always - perform inode structure intialisation
  * @sb: superblock inode belongs to
@@ -336,7 +356,7 @@
 }
 
 /*
- * inode_lock must be held
+ * inode->i_lock must be held
  */
 void __iget(struct inode *inode)
 {
@@ -354,23 +374,22 @@
 
 static void inode_lru_list_add(struct inode *inode)
 {
+	spin_lock(&inode_lru_lock);
 	if (list_empty(&inode->i_lru)) {
 		list_add(&inode->i_lru, &inode_lru);
 		inodes_stat.nr_unused++;
 	}
+	spin_unlock(&inode_lru_lock);
 }
 
 static void inode_lru_list_del(struct inode *inode)
 {
+	spin_lock(&inode_lru_lock);
 	if (!list_empty(&inode->i_lru)) {
 		list_del_init(&inode->i_lru);
 		inodes_stat.nr_unused--;
 	}
-}
-
-static inline void __inode_sb_list_add(struct inode *inode)
-{
-	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
+	spin_unlock(&inode_lru_lock);
 }
 
 /**
@@ -379,15 +398,17 @@
  */
 void inode_sb_list_add(struct inode *inode)
 {
-	spin_lock(&inode_lock);
-	__inode_sb_list_add(inode);
-	spin_unlock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
+	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
+	spin_unlock(&inode_sb_list_lock);
 }
 EXPORT_SYMBOL_GPL(inode_sb_list_add);
 
-static inline void __inode_sb_list_del(struct inode *inode)
+static inline void inode_sb_list_del(struct inode *inode)
 {
+	spin_lock(&inode_sb_list_lock);
 	list_del_init(&inode->i_sb_list);
+	spin_unlock(&inode_sb_list_lock);
 }
 
 static unsigned long hash(struct super_block *sb, unsigned long hashval)
@@ -412,24 +433,15 @@
 {
 	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
+	spin_lock(&inode->i_lock);
 	hlist_add_head(&inode->i_hash, b);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_hash_lock);
 }
 EXPORT_SYMBOL(__insert_inode_hash);
 
 /**
- *	__remove_inode_hash - remove an inode from the hash
- *	@inode: inode to unhash
- *
- *	Remove an inode from the superblock.
- */
-static void __remove_inode_hash(struct inode *inode)
-{
-	hlist_del_init(&inode->i_hash);
-}
-
-/**
  *	remove_inode_hash - remove an inode from the hash
  *	@inode: inode to unhash
  *
@@ -437,9 +449,11 @@
  */
 void remove_inode_hash(struct inode *inode)
 {
-	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
+	spin_lock(&inode->i_lock);
 	hlist_del_init(&inode->i_hash);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_hash_lock);
 }
 EXPORT_SYMBOL(remove_inode_hash);
 
@@ -456,10 +470,29 @@
 }
 EXPORT_SYMBOL(end_writeback);
 
+/*
+ * Free the inode passed in, removing it from the lists it is still connected
+ * to. We remove any pages still attached to the inode and wait for any IO that
+ * is still in progress before finally destroying the inode.
+ *
+ * An inode must already be marked I_FREEING so that we avoid the inode being
+ * moved back onto lists if we race with other code that manipulates the lists
+ * (e.g. writeback_single_inode). The caller is responsible for setting this.
+ *
+ * An inode must already be removed from the LRU list before being evicted from
+ * the cache. This should occur atomically with setting the I_FREEING state
+ * flag, so no inodes here should ever be on the LRU when being evicted.
+ */
 static void evict(struct inode *inode)
 {
 	const struct super_operations *op = inode->i_sb->s_op;
 
+	BUG_ON(!(inode->i_state & I_FREEING));
+	BUG_ON(!list_empty(&inode->i_lru));
+
+	inode_wb_list_del(inode);
+	inode_sb_list_del(inode);
+
 	if (op->evict_inode) {
 		op->evict_inode(inode);
 	} else {
@@ -471,6 +504,15 @@
 		bd_forget(inode);
 	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
 		cd_forget(inode);
+
+	remove_inode_hash(inode);
+
+	spin_lock(&inode->i_lock);
+	wake_up_bit(&inode->i_state, __I_NEW);
+	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
+	spin_unlock(&inode->i_lock);
+
+	destroy_inode(inode);
 }
 
 /*
@@ -489,14 +531,6 @@
 		list_del_init(&inode->i_lru);
 
 		evict(inode);
-
-		spin_lock(&inode_lock);
-		__remove_inode_hash(inode);
-		__inode_sb_list_del(inode);
-		spin_unlock(&inode_lock);
-
-		wake_up_inode(inode);
-		destroy_inode(inode);
 	}
 }
 
@@ -514,25 +548,23 @@
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
 		if (atomic_read(&inode->i_count))
 			continue;
-		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
+
+		spin_lock(&inode->i_lock);
+		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 
 		inode->i_state |= I_FREEING;
-
-		/*
-		 * Move the inode off the IO lists and LRU once I_FREEING is
-		 * set so that it won't get moved back on there if it is dirty.
-		 */
-		list_move(&inode->i_lru, &dispose);
-		list_del_init(&inode->i_wb_list);
-		if (!(inode->i_state & (I_DIRTY | I_SYNC)))
-			inodes_stat.nr_unused--;
+		inode_lru_list_del(inode);
+		spin_unlock(&inode->i_lock);
+		list_add(&inode->i_lru, &dispose);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 
 	dispose_list(&dispose);
 
@@ -561,31 +593,30 @@
 	struct inode *inode, *next;
 	LIST_HEAD(dispose);
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
-		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
+		spin_lock(&inode->i_lock);
+		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 		if (inode->i_state & I_DIRTY && !kill_dirty) {
+			spin_unlock(&inode->i_lock);
 			busy = 1;
 			continue;
 		}
 		if (atomic_read(&inode->i_count)) {
+			spin_unlock(&inode->i_lock);
 			busy = 1;
 			continue;
 		}
 
 		inode->i_state |= I_FREEING;
-
-		/*
-		 * Move the inode off the IO lists and LRU once I_FREEING is
-		 * set so that it won't get moved back on there if it is dirty.
-		 */
-		list_move(&inode->i_lru, &dispose);
-		list_del_init(&inode->i_wb_list);
-		if (!(inode->i_state & (I_DIRTY | I_SYNC)))
-			inodes_stat.nr_unused--;
+		inode_lru_list_del(inode);
+		spin_unlock(&inode->i_lock);
+		list_add(&inode->i_lru, &dispose);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 
 	dispose_list(&dispose);
 
@@ -607,7 +638,7 @@
 
 /*
  * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
- * temporary list and then are freed outside inode_lock by dispose_list().
+ * temporary list and then are freed outside inode_lru_lock by dispose_list().
  *
  * Any inodes which are pinned purely because of attached pagecache have their
  * pagecache removed.  If the inode has metadata buffers attached to
@@ -628,7 +659,7 @@
 	unsigned long reap = 0;
 
 	down_read(&iprune_sem);
-	spin_lock(&inode_lock);
+	spin_lock(&inode_lru_lock);
 	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
 		struct inode *inode;
 
@@ -638,53 +669,67 @@
 		inode = list_entry(inode_lru.prev, struct inode, i_lru);
 
 		/*
+		 * we are inverting the inode_lru_lock/inode->i_lock here,
+		 * so use a trylock. If we fail to get the lock, just move the
+		 * inode to the back of the list so we don't spin on it.
+		 */
+		if (!spin_trylock(&inode->i_lock)) {
+			list_move(&inode->i_lru, &inode_lru);
+			continue;
+		}
+
+		/*
 		 * Referenced or dirty inodes are still in use. Give them
 		 * another pass through the LRU as we canot reclaim them now.
 		 */
 		if (atomic_read(&inode->i_count) ||
 		    (inode->i_state & ~I_REFERENCED)) {
 			list_del_init(&inode->i_lru);
+			spin_unlock(&inode->i_lock);
 			inodes_stat.nr_unused--;
 			continue;
 		}
 
 		/* recently referenced inodes get one more pass */
 		if (inode->i_state & I_REFERENCED) {
-			list_move(&inode->i_lru, &inode_lru);
 			inode->i_state &= ~I_REFERENCED;
+			list_move(&inode->i_lru, &inode_lru);
+			spin_unlock(&inode->i_lock);
 			continue;
 		}
 		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
 			__iget(inode);
-			spin_unlock(&inode_lock);
+			spin_unlock(&inode->i_lock);
+			spin_unlock(&inode_lru_lock);
 			if (remove_inode_buffers(inode))
 				reap += invalidate_mapping_pages(&inode->i_data,
 								0, -1);
 			iput(inode);
-			spin_lock(&inode_lock);
+			spin_lock(&inode_lru_lock);
 
 			if (inode != list_entry(inode_lru.next,
 						struct inode, i_lru))
 				continue;	/* wrong inode or list_empty */
-			if (!can_unuse(inode))
+			/* avoid lock inversions with trylock */
+			if (!spin_trylock(&inode->i_lock))
 				continue;
+			if (!can_unuse(inode)) {
+				spin_unlock(&inode->i_lock);
+				continue;
+			}
 		}
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state |= I_FREEING;
+		spin_unlock(&inode->i_lock);
 
-		/*
-		 * Move the inode off the IO lists and LRU once I_FREEING is
-		 * set so that it won't get moved back on there if it is dirty.
-		 */
 		list_move(&inode->i_lru, &freeable);
-		list_del_init(&inode->i_wb_list);
 		inodes_stat.nr_unused--;
 	}
 	if (current_is_kswapd())
 		__count_vm_events(KSWAPD_INODESTEAL, reap);
 	else
 		__count_vm_events(PGINODESTEAL, reap);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_lru_lock);
 
 	dispose_list(&freeable);
 	up_read(&iprune_sem);
@@ -733,15 +778,21 @@
 
 repeat:
 	hlist_for_each_entry(inode, node, head, i_hash) {
-		if (inode->i_sb != sb)
+		spin_lock(&inode->i_lock);
+		if (inode->i_sb != sb) {
+			spin_unlock(&inode->i_lock);
 			continue;
-		if (!test(inode, data))
+		}
+		if (!test(inode, data)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
 		__iget(inode);
+		spin_unlock(&inode->i_lock);
 		return inode;
 	}
 	return NULL;
@@ -759,15 +810,21 @@
 
 repeat:
 	hlist_for_each_entry(inode, node, head, i_hash) {
-		if (inode->i_ino != ino)
+		spin_lock(&inode->i_lock);
+		if (inode->i_ino != ino) {
+			spin_unlock(&inode->i_lock);
 			continue;
-		if (inode->i_sb != sb)
+		}
+		if (inode->i_sb != sb) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
 		__iget(inode);
+		spin_unlock(&inode->i_lock);
 		return inode;
 	}
 	return NULL;
@@ -827,19 +884,26 @@
 {
 	struct inode *inode;
 
-	spin_lock_prefetch(&inode_lock);
+	spin_lock_prefetch(&inode_sb_list_lock);
 
 	inode = alloc_inode(sb);
 	if (inode) {
-		spin_lock(&inode_lock);
-		__inode_sb_list_add(inode);
+		spin_lock(&inode->i_lock);
 		inode->i_state = 0;
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		inode_sb_list_add(inode);
 	}
 	return inode;
 }
 EXPORT_SYMBOL(new_inode);
 
+/**
+ * unlock_new_inode - clear the I_NEW state and wake up any waiters
+ * @inode:	new inode to unlock
+ *
+ * Called when the inode is fully initialised to clear the new state of the
+ * inode and wake up anyone waiting for the inode to finish initialisation.
+ */
 void unlock_new_inode(struct inode *inode)
 {
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -859,51 +923,67 @@
 		}
 	}
 #endif
-	/*
-	 * This is special!  We do not need the spinlock when clearing I_NEW,
-	 * because we're guaranteed that nobody else tries to do anything about
-	 * the state of the inode when it is locked, as we just created it (so
-	 * there can be no old holders that haven't tested I_NEW).
-	 * However we must emit the memory barrier so that other CPUs reliably
-	 * see the clearing of I_NEW after the other inode initialisation has
-	 * completed.
-	 */
-	smp_mb();
+	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_NEW));
 	inode->i_state &= ~I_NEW;
-	wake_up_inode(inode);
+	wake_up_bit(&inode->i_state, __I_NEW);
+	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(unlock_new_inode);
 
-/*
- * This is called without the inode lock held.. Be careful.
+/**
+ * iget5_locked - obtain an inode from a mounted file system
+ * @sb:		super block of file system
+ * @hashval:	hash value (usually inode number) to get
+ * @test:	callback used for comparisons between inodes
+ * @set:	callback used to initialize a new struct inode
+ * @data:	opaque data pointer to pass to @test and @set
  *
- * We no longer cache the sb_flags in i_flags - see fs.h
- *	-- rmk@arm.uk.linux.org
+ * Search for the inode specified by @hashval and @data in the inode cache,
+ * and if present it is return it with an increased reference count. This is
+ * a generalized version of iget_locked() for file systems where the inode
+ * number is not sufficient for unique identification of an inode.
+ *
+ * If the inode is not in cache, allocate a new inode and return it locked,
+ * hashed, and with the I_NEW flag set. The file system gets to fill it in
+ * before unlocking it via unlock_new_inode().
+ *
+ * Note both @test and @set are called with the inode_hash_lock held, so can't
+ * sleep.
  */
-static struct inode *get_new_inode(struct super_block *sb,
-				struct hlist_head *head,
-				int (*test)(struct inode *, void *),
-				int (*set)(struct inode *, void *),
-				void *data)
+struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
+		int (*test)(struct inode *, void *),
+		int (*set)(struct inode *, void *), void *data)
 {
+	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 	struct inode *inode;
 
+	spin_lock(&inode_hash_lock);
+	inode = find_inode(sb, head, test, data);
+	spin_unlock(&inode_hash_lock);
+
+	if (inode) {
+		wait_on_inode(inode);
+		return inode;
+	}
+
 	inode = alloc_inode(sb);
 	if (inode) {
 		struct inode *old;
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		/* We released the lock, so.. */
 		old = find_inode(sb, head, test, data);
 		if (!old) {
 			if (set(inode, data))
 				goto set_failed;
 
-			hlist_add_head(&inode->i_hash, head);
-			__inode_sb_list_add(inode);
+			spin_lock(&inode->i_lock);
 			inode->i_state = I_NEW;
-			spin_unlock(&inode_lock);
+			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode->i_lock);
+			inode_sb_list_add(inode);
+			spin_unlock(&inode_hash_lock);
 
 			/* Return the locked inode with I_NEW set, the
 			 * caller is responsible for filling in the contents
@@ -916,7 +996,7 @@
 		 * us. Use the old inode instead of the one we just
 		 * allocated.
 		 */
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_hash_lock);
 		destroy_inode(inode);
 		inode = old;
 		wait_on_inode(inode);
@@ -924,33 +1004,53 @@
 	return inode;
 
 set_failed:
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_hash_lock);
 	destroy_inode(inode);
 	return NULL;
 }
+EXPORT_SYMBOL(iget5_locked);
 
-/*
- * get_new_inode_fast is the fast path version of get_new_inode, see the
- * comment at iget_locked for details.
+/**
+ * iget_locked - obtain an inode from a mounted file system
+ * @sb:		super block of file system
+ * @ino:	inode number to get
+ *
+ * Search for the inode specified by @ino in the inode cache and if present
+ * return it with an increased reference count. This is for file systems
+ * where the inode number is sufficient for unique identification of an inode.
+ *
+ * If the inode is not in cache, allocate a new inode and return it locked,
+ * hashed, and with the I_NEW flag set.  The file system gets to fill it in
+ * before unlocking it via unlock_new_inode().
  */
-static struct inode *get_new_inode_fast(struct super_block *sb,
-				struct hlist_head *head, unsigned long ino)
+struct inode *iget_locked(struct super_block *sb, unsigned long ino)
 {
+	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
 
+	spin_lock(&inode_hash_lock);
+	inode = find_inode_fast(sb, head, ino);
+	spin_unlock(&inode_hash_lock);
+	if (inode) {
+		wait_on_inode(inode);
+		return inode;
+	}
+
 	inode = alloc_inode(sb);
 	if (inode) {
 		struct inode *old;
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		/* We released the lock, so.. */
 		old = find_inode_fast(sb, head, ino);
 		if (!old) {
 			inode->i_ino = ino;
-			hlist_add_head(&inode->i_hash, head);
-			__inode_sb_list_add(inode);
+			spin_lock(&inode->i_lock);
 			inode->i_state = I_NEW;
-			spin_unlock(&inode_lock);
+			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode->i_lock);
+			inode_sb_list_add(inode);
+			spin_unlock(&inode_hash_lock);
 
 			/* Return the locked inode with I_NEW set, the
 			 * caller is responsible for filling in the contents
@@ -963,13 +1063,14 @@
 		 * us. Use the old inode instead of the one we just
 		 * allocated.
 		 */
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_hash_lock);
 		destroy_inode(inode);
 		inode = old;
 		wait_on_inode(inode);
 	}
 	return inode;
 }
+EXPORT_SYMBOL(iget_locked);
 
 /*
  * search the inode cache for a matching inode number.
@@ -984,10 +1085,14 @@
 	struct hlist_node *node;
 	struct inode *inode;
 
+	spin_lock(&inode_hash_lock);
 	hlist_for_each_entry(inode, node, b, i_hash) {
-		if (inode->i_ino == ino && inode->i_sb == sb)
+		if (inode->i_ino == ino && inode->i_sb == sb) {
+			spin_unlock(&inode_hash_lock);
 			return 0;
+		}
 	}
+	spin_unlock(&inode_hash_lock);
 
 	return 1;
 }
@@ -1017,7 +1122,6 @@
 	static unsigned int counter;
 	ino_t res;
 
-	spin_lock(&inode_lock);
 	spin_lock(&iunique_lock);
 	do {
 		if (counter <= max_reserved)
@@ -1025,7 +1129,6 @@
 		res = counter++;
 	} while (!test_inode_iunique(sb, res));
 	spin_unlock(&iunique_lock);
-	spin_unlock(&inode_lock);
 
 	return res;
 }
@@ -1033,116 +1136,50 @@
 
 struct inode *igrab(struct inode *inode)
 {
-	spin_lock(&inode_lock);
-	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
+	spin_lock(&inode->i_lock);
+	if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
 		__iget(inode);
-	else
+		spin_unlock(&inode->i_lock);
+	} else {
+		spin_unlock(&inode->i_lock);
 		/*
 		 * Handle the case where s_op->clear_inode is not been
 		 * called yet, and somebody is calling igrab
 		 * while the inode is getting freed.
 		 */
 		inode = NULL;
-	spin_unlock(&inode_lock);
+	}
 	return inode;
 }
 EXPORT_SYMBOL(igrab);
 
 /**
- * ifind - internal function, you want ilookup5() or iget5().
- * @sb:		super block of file system to search
- * @head:       the head of the list to search
- * @test:	callback used for comparisons between inodes
- * @data:	opaque data pointer to pass to @test
- * @wait:	if true wait for the inode to be unlocked, if false do not
- *
- * ifind() searches for the inode specified by @data in the inode
- * cache. This is a generalized version of ifind_fast() for file systems where
- * the inode number is not sufficient for unique identification of an inode.
- *
- * If the inode is in the cache, the inode is returned with an incremented
- * reference count.
- *
- * Otherwise NULL is returned.
- *
- * Note, @test is called with the inode_lock held, so can't sleep.
- */
-static struct inode *ifind(struct super_block *sb,
-		struct hlist_head *head, int (*test)(struct inode *, void *),
-		void *data, const int wait)
-{
-	struct inode *inode;
-
-	spin_lock(&inode_lock);
-	inode = find_inode(sb, head, test, data);
-	if (inode) {
-		spin_unlock(&inode_lock);
-		if (likely(wait))
-			wait_on_inode(inode);
-		return inode;
-	}
-	spin_unlock(&inode_lock);
-	return NULL;
-}
-
-/**
- * ifind_fast - internal function, you want ilookup() or iget().
- * @sb:		super block of file system to search
- * @head:       head of the list to search
- * @ino:	inode number to search for
- *
- * ifind_fast() searches for the inode @ino in the inode cache. This is for
- * file systems where the inode number is sufficient for unique identification
- * of an inode.
- *
- * If the inode is in the cache, the inode is returned with an incremented
- * reference count.
- *
- * Otherwise NULL is returned.
- */
-static struct inode *ifind_fast(struct super_block *sb,
-		struct hlist_head *head, unsigned long ino)
-{
-	struct inode *inode;
-
-	spin_lock(&inode_lock);
-	inode = find_inode_fast(sb, head, ino);
-	if (inode) {
-		spin_unlock(&inode_lock);
-		wait_on_inode(inode);
-		return inode;
-	}
-	spin_unlock(&inode_lock);
-	return NULL;
-}
-
-/**
  * ilookup5_nowait - search for an inode in the inode cache
  * @sb:		super block of file system to search
  * @hashval:	hash value (usually inode number) to search for
  * @test:	callback used for comparisons between inodes
  * @data:	opaque data pointer to pass to @test
  *
- * ilookup5() uses ifind() to search for the inode specified by @hashval and
- * @data in the inode cache. This is a generalized version of ilookup() for
- * file systems where the inode number is not sufficient for unique
- * identification of an inode.
- *
+ * Search for the inode specified by @hashval and @data in the inode cache.
  * If the inode is in the cache, the inode is returned with an incremented
- * reference count.  Note, the inode lock is not waited upon so you have to be
- * very careful what you do with the returned inode.  You probably should be
- * using ilookup5() instead.
+ * reference count.
  *
- * Otherwise NULL is returned.
+ * Note: I_NEW is not waited upon so you have to be very careful what you do
+ * with the returned inode.  You probably should be using ilookup5() instead.
  *
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note2: @test is called with the inode_hash_lock held, so can't sleep.
  */
 struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+	struct inode *inode;
 
-	return ifind(sb, head, test, data, 0);
+	spin_lock(&inode_hash_lock);
+	inode = find_inode(sb, head, test, data);
+	spin_unlock(&inode_hash_lock);
+
+	return inode;
 }
 EXPORT_SYMBOL(ilookup5_nowait);
 
@@ -1153,24 +1190,24 @@
  * @test:	callback used for comparisons between inodes
  * @data:	opaque data pointer to pass to @test
  *
- * ilookup5() uses ifind() to search for the inode specified by @hashval and
- * @data in the inode cache. This is a generalized version of ilookup() for
- * file systems where the inode number is not sufficient for unique
- * identification of an inode.
- *
- * If the inode is in the cache, the inode lock is waited upon and the inode is
+ * Search for the inode specified by @hashval and @data in the inode cache,
+ * and if the inode is in the cache, return the inode with an incremented
+ * reference count.  Waits on I_NEW before returning the inode.
  * returned with an incremented reference count.
  *
- * Otherwise NULL is returned.
+ * This is a generalized version of ilookup() for file systems where the
+ * inode number is not sufficient for unique identification of an inode.
  *
- * Note, @test is called with the inode_lock held, so can't sleep.
+ * Note: @test is called with the inode_hash_lock held, so can't sleep.
  */
 struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
-	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+	struct inode *inode = ilookup5_nowait(sb, hashval, test, data);
 
-	return ifind(sb, head, test, data, 1);
+	if (inode)
+		wait_on_inode(inode);
+	return inode;
 }
 EXPORT_SYMBOL(ilookup5);
 
@@ -1179,119 +1216,57 @@
  * @sb:		super block of file system to search
  * @ino:	inode number to search for
  *
- * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache.
- * This is for file systems where the inode number is sufficient for unique
- * identification of an inode.
- *
- * If the inode is in the cache, the inode is returned with an incremented
- * reference count.
- *
- * Otherwise NULL is returned.
+ * Search for the inode @ino in the inode cache, and if the inode is in the
+ * cache, the inode is returned with an incremented reference count.
  */
 struct inode *ilookup(struct super_block *sb, unsigned long ino)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
+	struct inode *inode;
 
-	return ifind_fast(sb, head, ino);
+	spin_lock(&inode_hash_lock);
+	inode = find_inode_fast(sb, head, ino);
+	spin_unlock(&inode_hash_lock);
+
+	if (inode)
+		wait_on_inode(inode);
+	return inode;
 }
 EXPORT_SYMBOL(ilookup);
 
-/**
- * iget5_locked - obtain an inode from a mounted file system
- * @sb:		super block of file system
- * @hashval:	hash value (usually inode number) to get
- * @test:	callback used for comparisons between inodes
- * @set:	callback used to initialize a new struct inode
- * @data:	opaque data pointer to pass to @test and @set
- *
- * iget5_locked() uses ifind() to search for the inode specified by @hashval
- * and @data in the inode cache and if present it is returned with an increased
- * reference count. This is a generalized version of iget_locked() for file
- * systems where the inode number is not sufficient for unique identification
- * of an inode.
- *
- * If the inode is not in cache, get_new_inode() is called to allocate a new
- * inode and this is returned locked, hashed, and with the I_NEW flag set. The
- * file system gets to fill it in before unlocking it via unlock_new_inode().
- *
- * Note both @test and @set are called with the inode_lock held, so can't sleep.
- */
-struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
-		int (*test)(struct inode *, void *),
-		int (*set)(struct inode *, void *), void *data)
-{
-	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
-	struct inode *inode;
-
-	inode = ifind(sb, head, test, data, 1);
-	if (inode)
-		return inode;
-	/*
-	 * get_new_inode() will do the right thing, re-trying the search
-	 * in case it had to block at any point.
-	 */
-	return get_new_inode(sb, head, test, set, data);
-}
-EXPORT_SYMBOL(iget5_locked);
-
-/**
- * iget_locked - obtain an inode from a mounted file system
- * @sb:		super block of file system
- * @ino:	inode number to get
- *
- * iget_locked() uses ifind_fast() to search for the inode specified by @ino in
- * the inode cache and if present it is returned with an increased reference
- * count. This is for file systems where the inode number is sufficient for
- * unique identification of an inode.
- *
- * If the inode is not in cache, get_new_inode_fast() is called to allocate a
- * new inode and this is returned locked, hashed, and with the I_NEW flag set.
- * The file system gets to fill it in before unlocking it via
- * unlock_new_inode().
- */
-struct inode *iget_locked(struct super_block *sb, unsigned long ino)
-{
-	struct hlist_head *head = inode_hashtable + hash(sb, ino);
-	struct inode *inode;
-
-	inode = ifind_fast(sb, head, ino);
-	if (inode)
-		return inode;
-	/*
-	 * get_new_inode_fast() will do the right thing, re-trying the search
-	 * in case it had to block at any point.
-	 */
-	return get_new_inode_fast(sb, head, ino);
-}
-EXPORT_SYMBOL(iget_locked);
-
 int insert_inode_locked(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
 	ino_t ino = inode->i_ino;
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 
-	inode->i_state |= I_NEW;
 	while (1) {
 		struct hlist_node *node;
 		struct inode *old = NULL;
-		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		hlist_for_each_entry(old, node, head, i_hash) {
 			if (old->i_ino != ino)
 				continue;
 			if (old->i_sb != sb)
 				continue;
-			if (old->i_state & (I_FREEING|I_WILL_FREE))
+			spin_lock(&old->i_lock);
+			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
+				spin_unlock(&old->i_lock);
 				continue;
+			}
 			break;
 		}
 		if (likely(!node)) {
+			spin_lock(&inode->i_lock);
+			inode->i_state |= I_NEW;
 			hlist_add_head(&inode->i_hash, head);
-			spin_unlock(&inode_lock);
+			spin_unlock(&inode->i_lock);
+			spin_unlock(&inode_hash_lock);
 			return 0;
 		}
 		__iget(old);
-		spin_unlock(&inode_lock);
+		spin_unlock(&old->i_lock);
+		spin_unlock(&inode_hash_lock);
 		wait_on_inode(old);
 		if (unlikely(!inode_unhashed(old))) {
 			iput(old);
@@ -1308,29 +1283,34 @@
 	struct super_block *sb = inode->i_sb;
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 
-	inode->i_state |= I_NEW;
-
 	while (1) {
 		struct hlist_node *node;
 		struct inode *old = NULL;
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_hash_lock);
 		hlist_for_each_entry(old, node, head, i_hash) {
 			if (old->i_sb != sb)
 				continue;
 			if (!test(old, data))
 				continue;
-			if (old->i_state & (I_FREEING|I_WILL_FREE))
+			spin_lock(&old->i_lock);
+			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
+				spin_unlock(&old->i_lock);
 				continue;
+			}
 			break;
 		}
 		if (likely(!node)) {
+			spin_lock(&inode->i_lock);
+			inode->i_state |= I_NEW;
 			hlist_add_head(&inode->i_hash, head);
-			spin_unlock(&inode_lock);
+			spin_unlock(&inode->i_lock);
+			spin_unlock(&inode_hash_lock);
 			return 0;
 		}
 		__iget(old);
-		spin_unlock(&inode_lock);
+		spin_unlock(&old->i_lock);
+		spin_unlock(&inode_hash_lock);
 		wait_on_inode(old);
 		if (unlikely(!inode_unhashed(old))) {
 			iput(old);
@@ -1375,47 +1355,35 @@
 	const struct super_operations *op = inode->i_sb->s_op;
 	int drop;
 
+	WARN_ON(inode->i_state & I_NEW);
+
 	if (op && op->drop_inode)
 		drop = op->drop_inode(inode);
 	else
 		drop = generic_drop_inode(inode);
 
-	if (!drop) {
-		if (sb->s_flags & MS_ACTIVE) {
-			inode->i_state |= I_REFERENCED;
-			if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
-				inode_lru_list_add(inode);
-			}
-			spin_unlock(&inode_lock);
-			return;
-		}
-		WARN_ON(inode->i_state & I_NEW);
-		inode->i_state |= I_WILL_FREE;
-		spin_unlock(&inode_lock);
-		write_inode_now(inode, 1);
-		spin_lock(&inode_lock);
-		WARN_ON(inode->i_state & I_NEW);
-		inode->i_state &= ~I_WILL_FREE;
-		__remove_inode_hash(inode);
+	if (!drop && (sb->s_flags & MS_ACTIVE)) {
+		inode->i_state |= I_REFERENCED;
+		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+			inode_lru_list_add(inode);
+		spin_unlock(&inode->i_lock);
+		return;
 	}
 
-	WARN_ON(inode->i_state & I_NEW);
+	if (!drop) {
+		inode->i_state |= I_WILL_FREE;
+		spin_unlock(&inode->i_lock);
+		write_inode_now(inode, 1);
+		spin_lock(&inode->i_lock);
+		WARN_ON(inode->i_state & I_NEW);
+		inode->i_state &= ~I_WILL_FREE;
+	}
+
 	inode->i_state |= I_FREEING;
-
-	/*
-	 * Move the inode off the IO lists and LRU once I_FREEING is
-	 * set so that it won't get moved back on there if it is dirty.
-	 */
 	inode_lru_list_del(inode);
-	list_del_init(&inode->i_wb_list);
+	spin_unlock(&inode->i_lock);
 
-	__inode_sb_list_del(inode);
-	spin_unlock(&inode_lock);
 	evict(inode);
-	remove_inode_hash(inode);
-	wake_up_inode(inode);
-	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
-	destroy_inode(inode);
 }
 
 /**
@@ -1432,7 +1400,7 @@
 	if (inode) {
 		BUG_ON(inode->i_state & I_CLEAR);
 
-		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
+		if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock))
 			iput_final(inode);
 	}
 }
@@ -1611,9 +1579,8 @@
  * to recheck inode state.
  *
  * It doesn't matter if I_NEW is not set initially, a call to
- * wake_up_inode() after removing from the hash list will DTRT.
- *
- * This is called with inode_lock held.
+ * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
+ * will DTRT.
  */
 static void __wait_on_freeing_inode(struct inode *inode)
 {
@@ -1621,10 +1588,11 @@
 	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
 	wq = bit_waitqueue(&inode->i_state, __I_NEW);
 	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_hash_lock);
 	schedule();
 	finish_wait(wq, &wait.wait);
-	spin_lock(&inode_lock);
+	spin_lock(&inode_hash_lock);
 }
 
 static __initdata unsigned long ihash_entries;

diff --git a/fs/internal.h b/fs/internal.h
index 8318059..b29c46e 100644
--- a/fs/internal.h
+++ b/fs/internal.h

@@ -125,6 +125,13 @@
 /*
  * inode.c
  */
+extern spinlock_t inode_sb_list_lock;
+
+/*
+ * fs-writeback.c
+ */
+extern void inode_wb_list_del(struct inode *inode);
+
 extern int get_nr_dirty_inodes(void);
 extern void evict_inodes(struct super_block *);
 extern int invalidate_inodes(struct super_block *, bool);

diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4f9cc04..3e93cdd 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c

@@ -31,7 +31,7 @@
  *   is used to release xattr name/value pair and detach from c->xattrindex.
  * reclaim_xattr_datum(c)
  *   is used to reclaim xattr name/value pairs on the xattr name/value pair cache when
- *   memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold 
+ *   memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold
  *   is hard coded as 32KiB.
  * do_verify_xattr_datum(c, xd)
  *   is used to load the xdatum informations without name/value pair from the medium.

diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 03b8c24..edfea7a 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c

@@ -293,7 +293,7 @@
 	return ret;
 }
 
-/* called with inode_lock held */
+/* called with inode->i_lock held */
 static int logfs_drop_inode(struct inode *inode)
 {
 	struct logfs_super *super = logfs_super(inode->i_sb);

diff --git a/fs/namei.c b/fs/namei.c
index d0066e1..3cb616d 100644
--- a/fs/namei.c
+++ b/fs/namei.c

@@ -992,6 +992,12 @@
 	return 0;
 }
 
+static inline bool managed_dentry_might_block(struct dentry *dentry)
+{
+	return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
+		dentry->d_op->d_manage(dentry, true) < 0);
+}
+
 /*
  * Skip to top of mountpoint pile in rcuwalk mode.  We abort the rcu-walk if we
  * meet a managed dentry and we're not walking to "..".  True is returned to
@@ -1000,19 +1006,26 @@
 static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 			       struct inode **inode, bool reverse_transit)
 {
-	while (d_mountpoint(path->dentry)) {
+	for (;;) {
 		struct vfsmount *mounted;
-		if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
-		    !reverse_transit &&
-		    path->dentry->d_op->d_manage(path->dentry, true) < 0)
+		/*
+		 * Don't forget we might have a non-mountpoint managed dentry
+		 * that wants to block transit.
+		 */
+		*inode = path->dentry->d_inode;
+		if (!reverse_transit &&
+		     unlikely(managed_dentry_might_block(path->dentry)))
 			return false;
+
+		if (!d_mountpoint(path->dentry))
+			break;
+
 		mounted = __lookup_mnt(path->mnt, path->dentry, 1);
 		if (!mounted)
 			break;
 		path->mnt = mounted;
 		path->dentry = mounted->mnt_root;
 		nd->seq = read_seqcount_begin(&path->dentry->d_seq);
-		*inode = path->dentry->d_inode;
 	}
 
 	if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index abdf38d..7237672 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c

@@ -44,6 +44,7 @@
 /* #define NFS_DEBUG_VERBOSE 1 */
 
 static int nfs_opendir(struct inode *, struct file *);
+static int nfs_closedir(struct inode *, struct file *);
 static int nfs_readdir(struct file *, void *, filldir_t);
 static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
 static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
@@ -64,7 +65,7 @@
 	.read		= generic_read_dir,
 	.readdir	= nfs_readdir,
 	.open		= nfs_opendir,
-	.release	= nfs_release,
+	.release	= nfs_closedir,
 	.fsync		= nfs_fsync_dir,
 };
 
@@ -133,13 +134,35 @@
 
 #endif /* CONFIG_NFS_V4 */
 
+static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct rpc_cred *cred)
+{
+	struct nfs_open_dir_context *ctx;
+	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	if (ctx != NULL) {
+		ctx->duped = 0;
+		ctx->dir_cookie = 0;
+		ctx->dup_cookie = 0;
+		ctx->cred = get_rpccred(cred);
+	} else
+		ctx = ERR_PTR(-ENOMEM);
+	return ctx;
+}
+
+static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx)
+{
+	put_rpccred(ctx->cred);
+	kfree(ctx);
+}
+
 /*
  * Open file
  */
 static int
 nfs_opendir(struct inode *inode, struct file *filp)
 {
-	int res;
+	int res = 0;
+	struct nfs_open_dir_context *ctx;
+	struct rpc_cred *cred;
 
 	dfprintk(FILE, "NFS: open dir(%s/%s)\n",
 			filp->f_path.dentry->d_parent->d_name.name,
@@ -147,8 +170,15 @@
 
 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 
-	/* Call generic open code in order to cache credentials */
-	res = nfs_open(inode, filp);
+	cred = rpc_lookup_cred();
+	if (IS_ERR(cred))
+		return PTR_ERR(cred);
+	ctx = alloc_nfs_open_dir_context(cred);
+	if (IS_ERR(ctx)) {
+		res = PTR_ERR(ctx);
+		goto out;
+	}
+	filp->private_data = ctx;
 	if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
 		/* This is a mountpoint, so d_revalidate will never
 		 * have been called, so we need to refresh the
@@ -156,9 +186,18 @@
 		 */
 		__nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	}
+out:
+	put_rpccred(cred);
 	return res;
 }
 
+static int
+nfs_closedir(struct inode *inode, struct file *filp)
+{
+	put_nfs_open_dir_context(filp->private_data);
+	return 0;
+}
+
 struct nfs_cache_array_entry {
 	u64 cookie;
 	u64 ino;
@@ -284,19 +323,20 @@
 {
 	loff_t diff = desc->file->f_pos - desc->current_index;
 	unsigned int index;
+	struct nfs_open_dir_context *ctx = desc->file->private_data;
 
 	if (diff < 0)
 		goto out_eof;
 	if (diff >= array->size) {
 		if (array->eof_index >= 0)
 			goto out_eof;
-		desc->current_index += array->size;
 		return -EAGAIN;
 	}
 
 	index = (unsigned int)diff;
 	*desc->dir_cookie = array->array[index].cookie;
 	desc->cache_entry_index = index;
+	ctx->duped = 0;
 	return 0;
 out_eof:
 	desc->eof = 1;
@@ -307,10 +347,18 @@
 int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
 {
 	int i;
+	loff_t new_pos;
 	int status = -EAGAIN;
+	struct nfs_open_dir_context *ctx = desc->file->private_data;
 
 	for (i = 0; i < array->size; i++) {
 		if (array->array[i].cookie == *desc->dir_cookie) {
+			new_pos = desc->current_index + i;
+			if (new_pos < desc->file->f_pos) {
+				ctx->dup_cookie = *desc->dir_cookie;
+				ctx->duped = 1;
+			}
+			desc->file->f_pos = new_pos;
 			desc->cache_entry_index = i;
 			return 0;
 		}
@@ -342,6 +390,7 @@
 
 	if (status == -EAGAIN) {
 		desc->last_cookie = array->last_cookie;
+		desc->current_index += array->size;
 		desc->page_index++;
 	}
 	nfs_readdir_release_array(desc->page);
@@ -354,7 +403,8 @@
 int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
 			struct nfs_entry *entry, struct file *file, struct inode *inode)
 {
-	struct rpc_cred	*cred = nfs_file_cred(file);
+	struct nfs_open_dir_context *ctx = file->private_data;
+	struct rpc_cred	*cred = ctx->cred;
 	unsigned long	timestamp, gencount;
 	int		error;
 
@@ -693,6 +743,20 @@
 	int i = 0;
 	int res = 0;
 	struct nfs_cache_array *array = NULL;
+	struct nfs_open_dir_context *ctx = file->private_data;
+
+	if (ctx->duped != 0 && ctx->dup_cookie == *desc->dir_cookie) {
+		if (printk_ratelimit()) {
+			pr_notice("NFS: directory %s/%s contains a readdir loop.  "
+				"Please contact your server vendor.  "
+				"Offending cookie: %llu\n",
+				file->f_dentry->d_parent->d_name.name,
+				file->f_dentry->d_name.name,
+				*desc->dir_cookie);
+		}
+		res = -ELOOP;
+		goto out;
+	}
 
 	array = nfs_readdir_get_array(desc->page);
 	if (IS_ERR(array)) {
@@ -785,6 +849,7 @@
 	struct inode	*inode = dentry->d_inode;
 	nfs_readdir_descriptor_t my_desc,
 			*desc = &my_desc;
+	struct nfs_open_dir_context *dir_ctx = filp->private_data;
 	int res;
 
 	dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
@@ -801,7 +866,7 @@
 	memset(desc, 0, sizeof(*desc));
 
 	desc->file = filp;
-	desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie;
+	desc->dir_cookie = &dir_ctx->dir_cookie;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
 	desc->plus = NFS_USE_READDIRPLUS(inode);
 
@@ -853,6 +918,7 @@
 {
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
+	struct nfs_open_dir_context *dir_ctx = filp->private_data;
 
 	dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
 			dentry->d_parent->d_name.name,
@@ -872,7 +938,8 @@
 	}
 	if (offset != filp->f_pos) {
 		filp->f_pos = offset;
-		nfs_file_open_context(filp)->dir_cookie = 0;
+		dir_ctx->dir_cookie = 0;
+		dir_ctx->duped = 0;
 	}
 out:
 	mutex_unlock(&inode->i_mutex);
@@ -1068,7 +1135,7 @@
 	if (fhandle == NULL || fattr == NULL)
 		goto out_error;
 
-	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+	error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
 	if (error)
 		goto out_bad;
 	if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1224,7 +1291,7 @@
 	parent = dentry->d_parent;
 	/* Protect against concurrent sillydeletes */
 	nfs_block_sillyrename(parent);
-	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+	error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
 	if (error == -ENOENT)
 		goto no_entry;
 	if (error < 0) {
@@ -1562,7 +1629,7 @@
 	if (dentry->d_inode)
 		goto out;
 	if (fhandle->size == 0) {
-		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
+		error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
 		if (error)
 			goto out_error;
 	}

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d85a534..3ac5bd6 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c

@@ -326,6 +326,9 @@
 		ret = xchg(&ctx->error, 0);
 	if (!ret && status < 0)
 		ret = status;
+	if (!ret && !datasync)
+		/* application has asked for meta-data sync */
+		ret = pnfs_layoutcommit_inode(inode, true);
 	return ret;
 }
 

diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 1084792..dcb6154 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c

@@ -222,6 +222,10 @@
 		goto out;
 	}
 
+	if (fattr->valid & NFS_ATTR_FATTR_FSID &&
+	    !nfs_fsid_equal(&server->fsid, &fattr->fsid))
+		memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
+
 	inode = nfs_fhget(sb, mntfh, fattr);
 	if (IS_ERR(inode)) {
 		dprintk("nfs_get_root: get root inode failed\n");

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 01768e5..57bb31a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c

@@ -254,7 +254,9 @@
 	struct inode *inode = ERR_PTR(-ENOENT);
 	unsigned long hash;
 
-	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0)
+	nfs_attr_check_mountpoint(sb, fattr);
+
+	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0)
 		goto out_no_inode;
 	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
 		goto out_no_inode;
@@ -298,8 +300,8 @@
 			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
 				set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			/* Deal with crossing mountpoints */
-			if ((fattr->valid & NFS_ATTR_FATTR_FSID)
-					&& !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
+			if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
+					fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
 				if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
 					inode->i_op = &nfs_referral_inode_operations;
 				else
@@ -639,7 +641,6 @@
 		ctx->mode = f_mode;
 		ctx->flags = 0;
 		ctx->error = 0;
-		ctx->dir_cookie = 0;
 		nfs_init_lock_context(&ctx->lock_context);
 		ctx->lock_context.open_context = ctx;
 		INIT_LIST_HEAD(&ctx->list);
@@ -1471,6 +1472,7 @@
 	nfsi->delegation_state = 0;
 	init_rwsem(&nfsi->rwsem);
 	nfsi->layout = NULL;
+	atomic_set(&nfsi->commits_outstanding, 0);
 #endif
 }
 

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 72e0bdd..ce118ce 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h

@@ -39,6 +39,12 @@
 	return 0;
 }
 
+static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
+{
+	if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid))
+		fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT;
+}
+
 struct nfs_clone_mount {
 	const struct super_block *sb;
 	const struct dentry *dentry;
@@ -214,6 +220,7 @@
 /* nfs4proc.c */
 #ifdef CONFIG_NFS_V4
 extern struct rpc_procinfo nfs4_procedures[];
+void nfs_fixup_secinfo_attributes(struct nfs_fattr *, struct nfs_fh *);
 #endif
 
 extern int nfs4_init_ds_session(struct nfs_client *clp);
@@ -276,11 +283,25 @@
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 
 /* write.c */
+extern void nfs_commit_free(struct nfs_write_data *p);
 extern int nfs_initiate_write(struct nfs_write_data *data,
 			      struct rpc_clnt *clnt,
 			      const struct rpc_call_ops *call_ops,
 			      int how);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_initiate_commit(struct nfs_write_data *data,
+			       struct rpc_clnt *clnt,
+			       const struct rpc_call_ops *call_ops,
+			       int how);
+extern void nfs_init_commit(struct nfs_write_data *data,
+			    struct list_head *head,
+			    struct pnfs_layout_segment *lseg);
+void nfs_retry_commit(struct list_head *page_list,
+		      struct pnfs_layout_segment *lseg);
+void nfs_commit_clear_lock(struct nfs_inode *nfsi);
+void nfs_commitdata_release(void *data);
+void nfs_commit_release_pages(struct nfs_write_data *data);
+
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
 		struct page *, struct page *);
@@ -296,12 +317,14 @@
 			    rpc_authflavor_t authflavour,
 			    int noresvport);
 extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
-extern int _nfs4_call_sync(struct nfs_server *server,
+extern int _nfs4_call_sync(struct rpc_clnt *clnt,
+			   struct nfs_server *server,
 			   struct rpc_message *msg,
 			   struct nfs4_sequence_args *args,
 			   struct nfs4_sequence_res *res,
 			   int cache_reply);
-extern int _nfs4_call_sync_session(struct nfs_server *server,
+extern int _nfs4_call_sync_session(struct rpc_clnt *clnt,
+				   struct nfs_server *server,
 				   struct rpc_message *msg,
 				   struct nfs4_sequence_args *args,
 				   struct nfs4_sequence_res *res,

diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index bf1c680..ad92bf7 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c

@@ -15,6 +15,7 @@
 #include <linux/string.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/vfs.h>
+#include <linux/sunrpc/gss_api.h>
 #include "internal.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
@@ -27,7 +28,8 @@
 
 static struct vfsmount *nfs_do_submount(struct dentry *dentry,
 					struct nfs_fh *fh,
-					struct nfs_fattr *fattr);
+					struct nfs_fattr *fattr,
+					rpc_authflavor_t authflavor);
 
 /*
  * nfs_path - reconstruct the path given an arbitrary dentry
@@ -116,6 +118,100 @@
 	return ERR_PTR(-ENAMETOOLONG);
 }
 
+#ifdef CONFIG_NFS_V4
+static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors, struct inode *inode)
+{
+	struct gss_api_mech *mech;
+	struct xdr_netobj oid;
+	int i;
+	rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
+
+	for (i = 0; i < flavors->num_flavors; i++) {
+		struct nfs4_secinfo_flavor *flavor;
+		flavor = &flavors->flavors[i];
+
+		if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
+			pseudoflavor = flavor->flavor;
+			break;
+		} else if (flavor->flavor == RPC_AUTH_GSS) {
+			oid.len  = flavor->gss.sec_oid4.len;
+			oid.data = flavor->gss.sec_oid4.data;
+			mech = gss_mech_get_by_OID(&oid);
+			if (!mech)
+				continue;
+			pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
+			gss_mech_put(mech);
+			break;
+		}
+	}
+
+	return pseudoflavor;
+}
+
+static rpc_authflavor_t nfs_negotiate_security(const struct dentry *parent, const struct dentry *dentry)
+{
+	int status = 0;
+	struct page *page;
+	struct nfs4_secinfo_flavors *flavors;
+	int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
+	rpc_authflavor_t flavor = RPC_AUTH_UNIX;
+
+	secinfo = NFS_PROTO(parent->d_inode)->secinfo;
+	if (secinfo != NULL) {
+		page = alloc_page(GFP_KERNEL);
+		if (!page) {
+			status = -ENOMEM;
+			goto out;
+		}
+		flavors = page_address(page);
+		status = secinfo(parent->d_inode, &dentry->d_name, flavors);
+		flavor = nfs_find_best_sec(flavors, dentry->d_inode);
+		put_page(page);
+	}
+
+	return flavor;
+
+out:
+	status = -ENOMEM;
+	return status;
+}
+
+static rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent,
+				     struct dentry *dentry, struct path *path,
+				     struct nfs_fh *fh, struct nfs_fattr *fattr)
+{
+	rpc_authflavor_t flavor;
+	struct rpc_clnt *clone;
+	struct rpc_auth *auth;
+	int err;
+
+	flavor = nfs_negotiate_security(parent, path->dentry);
+	if (flavor < 0)
+		goto out;
+	clone  = rpc_clone_client(server->client);
+	auth   = rpcauth_create(flavor, clone);
+	if (!auth) {
+		flavor = -EIO;
+		goto out;
+	}
+	err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode,
+						  &path->dentry->d_name,
+						  fh, fattr);
+	if (err < 0)
+		flavor = err;
+out:
+	return flavor;
+}
+#else /* CONFIG_NFS_V4 */
+static inline rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server,
+				     struct dentry *parent, struct dentry *dentry,
+				     struct path *path, struct nfs_fh *fh,
+				     struct nfs_fattr *fattr)
+{
+	return -EPERM;
+}
+#endif /* CONFIG_NFS_V4 */
+
 /*
  * nfs_d_automount - Handle crossing a mountpoint on the server
  * @path - The mountpoint
@@ -136,6 +232,7 @@
 	struct nfs_fh *fh = NULL;
 	struct nfs_fattr *fattr = NULL;
 	int err;
+	rpc_authflavor_t flavor = 1;
 
 	dprintk("--> nfs_d_automount()\n");
 
@@ -153,9 +250,16 @@
 
 	/* Look it up again to get its attributes */
 	parent = dget_parent(path->dentry);
-	err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
+	err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode,
 						  &path->dentry->d_name,
 						  fh, fattr);
+	if (err == -EPERM) {
+		flavor = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr);
+		if (flavor < 0)
+			err = flavor;
+		else
+			err = 0;
+	}
 	dput(parent);
 	if (err != 0) {
 		mnt = ERR_PTR(err);
@@ -165,7 +269,7 @@
 	if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
 		mnt = nfs_do_refmount(path->dentry);
 	else
-		mnt = nfs_do_submount(path->dentry, fh, fattr);
+		mnt = nfs_do_submount(path->dentry, fh, fattr, flavor);
 	if (IS_ERR(mnt))
 		goto out;
 
@@ -232,17 +336,20 @@
  * @dentry - parent directory
  * @fh - filehandle for new root dentry
  * @fattr - attributes for new root inode
+ * @authflavor - security flavor to use when performing the mount
  *
  */
 static struct vfsmount *nfs_do_submount(struct dentry *dentry,
 					struct nfs_fh *fh,
-					struct nfs_fattr *fattr)
+					struct nfs_fattr *fattr,
+					rpc_authflavor_t authflavor)
 {
 	struct nfs_clone_mount mountdata = {
 		.sb = dentry->d_sb,
 		.dentry = dentry,
 		.fh = fh,
 		.fattr = fattr,
+		.authflavor = authflavor,
 	};
 	struct vfsmount *mnt = ERR_PTR(-ENOMEM);
 	char *page = (char *) __get_free_page(GFP_USER);

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d0c80d8..38053d8 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c

@@ -141,7 +141,7 @@
 }
 
 static int
-nfs3_proc_lookup(struct inode *dir, struct qstr *name,
+nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
 		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	struct nfs3_diropargs	arg = {

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c64be1c..e1c261d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h

@@ -57,7 +57,8 @@
 struct nfs4_minor_version_ops {
 	u32	minor_version;
 
-	int	(*call_sync)(struct nfs_server *server,
+	int	(*call_sync)(struct rpc_clnt *clnt,
+			struct nfs_server *server,
 			struct rpc_message *msg,
 			struct nfs4_sequence_args *args,
 			struct nfs4_sequence_res *res,
@@ -262,6 +263,8 @@
 extern int nfs4_init_session(struct nfs_server *server);
 extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
 		struct nfs_fsinfo *fsinfo);
+extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
+				  bool sync);
 
 static inline bool
 is_ds_only_client(struct nfs_client *clp)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 4285584..6f8192f 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c

@@ -154,6 +154,23 @@
 }
 
 /*
+ * We reference the rpc_cred of the first WRITE that triggers the need for
+ * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
+ * rfc5661 is not clear about which credential should be used.
+ */
+static void
+filelayout_set_layoutcommit(struct nfs_write_data *wdata)
+{
+	if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds ||
+	    wdata->res.verf->committed == NFS_FILE_SYNC)
+		return;
+
+	pnfs_set_layoutcommit(wdata);
+	dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
+		(unsigned long) wdata->lseg->pls_end_pos);
+}
+
+/*
  * Call ops for the async read/write cases
  * In the case of dense layouts, the offset needs to be reset to its
  * original value.
@@ -210,6 +227,38 @@
 		return -EAGAIN;
 	}
 
+	filelayout_set_layoutcommit(data);
+	return 0;
+}
+
+/* Fake up some data that will cause nfs_commit_release to retry the writes. */
+static void prepare_to_resend_writes(struct nfs_write_data *data)
+{
+	struct nfs_page *first = nfs_list_entry(data->pages.next);
+
+	data->task.tk_status = 0;
+	memcpy(data->verf.verifier, first->wb_verf.verifier,
+	       sizeof(first->wb_verf.verifier));
+	data->verf.verifier[0]++; /* ensure verifier mismatch */
+}
+
+static int filelayout_commit_done_cb(struct rpc_task *task,
+				     struct nfs_write_data *data)
+{
+	int reset = 0;
+
+	if (filelayout_async_handle_error(task, data->args.context->state,
+					  data->ds_clp, &reset) == -EAGAIN) {
+		dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
+			__func__, data->ds_clp, data->ds_clp->cl_session);
+		if (reset) {
+			prepare_to_resend_writes(data);
+			filelayout_set_lo_fail(data->lseg);
+		} else
+			nfs_restart_rpc(task, data->ds_clp);
+		return -EAGAIN;
+	}
+
 	return 0;
 }
 
@@ -240,6 +289,16 @@
 	wdata->mds_ops->rpc_release(data);
 }
 
+static void filelayout_commit_release(void *data)
+{
+	struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+	nfs_commit_release_pages(wdata);
+	if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
+		nfs_commit_clear_lock(NFS_I(wdata->inode));
+	nfs_commitdata_release(wdata);
+}
+
 struct rpc_call_ops filelayout_read_call_ops = {
 	.rpc_call_prepare = filelayout_read_prepare,
 	.rpc_call_done = filelayout_read_call_done,
@@ -252,6 +311,12 @@
 	.rpc_release = filelayout_write_release,
 };
 
+struct rpc_call_ops filelayout_commit_call_ops = {
+	.rpc_call_prepare = filelayout_write_prepare,
+	.rpc_call_done = filelayout_write_call_done,
+	.rpc_release = filelayout_commit_release,
+};
+
 static enum pnfs_try_status
 filelayout_read_pagelist(struct nfs_read_data *data)
 {
@@ -320,10 +385,6 @@
 		data->inode->i_ino, sync, (size_t) data->args.count, offset,
 		ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
 
-	/* We can't handle commit to ds yet */
-	if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
-		data->args.stable = NFS_FILE_SYNC;
-
 	data->write_done_cb = filelayout_write_done_cb;
 	data->ds_clp = ds->ds_clp;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
@@ -441,12 +502,33 @@
 			 struct nfs4_layoutget_res *lgr,
 			 struct nfs4_deviceid *id)
 {
-	uint32_t *p = (uint32_t *)lgr->layout.buf;
+	struct xdr_stream stream;
+	struct xdr_buf buf = {
+		.pages =  lgr->layoutp->pages,
+		.page_len =  lgr->layoutp->len,
+		.buflen =  lgr->layoutp->len,
+		.len = lgr->layoutp->len,
+	};
+	struct page *scratch;
+	__be32 *p;
 	uint32_t nfl_util;
 	int i;
 
 	dprintk("%s: set_layout_map Begin\n", __func__);
 
+	scratch = alloc_page(GFP_KERNEL);
+	if (!scratch)
+		return -ENOMEM;
+
+	xdr_init_decode(&stream, &buf, NULL);
+	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
+
+	/* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
+	 * num_fh (4) */
+	p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20);
+	if (unlikely(!p))
+		goto out_err;
+
 	memcpy(id, p, sizeof(*id));
 	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
 	print_deviceid(id);
@@ -468,32 +550,57 @@
 		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
 		fl->pattern_offset);
 
+	if (!fl->num_fh)
+		goto out_err;
+
 	fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
 			       GFP_KERNEL);
 	if (!fl->fh_array)
-		return -ENOMEM;
+		goto out_err;
 
 	for (i = 0; i < fl->num_fh; i++) {
 		/* Do we want to use a mempool here? */
 		fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
-		if (!fl->fh_array[i]) {
-			filelayout_free_fh_array(fl);
-			return -ENOMEM;
-		}
+		if (!fl->fh_array[i])
+			goto out_err_free;
+
+		p = xdr_inline_decode(&stream, 4);
+		if (unlikely(!p))
+			goto out_err_free;
 		fl->fh_array[i]->size = be32_to_cpup(p++);
 		if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
 			printk(KERN_ERR "Too big fh %d received %d\n",
 			       i, fl->fh_array[i]->size);
-			filelayout_free_fh_array(fl);
-			return -EIO;
+			goto out_err_free;
 		}
+
+		p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
+		if (unlikely(!p))
+			goto out_err_free;
 		memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
-		p += XDR_QUADLEN(fl->fh_array[i]->size);
 		dprintk("DEBUG: %s: fh len %d\n", __func__,
 			fl->fh_array[i]->size);
 	}
 
+	__free_page(scratch);
 	return 0;
+
+out_err_free:
+	filelayout_free_fh_array(fl);
+out_err:
+	__free_page(scratch);
+	return -EIO;
+}
+
+static void
+filelayout_free_lseg(struct pnfs_layout_segment *lseg)
+{
+	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
+
+	dprintk("--> %s\n", __func__);
+	nfs4_fl_put_deviceid(fl->dsaddr);
+	kfree(fl->commit_buckets);
+	_filelayout_free_lseg(fl);
 }
 
 static struct pnfs_layout_segment *
@@ -514,19 +621,30 @@
 		_filelayout_free_lseg(fl);
 		return NULL;
 	}
+
+	/* This assumes there is only one IOMODE_RW lseg.  What
+	 * we really want to do is have a layout_hdr level
+	 * dictionary of <multipath_list4, fh> keys, each
+	 * associated with a struct list_head, populated by calls
+	 * to filelayout_write_pagelist().
+	 * */
+	if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
+		int i;
+		int size = (fl->stripe_type == STRIPE_SPARSE) ?
+			fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+		fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL);
+		if (!fl->commit_buckets) {
+			filelayout_free_lseg(&fl->generic_hdr);
+			return NULL;
+		}
+		fl->number_of_buckets = size;
+		for (i = 0; i < size; i++)
+			INIT_LIST_HEAD(&fl->commit_buckets[i]);
+	}
 	return &fl->generic_hdr;
 }
 
-static void
-filelayout_free_lseg(struct pnfs_layout_segment *lseg)
-{
-	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
-
-	dprintk("--> %s\n", __func__);
-	nfs4_fl_put_deviceid(fl->dsaddr);
-	_filelayout_free_lseg(fl);
-}
-
 /*
  * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
  *
@@ -552,6 +670,191 @@
 	return (p_stripe == r_stripe);
 }
 
+static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
+{
+	return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
+}
+
+static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
+{
+	if (fl->stripe_type == STRIPE_SPARSE)
+		return nfs4_fl_calc_ds_index(&fl->generic_hdr, j);
+	else
+		return j;
+}
+
+struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
+{
+	struct pnfs_layout_segment *lseg = req->wb_commit_lseg;
+	struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
+	u32 i, j;
+	struct list_head *list;
+
+	/* Note that we are calling nfs4_fl_calc_j_index on each page
+	 * that ends up being committed to a data server.  An attractive
+	 * alternative is to add a field to nfs_write_data and nfs_page
+	 * to store the value calculated in filelayout_write_pagelist
+	 * and just use that here.
+	 */
+	j = nfs4_fl_calc_j_index(lseg,
+				 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
+	i = select_bucket_index(fl, j);
+	list = &fl->commit_buckets[i];
+	if (list_empty(list)) {
+		/* Non-empty buckets hold a reference on the lseg */
+		get_lseg(lseg);
+	}
+	return list;
+}
+
+static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
+{
+	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
+
+	if (flseg->stripe_type == STRIPE_SPARSE)
+		return i;
+	else
+		return nfs4_fl_calc_ds_index(lseg, i);
+}
+
+static struct nfs_fh *
+select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
+{
+	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
+
+	if (flseg->stripe_type == STRIPE_SPARSE) {
+		if (flseg->num_fh == 1)
+			i = 0;
+		else if (flseg->num_fh == 0)
+			/* Use the MDS OPEN fh set in nfs_read_rpcsetup */
+			return NULL;
+	}
+	return flseg->fh_array[i];
+}
+
+static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
+{
+	struct pnfs_layout_segment *lseg = data->lseg;
+	struct nfs4_pnfs_ds *ds;
+	u32 idx;
+	struct nfs_fh *fh;
+
+	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
+	ds = nfs4_fl_prepare_ds(lseg, idx);
+	if (!ds) {
+		printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+		set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+		set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+		prepare_to_resend_writes(data);
+		data->mds_ops->rpc_release(data);
+		return -EAGAIN;
+	}
+	dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
+	data->write_done_cb = filelayout_commit_done_cb;
+	data->ds_clp = ds->ds_clp;
+	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
+	if (fh)
+		data->args.fh = fh;
+	return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient,
+				   &filelayout_commit_call_ops, how);
+}
+
+/*
+ * This is only useful while we are using whole file layouts.
+ */
+static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
+{
+	struct pnfs_layout_segment *lseg, *rv = NULL;
+
+	spin_lock(&inode->i_lock);
+	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
+		if (lseg->pls_range.iomode == IOMODE_RW)
+			rv = get_lseg(lseg);
+	spin_unlock(&inode->i_lock);
+	return rv;
+}
+
+static int alloc_ds_commits(struct inode *inode, struct list_head *list)
+{
+	struct pnfs_layout_segment *lseg;
+	struct nfs4_filelayout_segment *fl;
+	struct nfs_write_data *data;
+	int i, j;
+
+	/* Won't need this when non-whole file layout segments are supported
+	 * instead we will use a pnfs_layout_hdr structure */
+	lseg = find_only_write_lseg(inode);
+	if (!lseg)
+		return 0;
+	fl = FILELAYOUT_LSEG(lseg);
+	for (i = 0; i < fl->number_of_buckets; i++) {
+		if (list_empty(&fl->commit_buckets[i]))
+			continue;
+		data = nfs_commitdata_alloc();
+		if (!data)
+			goto out_bad;
+		data->ds_commit_index = i;
+		data->lseg = lseg;
+		list_add(&data->pages, list);
+	}
+	put_lseg(lseg);
+	return 0;
+
+out_bad:
+	for (j = i; j < fl->number_of_buckets; j++) {
+		if (list_empty(&fl->commit_buckets[i]))
+			continue;
+		nfs_retry_commit(&fl->commit_buckets[i], lseg);
+		put_lseg(lseg);  /* associated with emptying bucket */
+	}
+	put_lseg(lseg);
+	/* Caller will clean up entries put on list */
+	return -ENOMEM;
+}
+
+/* This follows nfs_commit_list pretty closely */
+static int
+filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
+			   int how)
+{
+	struct nfs_write_data	*data, *tmp;
+	LIST_HEAD(list);
+
+	if (!list_empty(mds_pages)) {
+		data = nfs_commitdata_alloc();
+		if (!data)
+			goto out_bad;
+		data->lseg = NULL;
+		list_add(&data->pages, &list);
+	}
+
+	if (alloc_ds_commits(inode, &list))
+		goto out_bad;
+
+	list_for_each_entry_safe(data, tmp, &list, pages) {
+		list_del_init(&data->pages);
+		atomic_inc(&NFS_I(inode)->commits_outstanding);
+		if (!data->lseg) {
+			nfs_init_commit(data, mds_pages, NULL);
+			nfs_initiate_commit(data, NFS_CLIENT(inode),
+					    data->mds_ops, how);
+		} else {
+			nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg);
+			filelayout_initiate_commit(data, how);
+		}
+	}
+	return 0;
+ out_bad:
+	list_for_each_entry_safe(data, tmp, &list, pages) {
+		nfs_retry_commit(&data->pages, data->lseg);
+		list_del_init(&data->pages);
+		nfs_commit_free(data);
+	}
+	nfs_retry_commit(mds_pages, NULL);
+	nfs_commit_clear_lock(NFS_I(inode));
+	return -ENOMEM;
+}
+
 static struct pnfs_layoutdriver_type filelayout_type = {
 	.id			= LAYOUT_NFSV4_1_FILES,
 	.name			= "LAYOUT_NFSV4_1_FILES",
@@ -559,6 +862,9 @@
 	.alloc_lseg		= filelayout_alloc_lseg,
 	.free_lseg		= filelayout_free_lseg,
 	.pg_test		= filelayout_pg_test,
+	.mark_pnfs_commit	= filelayout_mark_pnfs_commit,
+	.choose_commit_list	= filelayout_choose_commit_list,
+	.commit_pagelist	= filelayout_commit_pagelist,
 	.read_pagelist		= filelayout_read_pagelist,
 	.write_pagelist		= filelayout_write_pagelist,
 };

diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index ee0c907..085a354 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h

@@ -79,6 +79,8 @@
 	struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
 	unsigned int num_fh;
 	struct nfs_fh **fh_array;
+	struct list_head *commit_buckets; /* Sort commits to ds */
+	int number_of_buckets;
 };
 
 static inline struct nfs4_filelayout_segment *

diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 68143c1..de5350f 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c

@@ -261,7 +261,7 @@
  * Currently only support ipv4, and one multi-path address.
  */
 static struct nfs4_pnfs_ds *
-decode_and_add_ds(__be32 **pp, struct inode *inode)
+decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
 {
 	struct nfs4_pnfs_ds *ds = NULL;
 	char *buf;
@@ -269,25 +269,34 @@
 	u32 ip_addr, port;
 	int nlen, rlen, i;
 	int tmp[2];
-	__be32 *r_netid, *r_addr, *p = *pp;
+	__be32 *p;
 
 	/* r_netid */
+	p = xdr_inline_decode(streamp, 4);
+	if (unlikely(!p))
+		goto out_err;
 	nlen = be32_to_cpup(p++);
-	r_netid = p;
-	p += XDR_QUADLEN(nlen);
 
-	/* r_addr */
-	rlen = be32_to_cpup(p++);
-	r_addr = p;
-	p += XDR_QUADLEN(rlen);
-	*pp = p;
+	p = xdr_inline_decode(streamp, nlen);
+	if (unlikely(!p))
+		goto out_err;
 
 	/* Check that netid is "tcp" */
-	if (nlen != 3 ||  memcmp((char *)r_netid, "tcp", 3)) {
+	if (nlen != 3 ||  memcmp((char *)p, "tcp", 3)) {
 		dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
 		goto out_err;
 	}
 
+	/* r_addr */
+	p = xdr_inline_decode(streamp, 4);
+	if (unlikely(!p))
+		goto out_err;
+	rlen = be32_to_cpup(p);
+
+	p = xdr_inline_decode(streamp, rlen);
+	if (unlikely(!p))
+		goto out_err;
+
 	/* ipv6 length plus port is legal */
 	if (rlen > INET6_ADDRSTRLEN + 8) {
 		dprintk("%s: Invalid address, length %d\n", __func__,
@@ -300,7 +309,7 @@
 		goto out_err;
 	}
 	buf[rlen] = '\0';
-	memcpy(buf, r_addr, rlen);
+	memcpy(buf, p, rlen);
 
 	/* replace the port dots with dashes for the in4_pton() delimiter*/
 	for (i = 0; i < 2; i++) {
@@ -336,90 +345,154 @@
 static struct nfs4_file_layout_dsaddr*
 decode_device(struct inode *ino, struct pnfs_device *pdev)
 {
-	int i, dummy;
+	int i;
 	u32 cnt, num;
 	u8 *indexp;
-	__be32 *p = (__be32 *)pdev->area, *indicesp;
-	struct nfs4_file_layout_dsaddr *dsaddr;
+	__be32 *p;
+	u8 *stripe_indices;
+	u8 max_stripe_index;
+	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
+	struct xdr_stream stream;
+	struct xdr_buf buf = {
+		.pages = pdev->pages,
+		.page_len = pdev->pglen,
+		.buflen = pdev->pglen,
+		.len = pdev->pglen,
+	};
+	struct page *scratch;
+
+	/* set up xdr stream */
+	scratch = alloc_page(GFP_KERNEL);
+	if (!scratch)
+		goto out_err;
+
+	xdr_init_decode(&stream, &buf, NULL);
+	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 	/* Get the stripe count (number of stripe index) */
-	cnt = be32_to_cpup(p++);
+	p = xdr_inline_decode(&stream, 4);
+	if (unlikely(!p))
+		goto out_err_free_scratch;
+
+	cnt = be32_to_cpup(p);
 	dprintk("%s stripe count  %d\n", __func__, cnt);
 	if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
 		printk(KERN_WARNING "%s: stripe count %d greater than "
 		       "supported maximum %d\n", __func__,
 			cnt, NFS4_PNFS_MAX_STRIPE_CNT);
-		goto out_err;
+		goto out_err_free_scratch;
+	}
+
+	/* read stripe indices */
+	stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL);
+	if (!stripe_indices)
+		goto out_err_free_scratch;
+
+	p = xdr_inline_decode(&stream, cnt << 2);
+	if (unlikely(!p))
+		goto out_err_free_stripe_indices;
+
+	indexp = &stripe_indices[0];
+	max_stripe_index = 0;
+	for (i = 0; i < cnt; i++) {
+		*indexp = be32_to_cpup(p++);
+		max_stripe_index = max(max_stripe_index, *indexp);
+		indexp++;
 	}
 
 	/* Check the multipath list count */
-	indicesp = p;
-	p += XDR_QUADLEN(cnt << 2);
-	num = be32_to_cpup(p++);
+	p = xdr_inline_decode(&stream, 4);
+	if (unlikely(!p))
+		goto out_err_free_stripe_indices;
+
+	num = be32_to_cpup(p);
 	dprintk("%s ds_num %u\n", __func__, num);
 	if (num > NFS4_PNFS_MAX_MULTI_CNT) {
 		printk(KERN_WARNING "%s: multipath count %d greater than "
 			"supported maximum %d\n", __func__,
 			num, NFS4_PNFS_MAX_MULTI_CNT);
-		goto out_err;
+		goto out_err_free_stripe_indices;
 	}
+
+	/* validate stripe indices are all < num */
+	if (max_stripe_index >= num) {
+		printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n",
+			__func__, max_stripe_index, num);
+		goto out_err_free_stripe_indices;
+	}
+
 	dsaddr = kzalloc(sizeof(*dsaddr) +
 			(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
 			GFP_KERNEL);
 	if (!dsaddr)
-		goto out_err;
-
-	dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
-	if (!dsaddr->stripe_indices)
-		goto out_err_free;
+		goto out_err_free_stripe_indices;
 
 	dsaddr->stripe_count = cnt;
+	dsaddr->stripe_indices = stripe_indices;
+	stripe_indices = NULL;
 	dsaddr->ds_num = num;
 
 	memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
 
-	/* Go back an read stripe indices */
-	p = indicesp;
-	indexp = &dsaddr->stripe_indices[0];
-	for (i = 0; i < dsaddr->stripe_count; i++) {
-		*indexp = be32_to_cpup(p++);
-		if (*indexp >= num)
-			goto out_err_free;
-		indexp++;
-	}
-	/* Skip already read multipath list count */
-	p++;
-
 	for (i = 0; i < dsaddr->ds_num; i++) {
 		int j;
+		u32 mp_count;
 
-		dummy = be32_to_cpup(p++); /* multipath count */
-		if (dummy > 1) {
+		p = xdr_inline_decode(&stream, 4);
+		if (unlikely(!p))
+			goto out_err_free_deviceid;
+
+		mp_count = be32_to_cpup(p); /* multipath count */
+		if (mp_count > 1) {
 			printk(KERN_WARNING
 			       "%s: Multipath count %d not supported, "
 			       "skipping all greater than 1\n", __func__,
-				dummy);
+				mp_count);
 		}
-		for (j = 0; j < dummy; j++) {
+		for (j = 0; j < mp_count; j++) {
 			if (j == 0) {
-				dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
+				dsaddr->ds_list[i] = decode_and_add_ds(&stream,
+					ino);
 				if (dsaddr->ds_list[i] == NULL)
-					goto out_err_free;
+					goto out_err_free_deviceid;
 			} else {
 				u32 len;
 				/* skip extra multipath */
-				len = be32_to_cpup(p++);
-				p += XDR_QUADLEN(len);
-				len = be32_to_cpup(p++);
-				p += XDR_QUADLEN(len);
-				continue;
+
+				/* read len, skip */
+				p = xdr_inline_decode(&stream, 4);
+				if (unlikely(!p))
+					goto out_err_free_deviceid;
+				len = be32_to_cpup(p);
+
+				p = xdr_inline_decode(&stream, len);
+				if (unlikely(!p))
+					goto out_err_free_deviceid;
+
+				/* read len, skip */
+				p = xdr_inline_decode(&stream, 4);
+				if (unlikely(!p))
+					goto out_err_free_deviceid;
+				len = be32_to_cpup(p);
+
+				p = xdr_inline_decode(&stream, len);
+				if (unlikely(!p))
+					goto out_err_free_deviceid;
 			}
 		}
 	}
+
+	__free_page(scratch);
 	return dsaddr;
 
-out_err_free:
+out_err_free_deviceid:
 	nfs4_fl_free_deviceid(dsaddr);
+	/* stripe_indicies was part of dsaddr */
+	goto out_err_free_scratch;
+out_err_free_stripe_indices:
+	kfree(stripe_indices);
+out_err_free_scratch:
+	__free_page(scratch);
 out_err:
 	dprintk("%s ERROR: returning NULL\n", __func__);
 	return NULL;
@@ -498,11 +571,6 @@
 			goto out_free;
 	}
 
-	/* set pdev->area */
-	pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
-	if (!pdev->area)
-		goto out_free;
-
 	memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
 	pdev->layout_type = LAYOUT_NFSV4_1_FILES;
 	pdev->pages = pages;
@@ -521,8 +589,6 @@
 	 */
 	dsaddr = decode_and_add_device(inode, pdev);
 out_free:
-	if (pdev->area != NULL)
-		vunmap(pdev->area);
 	for (i = 0; i < max_pages; i++)
 		__free_page(pages[i]);
 	kfree(pages);

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1d84e70..dfd1e6d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c

@@ -41,6 +41,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/gss_api.h>
 #include <linux/nfs.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
@@ -71,7 +72,9 @@
 static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
 static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
-static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
+static int _nfs4_proc_lookup(struct rpc_clnt *client, struct inode *dir,
+			     const struct qstr *name, struct nfs_fh *fhandle,
+			     struct nfs_fattr *fattr);
 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
 static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
 			    struct nfs_fattr *fattr, struct iattr *sattr,
@@ -85,6 +88,8 @@
 	switch (err) {
 	case -NFS4ERR_RESOURCE:
 		return -EREMOTEIO;
+	case -NFS4ERR_WRONGSEC:
+		return -EPERM;
 	case -NFS4ERR_BADOWNER:
 	case -NFS4ERR_BADNAME:
 		return -EINVAL;
@@ -657,7 +662,8 @@
 	.rpc_call_done = nfs41_call_sync_done,
 };
 
-static int nfs4_call_sync_sequence(struct nfs_server *server,
+static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
+				   struct nfs_server *server,
 				   struct rpc_message *msg,
 				   struct nfs4_sequence_args *args,
 				   struct nfs4_sequence_res *res,
@@ -673,7 +679,7 @@
 		.cache_reply = cache_reply,
 	};
 	struct rpc_task_setup task_setup = {
-		.rpc_client = server->client,
+		.rpc_client = clnt,
 		.rpc_message = msg,
 		.callback_ops = &nfs41_call_sync_ops,
 		.callback_data = &data
@@ -692,13 +698,14 @@
 	return ret;
 }
 
-int _nfs4_call_sync_session(struct nfs_server *server,
+int _nfs4_call_sync_session(struct rpc_clnt *clnt,
+			    struct nfs_server *server,
 			    struct rpc_message *msg,
 			    struct nfs4_sequence_args *args,
 			    struct nfs4_sequence_res *res,
 			    int cache_reply)
 {
-	return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0);
+	return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0);
 }
 
 #else
@@ -709,19 +716,28 @@
 }
 #endif /* CONFIG_NFS_V4_1 */
 
-int _nfs4_call_sync(struct nfs_server *server,
+int _nfs4_call_sync(struct rpc_clnt *clnt,
+		    struct nfs_server *server,
 		    struct rpc_message *msg,
 		    struct nfs4_sequence_args *args,
 		    struct nfs4_sequence_res *res,
 		    int cache_reply)
 {
 	args->sa_session = res->sr_session = NULL;
-	return rpc_call_sync(server->client, msg, 0);
+	return rpc_call_sync(clnt, msg, 0);
 }
 
-#define nfs4_call_sync(server, msg, args, res, cache_reply) \
-	(server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \
-			&(res)->seq_res, (cache_reply))
+static inline
+int nfs4_call_sync(struct rpc_clnt *clnt,
+		   struct nfs_server *server,
+		   struct rpc_message *msg,
+		   struct nfs4_sequence_args *args,
+		   struct nfs4_sequence_res *res,
+		   int cache_reply)
+{
+	return server->nfs_client->cl_mvops->call_sync(clnt, server, msg,
+						args, res, cache_reply);
+}
 
 static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
 {
@@ -1831,7 +1847,7 @@
 	} else
 		memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
 
-	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
+	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
 	if (status == 0 && state != NULL)
 		renew_lease(server, timestamp);
 	return status;
@@ -2090,7 +2106,7 @@
 	};
 	int status;
 
-	status = nfs4_call_sync(server, &msg, &args, &res, 0);
+	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
 	if (status == 0) {
 		memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
 		server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
@@ -2160,7 +2176,7 @@
 	};
 
 	nfs_fattr_init(info->fattr);
-	return nfs4_call_sync(server, &msg, &args, &res, 0);
+	return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
 }
 
 static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -2176,15 +2192,43 @@
 	return err;
 }
 
+static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
+				struct nfs_fsinfo *info, rpc_authflavor_t flavor)
+{
+	struct rpc_auth *auth;
+	int ret;
+
+	auth = rpcauth_create(flavor, server->client);
+	if (!auth) {
+		ret = -EIO;
+		goto out;
+	}
+	ret = nfs4_lookup_root(server, fhandle, info);
+	if (ret < 0)
+		ret = -EAGAIN;
+out:
+	return ret;
+}
+
 /*
  * get the file handle for the "/" directory on the server
  */
 static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 			      struct nfs_fsinfo *info)
 {
-	int status;
+	int i, len, status = 0;
+	rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS + 2];
 
-	status = nfs4_lookup_root(server, fhandle, info);
+	flav_array[0] = RPC_AUTH_UNIX;
+	len = gss_mech_list_pseudoflavors(&flav_array[1]);
+	flav_array[1+len] = RPC_AUTH_NULL;
+	len += 2;
+
+	for (i = 0; i < len; i++) {
+		status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
+		if (status == 0)
+			break;
+	}
 	if (status == 0)
 		status = nfs4_server_capabilities(server, fhandle);
 	if (status == 0)
@@ -2249,7 +2293,7 @@
 	};
 	
 	nfs_fattr_init(fattr);
-	return nfs4_call_sync(server, &msg, &args, &res, 0);
+	return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
 }
 
 static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
@@ -2309,9 +2353,9 @@
 	return status;
 }
 
-static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *dirfh,
-		const struct qstr *name, struct nfs_fh *fhandle,
-		struct nfs_fattr *fattr)
+static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server,
+		const struct nfs_fh *dirfh, const struct qstr *name,
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	int		       status;
 	struct nfs4_lookup_arg args = {
@@ -2333,7 +2377,7 @@
 	nfs_fattr_init(fattr);
 
 	dprintk("NFS call  lookupfh %s\n", name->name);
-	status = nfs4_call_sync(server, &msg, &args, &res, 0);
+	status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0);
 	dprintk("NFS reply lookupfh: %d\n", status);
 	return status;
 }
@@ -2345,7 +2389,7 @@
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr);
+		err = _nfs4_proc_lookupfh(server->client, server, dirfh, name, fhandle, fattr);
 		/* FIXME: !!!! */
 		if (err == -NFS4ERR_MOVED) {
 			err = -EREMOTE;
@@ -2356,27 +2400,41 @@
 	return err;
 }
 
-static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name,
-		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
+		const struct qstr *name, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
 {
 	int status;
 	
 	dprintk("NFS call  lookup %s\n", name->name);
-	status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
+	status = _nfs4_proc_lookupfh(clnt, NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
 	if (status == -NFS4ERR_MOVED)
 		status = nfs4_get_referral(dir, name, fattr, fhandle);
 	dprintk("NFS reply lookup: %d\n", status);
 	return status;
 }
 
-static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr, struct nfs_fh *fh)
+{
+	memset(fh, 0, sizeof(struct nfs_fh));
+	fattr->fsid.major = 1;
+	fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
+		NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_FSID | NFS_ATTR_FATTR_MOUNTPOINT;
+	fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	fattr->nlink = 2;
+}
+
+static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
+			    struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	struct nfs4_exception exception = { };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_lookup(dir, name, fhandle, fattr),
+				_nfs4_proc_lookup(clnt, dir, name, fhandle, fattr),
 				&exception);
+		if (err == -EPERM)
+			nfs_fixup_secinfo_attributes(fattr, fhandle);
 	} while (exception.retry);
 	return err;
 }
@@ -2421,7 +2479,7 @@
 	if (res.fattr == NULL)
 		return -ENOMEM;
 
-	status = nfs4_call_sync(server, &msg, &args, &res, 0);
+	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
 	if (!status) {
 		entry->mask = 0;
 		if (res.access & NFS4_ACCESS_READ)
@@ -2488,7 +2546,7 @@
 		.rpc_resp = &res,
 	};
 
-	return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0);
+	return nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
 }
 
 static int nfs4_proc_readlink(struct inode *inode, struct page *page,
@@ -2577,7 +2635,7 @@
 	if (res.dir_attr == NULL)
 		goto out;
 
-	status = nfs4_call_sync(server, &msg, &args, &res, 1);
+	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
 	if (status == 0) {
 		update_changeattr(dir, &res.cinfo);
 		nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2678,7 +2736,7 @@
 	if (res.old_fattr == NULL || res.new_fattr == NULL)
 		goto out;
 
-	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
+	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
 	if (!status) {
 		update_changeattr(old_dir, &res.old_cinfo);
 		nfs_post_op_update_inode(old_dir, res.old_fattr);
@@ -2729,7 +2787,7 @@
 	if (res.fattr == NULL || res.dir_attr == NULL)
 		goto out;
 
-	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
+	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
 	if (!status) {
 		update_changeattr(dir, &res.cinfo);
 		nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2792,8 +2850,8 @@
 
 static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data)
 {
-	int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg,
-				    &data->arg, &data->res, 1);
+	int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
+				    &data->arg.seq_args, &data->res.seq_res, 1);
 	if (status == 0) {
 		update_changeattr(dir, &data->res.dir_cinfo);
 		nfs_post_op_update_inode(dir, data->res.dir_fattr);
@@ -2905,7 +2963,7 @@
 			(unsigned long long)cookie);
 	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
 	res.pgbase = args.pgbase;
-	status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0);
+	status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
 	if (status >= 0) {
 		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
 		status += args.pgbase;
@@ -2997,7 +3055,7 @@
 	};
 
 	nfs_fattr_init(fsstat->fattr);
-	return  nfs4_call_sync(server, &msg, &args, &res, 0);
+	return  nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
 }
 
 static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
@@ -3028,7 +3086,7 @@
 		.rpc_resp = &res,
 	};
 
-	return nfs4_call_sync(server, &msg, &args, &res, 0);
+	return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
 }
 
 static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
@@ -3073,7 +3131,7 @@
 	}
 
 	nfs_fattr_init(pathconf->fattr);
-	return nfs4_call_sync(server, &msg, &args, &res, 0);
+	return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
 }
 
 static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -3195,12 +3253,9 @@
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
 }
 
-static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct inode *inode = data->inode;
-	
-	if (!nfs4_sequence_done(task, &data->res.seq_res))
-		return -EAGAIN;
 
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
 		nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
@@ -3210,11 +3265,24 @@
 	return 0;
 }
 
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+{
+	if (!nfs4_sequence_done(task, &data->res.seq_res))
+		return -EAGAIN;
+	return data->write_done_cb(task, data);
+}
+
 static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
 	struct nfs_server *server = NFS_SERVER(data->inode);
-	
-	data->args.bitmask = server->cache_consistency_bitmask;
+
+	if (data->lseg) {
+		data->args.bitmask = NULL;
+		data->res.fattr = NULL;
+	} else
+		data->args.bitmask = server->cache_consistency_bitmask;
+	if (!data->write_done_cb)
+		data->write_done_cb = nfs4_commit_done_cb;
 	data->res.server = server;
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
 }
@@ -3452,7 +3520,7 @@
 		resp_buf = buf;
 		buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
 	}
-	ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0);
+	ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
 	if (ret)
 		goto out_free;
 	if (res.acl_len > args.acl_len)
@@ -3527,7 +3595,7 @@
 	if (i < 0)
 		return i;
 	nfs_inode_return_delegation(inode);
-	ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
+	ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
 
 	/*
 	 * Free each page after tx, so the only ref left is
@@ -3890,7 +3958,7 @@
 	lsp = request->fl_u.nfs4_fl.owner;
 	arg.lock_owner.id = lsp->ls_id.id;
 	arg.lock_owner.s_dev = server->s_dev;
-	status = nfs4_call_sync(server, &msg, &arg, &res, 1);
+	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
 	switch (status) {
 		case 0:
 			request->fl_type = F_UNLCK;
@@ -4618,12 +4686,46 @@
 	nfs_fattr_init(&fs_locations->fattr);
 	fs_locations->server = server;
 	fs_locations->nlocations = 0;
-	status = nfs4_call_sync(server, &msg, &args, &res, 0);
+	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
 	nfs_fixup_referral_attributes(&fs_locations->fattr);
 	dprintk("%s: returned status = %d\n", __func__, status);
 	return status;
 }
 
+static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
+{
+	int status;
+	struct nfs4_secinfo_arg args = {
+		.dir_fh = NFS_FH(dir),
+		.name   = name,
+	};
+	struct nfs4_secinfo_res res = {
+		.flavors     = flavors,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+
+	dprintk("NFS call  secinfo %s\n", name->name);
+	status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
+	dprintk("NFS reply  secinfo: %d\n", status);
+	return status;
+}
+
+int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(NFS_SERVER(dir),
+				_nfs4_proc_secinfo(dir, name, flavors),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
 #ifdef CONFIG_NFS_V4_1
 /*
  * Check the exchange flags returned by the server for invalid flags, having
@@ -5516,8 +5618,6 @@
 	struct nfs4_layoutget *lgp = calldata;
 
 	dprintk("--> %s\n", __func__);
-	if (lgp->res.layout.buf != NULL)
-		free_page((unsigned long) lgp->res.layout.buf);
 	put_nfs_open_context(lgp->args.ctx);
 	kfree(calldata);
 	dprintk("<-- %s\n", __func__);
@@ -5549,12 +5649,7 @@
 
 	dprintk("--> %s\n", __func__);
 
-	lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
-	if (lgp->res.layout.buf == NULL) {
-		nfs4_layoutget_release(lgp);
-		return -ENOMEM;
-	}
-
+	lgp->res.layoutp = &lgp->args.layout;
 	lgp->res.seq_res.sr_slot = NULL;
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
@@ -5586,7 +5681,7 @@
 	int status;
 
 	dprintk("--> %s\n", __func__);
-	status = nfs4_call_sync(server, &msg, &args, &res, 0);
+	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
 	dprintk("<-- %s status=%d\n", __func__, status);
 
 	return status;
@@ -5606,6 +5701,100 @@
 }
 EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
 
+static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_layoutcommit_data *data = calldata;
+	struct nfs_server *server = NFS_SERVER(data->args.inode);
+
+	if (nfs4_setup_sequence(server, &data->args.seq_args,
+				&data->res.seq_res, 1, task))
+		return;
+	rpc_call_start(task);
+}
+
+static void
+nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_layoutcommit_data *data = calldata;
+	struct nfs_server *server = NFS_SERVER(data->args.inode);
+
+	if (!nfs4_sequence_done(task, &data->res.seq_res))
+		return;
+
+	switch (task->tk_status) { /* Just ignore these failures */
+	case NFS4ERR_DELEG_REVOKED: /* layout was recalled */
+	case NFS4ERR_BADIOMODE:     /* no IOMODE_RW layout for range */
+	case NFS4ERR_BADLAYOUT:     /* no layout */
+	case NFS4ERR_GRACE:	    /* loca_recalim always false */
+		task->tk_status = 0;
+	}
+
+	if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
+		nfs_restart_rpc(task, server->nfs_client);
+		return;
+	}
+
+	if (task->tk_status == 0)
+		nfs_post_op_update_inode_force_wcc(data->args.inode,
+						   data->res.fattr);
+}
+
+static void nfs4_layoutcommit_release(void *calldata)
+{
+	struct nfs4_layoutcommit_data *data = calldata;
+
+	/* Matched by references in pnfs_set_layoutcommit */
+	put_lseg(data->lseg);
+	put_rpccred(data->cred);
+	kfree(data);
+}
+
+static const struct rpc_call_ops nfs4_layoutcommit_ops = {
+	.rpc_call_prepare = nfs4_layoutcommit_prepare,
+	.rpc_call_done = nfs4_layoutcommit_done,
+	.rpc_release = nfs4_layoutcommit_release,
+};
+
+int
+nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
+{
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT],
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->cred,
+	};
+	struct rpc_task_setup task_setup_data = {
+		.task = &data->task,
+		.rpc_client = NFS_CLIENT(data->args.inode),
+		.rpc_message = &msg,
+		.callback_ops = &nfs4_layoutcommit_ops,
+		.callback_data = data,
+		.flags = RPC_TASK_ASYNC,
+	};
+	struct rpc_task *task;
+	int status = 0;
+
+	dprintk("NFS: %4d initiating layoutcommit call. sync %d "
+		"lbw: %llu inode %lu\n",
+		data->task.tk_pid, sync,
+		data->args.lastbytewritten,
+		data->args.inode->i_ino);
+
+	task = rpc_run_task(&task_setup_data);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	if (sync == false)
+		goto out;
+	status = nfs4_wait_for_completion_rpc_task(task);
+	if (status != 0)
+		goto out;
+	status = task->tk_status;
+out:
+	dprintk("%s: status %d\n", __func__, status);
+	rpc_put_task(task);
+	return status;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5741,6 +5930,7 @@
 	.close_context  = nfs4_close_context,
 	.open_context	= nfs4_atomic_open,
 	.init_client	= nfs4_init_client,
+	.secinfo	= nfs4_proc_secinfo,
 };
 
 static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0cf560f..dddfb57 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c

@@ -46,6 +46,7 @@
 #include <linux/kdev_t.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/msg_prot.h>
+#include <linux/sunrpc/gss_api.h>
 #include <linux/nfs.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
@@ -112,7 +113,7 @@
 #define encode_restorefh_maxsz  (op_encode_hdr_maxsz)
 #define decode_restorefh_maxsz  (op_decode_hdr_maxsz)
 #define encode_fsinfo_maxsz	(encode_getattr_maxsz)
-#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + 11)
+#define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + 15)
 #define encode_renew_maxsz	(op_encode_hdr_maxsz + 3)
 #define decode_renew_maxsz	(op_decode_hdr_maxsz)
 #define encode_setclientid_maxsz \
@@ -253,6 +254,8 @@
 				(encode_getattr_maxsz)
 #define decode_fs_locations_maxsz \
 				(0)
+#define encode_secinfo_maxsz	(op_encode_hdr_maxsz + nfs4_name_maxsz)
+#define decode_secinfo_maxsz	(op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)))
 
 #if defined(CONFIG_NFS_V4_1)
 #define NFS4_MAX_MACHINE_NAME_LEN (64)
@@ -324,6 +327,18 @@
 #define decode_layoutget_maxsz	(op_decode_hdr_maxsz + 8 + \
 				decode_stateid_maxsz + \
 				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
+#define encode_layoutcommit_maxsz (op_encode_hdr_maxsz +          \
+				2 /* offset */ + \
+				2 /* length */ + \
+				1 /* reclaim */ + \
+				encode_stateid_maxsz + \
+				1 /* new offset (true) */ + \
+				2 /* last byte written */ + \
+				1 /* nt_timechanged (false) */ + \
+				1 /* layoutupdate4 layout type */ + \
+				1 /* NULL filelayout layoutupdate4 payload */)
+#define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
+
 #else /* CONFIG_NFS_V4_1 */
 #define encode_sequence_maxsz	0
 #define decode_sequence_maxsz	0
@@ -676,6 +691,14 @@
 				 decode_putfh_maxsz + \
 				 decode_lookup_maxsz + \
 				 decode_fs_locations_maxsz)
+#define NFS4_enc_secinfo_sz 	(compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz + \
+				encode_putfh_maxsz + \
+				encode_secinfo_maxsz)
+#define NFS4_dec_secinfo_sz	(compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
+				decode_putfh_maxsz + \
+				decode_secinfo_maxsz)
 #if defined(CONFIG_NFS_V4_1)
 #define NFS4_enc_exchange_id_sz \
 				(compound_encode_hdr_maxsz + \
@@ -727,6 +750,17 @@
 				decode_sequence_maxsz + \
 				decode_putfh_maxsz +        \
 				decode_layoutget_maxsz)
+#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \
+				encode_sequence_maxsz +\
+				encode_putfh_maxsz + \
+				encode_layoutcommit_maxsz + \
+				encode_getattr_maxsz)
+#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \
+				decode_sequence_maxsz + \
+				decode_putfh_maxsz + \
+				decode_layoutcommit_maxsz + \
+				decode_getattr_maxsz)
+
 
 const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
 				      compound_encode_hdr_maxsz +
@@ -1620,6 +1654,18 @@
 	hdr->replen += decode_delegreturn_maxsz;
 }
 
+static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
+{
+	int len = name->len;
+	__be32 *p;
+
+	p = reserve_space(xdr, 8 + len);
+	*p++ = cpu_to_be32(OP_SECINFO);
+	xdr_encode_opaque(p, name->name, len);
+	hdr->nops++;
+	hdr->replen += decode_secinfo_maxsz;
+}
+
 #if defined(CONFIG_NFS_V4_1)
 /* NFSv4.1 operations */
 static void encode_exchange_id(struct xdr_stream *xdr,
@@ -1816,6 +1862,34 @@
 	hdr->nops++;
 	hdr->replen += decode_layoutget_maxsz;
 }
+
+static int
+encode_layoutcommit(struct xdr_stream *xdr,
+		    const struct nfs4_layoutcommit_args *args,
+		    struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
+		NFS_SERVER(args->inode)->pnfs_curr_ld->id);
+
+	p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE);
+	*p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
+	/* Only whole file layouts */
+	p = xdr_encode_hyper(p, 0); /* offset */
+	p = xdr_encode_hyper(p, NFS4_MAX_UINT64); /* length */
+	*p++ = cpu_to_be32(0); /* reclaim */
+	p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE);
+	*p++ = cpu_to_be32(1); /* newoffset = TRUE */
+	p = xdr_encode_hyper(p, args->lastbytewritten);
+	*p++ = cpu_to_be32(0); /* Never send time_modify_changed */
+	*p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
+	*p++ = cpu_to_be32(0); /* no file layout payload */
+
+	hdr->nops++;
+	hdr->replen += decode_layoutcommit_maxsz;
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -2294,7 +2368,8 @@
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
 	encode_commit(xdr, args, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
+	if (args->bitmask)
+		encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -2465,6 +2540,24 @@
 	encode_nops(&hdr);
 }
 
+/*
+ * Encode SECINFO request
+ */
+static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
+				struct xdr_stream *xdr,
+				struct nfs4_secinfo_arg *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_secinfo(xdr, args->name, &hdr);
+	encode_nops(&hdr);
+}
+
 #if defined(CONFIG_NFS_V4_1)
 /*
  * EXCHANGE_ID request
@@ -2604,8 +2697,32 @@
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
 	encode_layoutget(xdr, args, &hdr);
+
+	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
+	    args->layout.pages, 0, args->layout.pglen);
+
 	encode_nops(&hdr);
 }
+
+/*
+ *  Encode LAYOUTCOMMIT request
+ */
+static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs4_layoutcommit_args *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
+	encode_layoutcommit(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_nops(&hdr);
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -2925,6 +3042,7 @@
 		if (unlikely(!p))
 			goto out_overflow;
 		bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
+		return -be32_to_cpup(p);
 	}
 	return 0;
 out_overflow:
@@ -3912,6 +4030,10 @@
 	fattr->valid |= status;
 
 	status = decode_attr_error(xdr, bitmap);
+	if (status == -NFS4ERR_WRONGSEC) {
+		nfs_fixup_secinfo_attributes(fattr, fh);
+		status = 0;
+	}
 	if (status < 0)
 		goto xdr_error;
 
@@ -4680,6 +4802,73 @@
 	return decode_op_hdr(xdr, OP_DELEGRETURN);
 }
 
+static int decode_secinfo_gss(struct xdr_stream *xdr, struct nfs4_secinfo_flavor *flavor)
+{
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		goto out_overflow;
+	flavor->gss.sec_oid4.len = be32_to_cpup(p);
+	if (flavor->gss.sec_oid4.len > GSS_OID_MAX_LEN)
+		goto out_err;
+
+	p = xdr_inline_decode(xdr, flavor->gss.sec_oid4.len);
+	if (unlikely(!p))
+		goto out_overflow;
+	memcpy(flavor->gss.sec_oid4.data, p, flavor->gss.sec_oid4.len);
+
+	p = xdr_inline_decode(xdr, 8);
+	if (unlikely(!p))
+		goto out_overflow;
+	flavor->gss.qop4 = be32_to_cpup(p++);
+	flavor->gss.service = be32_to_cpup(p);
+
+	return 0;
+
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+out_err:
+	return -EINVAL;
+}
+
+static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
+{
+	struct nfs4_secinfo_flavor *sec_flavor;
+	int status;
+	__be32 *p;
+	int i;
+
+	status = decode_op_hdr(xdr, OP_SECINFO);
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		goto out_overflow;
+	res->flavors->num_flavors = be32_to_cpup(p);
+
+	for (i = 0; i < res->flavors->num_flavors; i++) {
+		sec_flavor = &res->flavors->flavors[i];
+		if ((char *)&sec_flavor[1] - (char *)res > PAGE_SIZE)
+			break;
+
+		p = xdr_inline_decode(xdr, 4);
+		if (unlikely(!p))
+			goto out_overflow;
+		sec_flavor->flavor = be32_to_cpup(p);
+
+		if (sec_flavor->flavor == RPC_AUTH_GSS) {
+			if (decode_secinfo_gss(xdr, sec_flavor))
+				break;
+		}
+	}
+
+	return 0;
+
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+
 #if defined(CONFIG_NFS_V4_1)
 static int decode_exchange_id(struct xdr_stream *xdr,
 			      struct nfs41_exchange_id_res *res)
@@ -4950,6 +5139,9 @@
 	__be32 *p;
 	int status;
 	u32 layout_count;
+	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+	struct kvec *iov = rcvbuf->head;
+	u32 hdrlen, recvd;
 
 	status = decode_op_hdr(xdr, OP_LAYOUTGET);
 	if (status)
@@ -4966,17 +5158,14 @@
 		return -EINVAL;
 	}
 
-	p = xdr_inline_decode(xdr, 24);
+	p = xdr_inline_decode(xdr, 28);
 	if (unlikely(!p))
 		goto out_overflow;
 	p = xdr_decode_hyper(p, &res->range.offset);
 	p = xdr_decode_hyper(p, &res->range.length);
 	res->range.iomode = be32_to_cpup(p++);
 	res->type = be32_to_cpup(p++);
-
-	status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
-	if (unlikely(status))
-		return status;
+	res->layoutp->len = be32_to_cpup(p);
 
 	dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
 		__func__,
@@ -4984,12 +5173,18 @@
 		(unsigned long)res->range.length,
 		res->range.iomode,
 		res->type,
-		res->layout.len);
+		res->layoutp->len);
 
-	/* nfs4_proc_layoutget allocated a single page */
-	if (res->layout.len > PAGE_SIZE)
-		return -ENOMEM;
-	memcpy(res->layout.buf, p, res->layout.len);
+	hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
+	recvd = req->rq_rcv_buf.len - hdrlen;
+	if (res->layoutp->len > recvd) {
+		dprintk("NFS: server cheating in layoutget reply: "
+				"layout len %u > recvd %u\n",
+				res->layoutp->len, recvd);
+		return -EINVAL;
+	}
+
+	xdr_read_pages(xdr, res->layoutp->len);
 
 	if (layout_count > 1) {
 		/* We only handle a length one array at the moment.  Any
@@ -5006,6 +5201,35 @@
 	print_overflow_msg(__func__, xdr);
 	return -EIO;
 }
+
+static int decode_layoutcommit(struct xdr_stream *xdr,
+			       struct rpc_rqst *req,
+			       struct nfs4_layoutcommit_res *res)
+{
+	__be32 *p;
+	__u32 sizechanged;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT);
+	if (status)
+		return status;
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		goto out_overflow;
+	sizechanged = be32_to_cpup(p);
+
+	if (sizechanged) {
+		/* throw away new size */
+		p = xdr_inline_decode(xdr, 8);
+		if (unlikely(!p))
+			goto out_overflow;
+	}
+	return 0;
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -5723,8 +5947,9 @@
 	status = decode_commit(xdr, res);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	if (res->fattr)
+		decode_getfattr(xdr, res->fattr, res->server,
+				!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
 }
@@ -5919,6 +6144,32 @@
 	return status;
 }
 
+/*
+ * Decode SECINFO response
+ */
+static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
+				struct xdr_stream *xdr,
+				struct nfs4_secinfo_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_secinfo(xdr, res);
+	if (status)
+		goto out;
+out:
+	return status;
+}
+
 #if defined(CONFIG_NFS_V4_1)
 /*
  * Decode EXCHANGE_ID response
@@ -6066,6 +6317,34 @@
 out:
 	return status;
 }
+
+/*
+ * Decode LAYOUTCOMMIT response
+ */
+static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
+				     struct xdr_stream *xdr,
+				     struct nfs4_layoutcommit_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_layoutcommit(xdr, rqstp, res);
+	if (status)
+		goto out;
+	decode_getfattr(xdr, res->fattr, res->server,
+			!RPC_IS_ASYNC(rqstp->rq_task));
+out:
+	return status;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /**
@@ -6180,10 +6459,6 @@
 	{ NFS4ERR_SYMLINK,	-ELOOP		},
 	{ NFS4ERR_OP_ILLEGAL,	-EOPNOTSUPP	},
 	{ NFS4ERR_DEADLOCK,	-EDEADLK	},
-	{ NFS4ERR_WRONGSEC,	-EPERM		}, /* FIXME: this needs
-						    * to be handled by a
-						    * middle-layer.
-						    */
 	{ -1,			-EIO		}
 };
 
@@ -6258,6 +6533,7 @@
 	PROC(SETACL,		enc_setacl,		dec_setacl),
 	PROC(FS_LOCATIONS,	enc_fs_locations,	dec_fs_locations),
 	PROC(RELEASE_LOCKOWNER,	enc_release_lockowner,	dec_release_lockowner),
+	PROC(SECINFO,		enc_secinfo,		dec_secinfo),
 #if defined(CONFIG_NFS_V4_1)
 	PROC(EXCHANGE_ID,	enc_exchange_id,	dec_exchange_id),
 	PROC(CREATE_SESSION,	enc_create_session,	dec_create_session),
@@ -6267,6 +6543,7 @@
 	PROC(RECLAIM_COMPLETE,	enc_reclaim_complete,	dec_reclaim_complete),
 	PROC(GETDEVICEINFO,	enc_getdeviceinfo,	dec_getdeviceinfo),
 	PROC(LAYOUTGET,		enc_layoutget,		dec_layoutget),
+	PROC(LAYOUTCOMMIT,	enc_layoutcommit,	dec_layoutcommit),
 #endif /* CONFIG_NFS_V4_1 */
 };
 

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 23e7944..87a593c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c

@@ -223,6 +223,7 @@
 	desc->pg_count = 0;
 	desc->pg_bsize = bsize;
 	desc->pg_base = 0;
+	desc->pg_moreio = 0;
 	desc->pg_inode = inode;
 	desc->pg_doio = doio;
 	desc->pg_ioflags = io_flags;
@@ -335,9 +336,11 @@
 			   struct nfs_page *req)
 {
 	while (!nfs_pageio_do_add_request(desc, req)) {
+		desc->pg_moreio = 1;
 		nfs_pageio_doio(desc);
 		if (desc->pg_error < 0)
 			return 0;
+		desc->pg_moreio = 0;
 	}
 	return 1;
 }
@@ -395,6 +398,7 @@
 	pgoff_t idx_end;
 	int found, i;
 	int res;
+	struct list_head *list;
 
 	res = 0;
 	if (npages == 0)
@@ -415,10 +419,10 @@
 			idx_start = req->wb_index + 1;
 			if (nfs_set_page_tag_locked(req)) {
 				kref_get(&req->wb_kref);
-				nfs_list_remove_request(req);
 				radix_tree_tag_clear(&nfsi->nfs_page_tree,
 						req->wb_index, tag);
-				nfs_list_add_request(req, dst);
+				list = pnfs_choose_commit_list(req, dst);
+				nfs_list_add_request(req, list);
 				res++;
 				if (res == INT_MAX)
 					goto out;

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f38813a..d9ab972 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c

@@ -259,6 +259,7 @@
 		pnfs_free_lseg_list(&free_me);
 	}
 }
+EXPORT_SYMBOL_GPL(put_lseg);
 
 static bool
 should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
@@ -471,6 +472,9 @@
 	struct nfs_server *server = NFS_SERVER(ino);
 	struct nfs4_layoutget *lgp;
 	struct pnfs_layout_segment *lseg = NULL;
+	struct page **pages = NULL;
+	int i;
+	u32 max_resp_sz, max_pages;
 
 	dprintk("--> %s\n", __func__);
 
@@ -478,6 +482,21 @@
 	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
 	if (lgp == NULL)
 		return NULL;
+
+	/* allocate pages for xdr post processing */
+	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
+	max_pages = max_resp_sz >> PAGE_SHIFT;
+
+	pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_err_free;
+
+	for (i = 0; i < max_pages; i++) {
+		pages[i] = alloc_page(GFP_KERNEL);
+		if (!pages[i])
+			goto out_err_free;
+	}
+
 	lgp->args.minlength = NFS4_MAX_UINT64;
 	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
 	lgp->args.range.iomode = iomode;
@@ -486,6 +505,8 @@
 	lgp->args.type = server->pnfs_curr_ld->id;
 	lgp->args.inode = ino;
 	lgp->args.ctx = get_nfs_open_context(ctx);
+	lgp->args.layout.pages = pages;
+	lgp->args.layout.pglen = max_pages * PAGE_SIZE;
 	lgp->lsegpp = &lseg;
 
 	/* Synchronously retrieve layout information from server and
@@ -496,7 +517,26 @@
 		/* remember that LAYOUTGET failed and suspend trying */
 		set_bit(lo_fail_bit(iomode), &lo->plh_flags);
 	}
+
+	/* free xdr pages */
+	for (i = 0; i < max_pages; i++)
+		__free_page(pages[i]);
+	kfree(pages);
+
 	return lseg;
+
+out_err_free:
+	/* free any allocated xdr pages, lgp as it's not used */
+	if (pages) {
+		for (i = 0; i < max_pages; i++) {
+			if (!pages[i])
+				break;
+			__free_page(pages[i]);
+		}
+		kfree(pages);
+	}
+	kfree(lgp);
+	return NULL;
 }
 
 bool pnfs_roc(struct inode *ino)
@@ -945,3 +985,105 @@
 	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
 	return trypnfs;
 }
+
+/*
+ * Currently there is only one (whole file) write lseg.
+ */
+static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
+{
+	struct pnfs_layout_segment *lseg, *rv = NULL;
+
+	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
+		if (lseg->pls_range.iomode == IOMODE_RW)
+			rv = lseg;
+	return rv;
+}
+
+void
+pnfs_set_layoutcommit(struct nfs_write_data *wdata)
+{
+	struct nfs_inode *nfsi = NFS_I(wdata->inode);
+	loff_t end_pos = wdata->args.offset + wdata->res.count;
+
+	spin_lock(&nfsi->vfs_inode.i_lock);
+	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
+		/* references matched in nfs4_layoutcommit_release */
+		get_lseg(wdata->lseg);
+		wdata->lseg->pls_lc_cred =
+			get_rpccred(wdata->args.context->state->owner->so_cred);
+		mark_inode_dirty_sync(wdata->inode);
+		dprintk("%s: Set layoutcommit for inode %lu ",
+			__func__, wdata->inode->i_ino);
+	}
+	if (end_pos > wdata->lseg->pls_end_pos)
+		wdata->lseg->pls_end_pos = end_pos;
+	spin_unlock(&nfsi->vfs_inode.i_lock);
+}
+EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
+
+/*
+ * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
+ * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
+ * data to disk to allow the server to recover the data if it crashes.
+ * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
+ * is off, and a COMMIT is sent to a data server, or
+ * if WRITEs to a data server return NFS_DATA_SYNC.
+ */
+int
+pnfs_layoutcommit_inode(struct inode *inode, bool sync)
+{
+	struct nfs4_layoutcommit_data *data;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct pnfs_layout_segment *lseg;
+	struct rpc_cred *cred;
+	loff_t end_pos;
+	int status = 0;
+
+	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
+
+	if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+		return 0;
+
+	/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
+	data = kzalloc(sizeof(*data), GFP_NOFS);
+	if (!data) {
+		mark_inode_dirty_sync(inode);
+		status = -ENOMEM;
+		goto out;
+	}
+
+	spin_lock(&inode->i_lock);
+	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
+		spin_unlock(&inode->i_lock);
+		kfree(data);
+		goto out;
+	}
+	/*
+	 * Currently only one (whole file) write lseg which is referenced
+	 * in pnfs_set_layoutcommit and will be found.
+	 */
+	lseg = pnfs_list_write_lseg(inode);
+
+	end_pos = lseg->pls_end_pos;
+	cred = lseg->pls_lc_cred;
+	lseg->pls_end_pos = 0;
+	lseg->pls_lc_cred = NULL;
+
+	memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data,
+		sizeof(nfsi->layout->plh_stateid.data));
+	spin_unlock(&inode->i_lock);
+
+	data->args.inode = inode;
+	data->lseg = lseg;
+	data->cred = cred;
+	nfs_fattr_init(&data->fattr);
+	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
+	data->res.fattr = &data->fattr;
+	data->args.lastbytewritten = end_pos - 1;
+	data->res.server = NFS_SERVER(inode);
+
+	status = nfs4_proc_layoutcommit(data, sync);
+out:
+	dprintk("<-- %s status %d\n", __func__, status);
+	return status;
+}

diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 6380b94..bc48272 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h

@@ -43,6 +43,8 @@
 	atomic_t pls_refcount;
 	unsigned long pls_flags;
 	struct pnfs_layout_hdr *pls_layout;
+	struct rpc_cred	*pls_lc_cred; /* LAYOUTCOMMIT credential */
+	loff_t pls_end_pos; /* LAYOUTCOMMIT write end */
 };
 
 enum pnfs_try_status {
@@ -74,6 +76,13 @@
 	/* test for nfs page cache coalescing */
 	int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
 
+	/* Returns true if layoutdriver wants to divert this request to
+	 * driver's commit routine.
+	 */
+	bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg);
+	struct list_head * (*choose_commit_list) (struct nfs_page *req);
+	int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
+
 	/*
 	 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
 	 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
@@ -100,7 +109,6 @@
 	unsigned int  layout_type;
 	unsigned int  mincount;
 	struct page **pages;
-	void          *area;
 	unsigned int  pgbase;
 	unsigned int  pglen;
 };
@@ -145,7 +153,8 @@
 void pnfs_roc_release(struct inode *ino);
 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
 bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
-
+void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
+int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 
 static inline int lo_fail_bit(u32 iomode)
 {
@@ -169,6 +178,51 @@
 	return nfss->pnfs_curr_ld != NULL;
 }
 
+static inline void
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+{
+	if (lseg) {
+		struct pnfs_layoutdriver_type *ld;
+
+		ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld;
+		if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) {
+			set_bit(PG_PNFS_COMMIT, &req->wb_flags);
+			req->wb_commit_lseg = get_lseg(lseg);
+		}
+	}
+}
+
+static inline int
+pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
+{
+	if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags))
+		return PNFS_NOT_ATTEMPTED;
+	return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
+}
+
+static inline struct list_head *
+pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
+{
+	struct list_head *rv;
+
+	if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) {
+		struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode;
+
+		set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
+		rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req);
+		/* matched by ref taken when PG_PNFS_COMMIT is set */
+		put_lseg(req->wb_commit_lseg);
+	} else
+		rv = mds;
+	return rv;
+}
+
+static inline void pnfs_clear_request_commit(struct nfs_page *req)
+{
+	if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags))
+		put_lseg(req->wb_commit_lseg);
+}
+
 #else  /* CONFIG_NFS_V4_1 */
 
 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -252,6 +306,31 @@
 	pgio->pg_test = NULL;
 }
 
+static inline void
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+{
+}
+
+static inline int
+pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
+{
+	return PNFS_NOT_ATTEMPTED;
+}
+
+static inline struct list_head *
+pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
+{
+	return mds;
+}
+
+static inline void pnfs_clear_request_commit(struct nfs_page *req)
+{
+}
+
+static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
+{
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 #endif /* FS_NFS_PNFS_H */

diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b8ec170..ac40b85 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c

@@ -177,7 +177,7 @@
 }
 
 static int
-nfs_proc_lookup(struct inode *dir, struct qstr *name,
+nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
 		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	struct nfs_diropargs	arg = {

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 47a3ad6..85d7525 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c

@@ -59,6 +59,7 @@
 	}
 	return p;
 }
+EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
 
 void nfs_commit_free(struct nfs_write_data *p)
 {
@@ -66,6 +67,7 @@
 		kfree(p->pagevec);
 	mempool_free(p, nfs_commit_mempool);
 }
+EXPORT_SYMBOL_GPL(nfs_commit_free);
 
 struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 {
@@ -179,8 +181,8 @@
 	if (wbc->for_reclaim)
 		return FLUSH_HIGHPRI | FLUSH_STABLE;
 	if (wbc->for_kupdate || wbc->for_background)
-		return FLUSH_LOWPRI;
-	return 0;
+		return FLUSH_LOWPRI | FLUSH_COND_STABLE;
+	return FLUSH_COND_STABLE;
 }
 
 /*
@@ -441,7 +443,7 @@
  * Add a request to the inode's commit list.
  */
 static void
-nfs_mark_request_commit(struct nfs_page *req)
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
 {
 	struct inode *inode = req->wb_context->path.dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
@@ -453,6 +455,7 @@
 			NFS_PAGE_TAG_COMMIT);
 	nfsi->ncommit++;
 	spin_unlock(&inode->i_lock);
+	pnfs_mark_request_commit(req, lseg);
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
@@ -474,14 +477,18 @@
 static inline
 int nfs_write_need_commit(struct nfs_write_data *data)
 {
-	return data->verf.committed != NFS_FILE_SYNC;
+	if (data->verf.committed == NFS_DATA_SYNC)
+		return data->lseg == NULL;
+	else
+		return data->verf.committed != NFS_FILE_SYNC;
 }
 
 static inline
-int nfs_reschedule_unstable_write(struct nfs_page *req)
+int nfs_reschedule_unstable_write(struct nfs_page *req,
+				  struct nfs_write_data *data)
 {
 	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
-		nfs_mark_request_commit(req);
+		nfs_mark_request_commit(req, data->lseg);
 		return 1;
 	}
 	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
@@ -492,7 +499,7 @@
 }
 #else
 static inline void
-nfs_mark_request_commit(struct nfs_page *req)
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
 {
 }
 
@@ -509,7 +516,8 @@
 }
 
 static inline
-int nfs_reschedule_unstable_write(struct nfs_page *req)
+int nfs_reschedule_unstable_write(struct nfs_page *req,
+				  struct nfs_write_data *data)
 {
 	return 0;
 }
@@ -612,9 +620,11 @@
 	}
 
 	if (nfs_clear_request_commit(req) &&
-			radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
-				req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL)
+	    radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
+				 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) {
 		NFS_I(inode)->ncommit--;
+		pnfs_clear_request_commit(req);
+	}
 
 	/* Okay, the request matches. Update the region */
 	if (offset < req->wb_offset) {
@@ -762,11 +772,12 @@
 	return status;
 }
 
-static void nfs_writepage_release(struct nfs_page *req)
+static void nfs_writepage_release(struct nfs_page *req,
+				  struct nfs_write_data *data)
 {
 	struct page *page = req->wb_page;
 
-	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req))
+	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
 		nfs_inode_remove_request(req);
 	nfs_clear_page_tag_locked(req);
 	nfs_end_page_writeback(page);
@@ -863,7 +874,7 @@
 	data->args.context = get_nfs_open_context(req->wb_context);
 	data->args.lock_context = req->wb_lock_context;
 	data->args.stable  = NFS_UNSTABLE;
-	if (how & FLUSH_STABLE) {
+	if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
 		data->args.stable = NFS_DATA_SYNC;
 		if (!nfs_need_commit(NFS_I(inode)))
 			data->args.stable = NFS_FILE_SYNC;
@@ -912,6 +923,12 @@
 
 	nfs_list_remove_request(req);
 
+	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
+	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
+	     desc->pg_count > wsize))
+		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
+
+
 	nbytes = desc->pg_count;
 	do {
 		size_t len = min(nbytes, wsize);
@@ -1002,6 +1019,10 @@
 	if ((!lseg) && list_is_singular(&data->pages))
 		lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
 
+	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
+	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
+		desc->pg_ioflags &= ~FLUSH_COND_STABLE;
+
 	/* Set up the argument struct */
 	ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
 out:
@@ -1074,7 +1095,7 @@
 
 out:
 	if (atomic_dec_and_test(&req->wb_complete))
-		nfs_writepage_release(req);
+		nfs_writepage_release(req, data);
 	nfs_writedata_release(calldata);
 }
 
@@ -1141,7 +1162,7 @@
 
 		if (nfs_write_need_commit(data)) {
 			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-			nfs_mark_request_commit(req);
+			nfs_mark_request_commit(req, data->lseg);
 			dprintk(" marked for commit\n");
 			goto next;
 		}
@@ -1251,78 +1272,60 @@
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
 {
+	int ret;
+
 	if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
 		return 1;
-	if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags,
-				NFS_INO_COMMIT, nfs_wait_bit_killable,
-				TASK_KILLABLE))
-		return 1;
-	return 0;
+	if (!may_wait)
+		return 0;
+	ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
+				NFS_INO_COMMIT,
+				nfs_wait_bit_killable,
+				TASK_KILLABLE);
+	return (ret < 0) ? ret : 1;
 }
 
-static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+void nfs_commit_clear_lock(struct nfs_inode *nfsi)
 {
 	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
 	smp_mb__after_clear_bit();
 	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
 }
+EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
 
-
-static void nfs_commitdata_release(void *data)
+void nfs_commitdata_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
 
+	put_lseg(wdata->lseg);
 	put_nfs_open_context(wdata->args.context);
 	nfs_commit_free(wdata);
 }
+EXPORT_SYMBOL_GPL(nfs_commitdata_release);
 
-/*
- * Set up the argument/result storage required for the RPC call.
- */
-static int nfs_commit_rpcsetup(struct list_head *head,
-		struct nfs_write_data *data,
-		int how)
+int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
+			const struct rpc_call_ops *call_ops,
+			int how)
 {
-	struct nfs_page *first = nfs_list_entry(head->next);
-	struct inode *inode = first->wb_context->path.dentry->d_inode;
-	int priority = flush_task_priority(how);
 	struct rpc_task *task;
+	int priority = flush_task_priority(how);
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
-		.rpc_cred = first->wb_context->cred,
+		.rpc_cred = data->cred,
 	};
 	struct rpc_task_setup task_setup_data = {
 		.task = &data->task,
-		.rpc_client = NFS_CLIENT(inode),
+		.rpc_client = clnt,
 		.rpc_message = &msg,
-		.callback_ops = &nfs_commit_ops,
+		.callback_ops = call_ops,
 		.callback_data = data,
 		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 		.priority = priority,
 	};
-
-	/* Set up the RPC argument and reply structs
-	 * NB: take care not to mess about with data->commit et al. */
-
-	list_splice_init(head, &data->pages);
-
-	data->inode	  = inode;
-	data->cred	  = msg.rpc_cred;
-
-	data->args.fh     = NFS_FH(data->inode);
-	/* Note: we always request a commit of the entire inode */
-	data->args.offset = 0;
-	data->args.count  = 0;
-	data->args.context = get_nfs_open_context(first->wb_context);
-	data->res.count   = 0;
-	data->res.fattr   = &data->fattr;
-	data->res.verf    = &data->verf;
-	nfs_fattr_init(&data->fattr);
-
 	/* Set up the initial task struct.  */
-	NFS_PROTO(inode)->commit_setup(data, &msg);
+	NFS_PROTO(data->inode)->commit_setup(data, &msg);
 
 	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
 
@@ -1334,6 +1337,56 @@
 	rpc_put_task(task);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nfs_initiate_commit);
+
+/*
+ * Set up the argument/result storage required for the RPC call.
+ */
+void nfs_init_commit(struct nfs_write_data *data,
+			    struct list_head *head,
+			    struct pnfs_layout_segment *lseg)
+{
+	struct nfs_page *first = nfs_list_entry(head->next);
+	struct inode *inode = first->wb_context->path.dentry->d_inode;
+
+	/* Set up the RPC argument and reply structs
+	 * NB: take care not to mess about with data->commit et al. */
+
+	list_splice_init(head, &data->pages);
+
+	data->inode	  = inode;
+	data->cred	  = first->wb_context->cred;
+	data->lseg	  = lseg; /* reference transferred */
+	data->mds_ops     = &nfs_commit_ops;
+
+	data->args.fh     = NFS_FH(data->inode);
+	/* Note: we always request a commit of the entire inode */
+	data->args.offset = 0;
+	data->args.count  = 0;
+	data->args.context = get_nfs_open_context(first->wb_context);
+	data->res.count   = 0;
+	data->res.fattr   = &data->fattr;
+	data->res.verf    = &data->verf;
+	nfs_fattr_init(&data->fattr);
+}
+EXPORT_SYMBOL_GPL(nfs_init_commit);
+
+void nfs_retry_commit(struct list_head *page_list,
+		      struct pnfs_layout_segment *lseg)
+{
+	struct nfs_page *req;
+
+	while (!list_empty(page_list)) {
+		req = nfs_list_entry(page_list->next);
+		nfs_list_remove_request(req);
+		nfs_mark_request_commit(req, lseg);
+		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+			     BDI_RECLAIMABLE);
+		nfs_clear_page_tag_locked(req);
+	}
+}
+EXPORT_SYMBOL_GPL(nfs_retry_commit);
 
 /*
  * Commit dirty pages
@@ -1342,7 +1395,6 @@
 nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 {
 	struct nfs_write_data	*data;
-	struct nfs_page         *req;
 
 	data = nfs_commitdata_alloc();
 
@@ -1350,17 +1402,10 @@
 		goto out_bad;
 
 	/* Set up the argument struct */
-	return nfs_commit_rpcsetup(head, data, how);
+	nfs_init_commit(data, head, NULL);
+	return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
  out_bad:
-	while (!list_empty(head)) {
-		req = nfs_list_entry(head->next);
-		nfs_list_remove_request(req);
-		nfs_mark_request_commit(req);
-		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
-				BDI_RECLAIMABLE);
-		nfs_clear_page_tag_locked(req);
-	}
+	nfs_retry_commit(head, NULL);
 	nfs_commit_clear_lock(NFS_I(inode));
 	return -ENOMEM;
 }
@@ -1380,10 +1425,9 @@
 		return;
 }
 
-static void nfs_commit_release(void *calldata)
+void nfs_commit_release_pages(struct nfs_write_data *data)
 {
-	struct nfs_write_data	*data = calldata;
-	struct nfs_page		*req;
+	struct nfs_page	*req;
 	int status = data->task.tk_status;
 
 	while (!list_empty(&data->pages)) {
@@ -1417,6 +1461,14 @@
 	next:
 		nfs_clear_page_tag_locked(req);
 	}
+}
+EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
+
+static void nfs_commit_release(void *calldata)
+{
+	struct nfs_write_data *data = calldata;
+
+	nfs_commit_release_pages(data);
 	nfs_commit_clear_lock(NFS_I(data->inode));
 	nfs_commitdata_release(calldata);
 }
@@ -1433,23 +1485,30 @@
 {
 	LIST_HEAD(head);
 	int may_wait = how & FLUSH_SYNC;
-	int res = 0;
+	int res;
 
-	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+	res = nfs_commit_set_lock(NFS_I(inode), may_wait);
+	if (res <= 0)
 		goto out_mark_dirty;
 	spin_lock(&inode->i_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
 	spin_unlock(&inode->i_lock);
 	if (res) {
-		int error = nfs_commit_list(inode, &head, how);
+		int error;
+
+		error = pnfs_commit_list(inode, &head, how);
+		if (error == PNFS_NOT_ATTEMPTED)
+			error = nfs_commit_list(inode, &head, how);
 		if (error < 0)
 			return error;
-		if (may_wait)
-			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
-					nfs_wait_bit_killable,
-					TASK_KILLABLE);
-		else
+		if (!may_wait)
 			goto out_mark_dirty;
+		error = wait_on_bit(&NFS_I(inode)->flags,
+				NFS_INO_COMMIT,
+				nfs_wait_bit_killable,
+				TASK_KILLABLE);
+		if (error < 0)
+			return error;
 	} else
 		nfs_commit_clear_lock(NFS_I(inode));
 	return res;
@@ -1503,7 +1562,22 @@
 
 int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	return nfs_commit_unstable_pages(inode, wbc);
+	int ret;
+
+	ret = nfs_commit_unstable_pages(inode, wbc);
+	if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
+		int status;
+		bool sync = true;
+
+		if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking ||
+		    wbc->for_background)
+			sync = false;
+
+		status = pnfs_layoutcommit_inode(inode, sync);
+		if (status < 0)
+			return status;
+	}
+	return ret;
 }
 
 /*

diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 84c27d6..ec0f277 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c

@@ -117,7 +117,6 @@
 		 * invoked in contexts where a memory allocation failure is
 		 * fatal.  Fortunately this fake ACL is small enough to
 		 * construct on the stack. */
-		memset(acl2, 0, sizeof(acl2));
 		posix_acl_init(acl2, 4);
 
 		/* Insert entries in canonical order: other orders seem

diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 4c29fcf..07ea8d3 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c

@@ -22,13 +22,14 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
-#include <linux/writeback.h> /* for inode_lock */
 
 #include <asm/atomic.h>
 
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
+#include "../internal.h"
+
 /*
  * Recalculate the mask of events relevant to a given inode locked.
  */
@@ -237,15 +238,14 @@
  * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
  * @list: list of inodes being unmounted (sb->s_inodes)
  *
- * Called with inode_lock held, protecting the unmounting super block's list
- * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
- * We temporarily drop inode_lock, however, and CAN block.
+ * Called during unmount with no locks held, so needs to be safe against
+ * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block.
  */
 void fsnotify_unmount_inodes(struct list_head *list)
 {
 	struct inode *inode, *next_i, *need_iput = NULL;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
 		struct inode *need_iput_tmp;
 
@@ -254,8 +254,11 @@
 		 * I_WILL_FREE, or I_NEW which is fine because by that point
 		 * the inode cannot have any associated watches.
 		 */
-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+		spin_lock(&inode->i_lock);
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 
 		/*
 		 * If i_count is zero, the inode cannot have any watches and
@@ -263,8 +266,10 @@
 		 * evict all inodes with zero i_count from icache which is
 		 * unnecessarily violent and may in fact be illegal to do.
 		 */
-		if (!atomic_read(&inode->i_count))
+		if (!atomic_read(&inode->i_count)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 
 		need_iput_tmp = need_iput;
 		need_iput = NULL;
@@ -274,22 +279,25 @@
 			__iget(inode);
 		else
 			need_iput_tmp = NULL;
+		spin_unlock(&inode->i_lock);
 
 		/* In case the dropping of a reference would nuke next_i. */
 		if ((&next_i->i_sb_list != list) &&
-		    atomic_read(&next_i->i_count) &&
-		    !(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
-			__iget(next_i);
-			need_iput = next_i;
+		    atomic_read(&next_i->i_count)) {
+			spin_lock(&next_i->i_lock);
+			if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
+				__iget(next_i);
+				need_iput = next_i;
+			}
+			spin_unlock(&next_i->i_lock);
 		}
 
 		/*
-		 * We can safely drop inode_lock here because we hold
+		 * We can safely drop inode_sb_list_lock here because we hold
 		 * references on both inode and next_i.  Also no new inodes
-		 * will be added since the umount has begun.  Finally,
-		 * iprune_mutex keeps shrink_icache_memory() away.
+		 * will be added since the umount has begun.
 		 */
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_sb_list_lock);
 
 		if (need_iput_tmp)
 			iput(need_iput_tmp);
@@ -301,7 +309,7 @@
 
 		iput(inode);
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 }

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 325185e..50c0085 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c

@@ -91,7 +91,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/srcu.h>
-#include <linux/writeback.h> /* for inode_lock */
 
 #include <asm/atomic.h>
 

diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 85eebff..e86577d 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c

@@ -23,7 +23,6 @@
 #include <linux/mount.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
-#include <linux/writeback.h> /* for inode_lock */
 
 #include <asm/atomic.h>
 

diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index a627ed8..0b56c6b 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c

@@ -54,7 +54,7 @@
  *
  * Return 1 if the attributes match and 0 if not.
  *
- * NOTE: This function runs with the inode_lock spin lock held so it is not
+ * NOTE: This function runs with the inode->i_lock spin lock held so it is not
  * allowed to sleep.
  */
 int ntfs_test_inode(struct inode *vi, ntfs_attr *na)
@@ -98,7 +98,7 @@
  *
  * Return 0 on success and -errno on error.
  *
- * NOTE: This function runs with the inode_lock spin lock held so it is not
+ * NOTE: This function runs with the inode->i_lock spin lock held so it is not
  * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
  */
 static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7c708a4..2e7addf 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c

@@ -182,7 +182,8 @@
 	struct proc_maps_private *priv = m->private;
 	struct vm_area_struct *vma = v;
 
-	vma_stop(priv, vma);
+	if (!IS_ERR(vma))
+		vma_stop(priv, vma);
 	if (priv->task)
 		put_task_struct(priv->task);
 }

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a2a622e..fcc8ae7 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c

@@ -76,7 +76,7 @@
 #include <linux/buffer_head.h>
 #include <linux/capability.h>
 #include <linux/quotaops.h>
-#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
+#include "../internal.h" /* ugh */
 
 #include <asm/uaccess.h>
 
@@ -900,33 +900,38 @@
 	int reserved = 0;
 #endif
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+		spin_lock(&inode->i_lock);
+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+		    !atomic_read(&inode->i_writecount) ||
+		    !dqinit_needed(inode, type)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 #ifdef CONFIG_QUOTA_DEBUG
 		if (unlikely(inode_get_rsv_space(inode) > 0))
 			reserved = 1;
 #endif
-		if (!atomic_read(&inode->i_writecount))
-			continue;
-		if (!dqinit_needed(inode, type))
-			continue;
-
 		__iget(inode);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_sb_list_lock);
 
 		iput(old_inode);
 		__dquot_initialize(inode, type);
-		/* We hold a reference to 'inode' so it couldn't have been
-		 * removed from s_inodes list while we dropped the inode_lock.
-		 * We cannot iput the inode now as we can be holding the last
-		 * reference and we cannot iput it under inode_lock. So we
-		 * keep the reference and iput it later. */
+
+		/*
+		 * We hold a reference to 'inode' so it couldn't have been
+		 * removed from s_inodes list while we dropped the
+		 * inode_sb_list_lock We cannot iput the inode now as we can be
+		 * holding the last reference and we cannot iput it under
+		 * inode_sb_list_lock. So we keep the reference and iput it
+		 * later.
+		 */
 		old_inode = inode;
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(old_inode);
 
 #ifdef CONFIG_QUOTA_DEBUG
@@ -1007,7 +1012,7 @@
 	struct inode *inode;
 	int reserved = 0;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		/*
 		 *  We have to scan also I_NEW inodes because they can already
@@ -1021,7 +1026,7 @@
 			remove_inode_dquot_ref(inode, type, tofree_head);
 		}
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 #ifdef CONFIG_QUOTA_DEBUG
 	if (reserved) {
 		printk(KERN_WARNING "VFS (%s): Writes happened after quota"

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index c2c9ba0..f2d2faf 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h

@@ -165,10 +165,36 @@
 #define WARN_ON_RATELIMIT(condition, state)			\
 		WARN_ON((condition) && __ratelimit(state))
 
+/*
+ * WARN_ON_SMP() is for cases that the warning is either
+ * meaningless for !SMP or may even cause failures.
+ * This is usually used for cases that we have
+ * WARN_ON(!spin_is_locked(&lock)) checks, as spin_is_locked()
+ * returns 0 for uniprocessor settings.
+ * It can also be used with values that are only defined
+ * on SMP:
+ *
+ * struct foo {
+ *  [...]
+ * #ifdef CONFIG_SMP
+ *	int bar;
+ * #endif
+ * };
+ *
+ * void func(struct foo *zoot)
+ * {
+ *	WARN_ON_SMP(!zoot->bar);
+ *
+ * For CONFIG_SMP, WARN_ON_SMP() should act the same as WARN_ON(),
+ * and should be a nop and return false for uniprocessor.
+ *
+ * if (WARN_ON_SMP(x)) returns true only when CONFIG_SMP is set
+ * and x is true.
+ */
 #ifdef CONFIG_SMP
 # define WARN_ON_SMP(x)			WARN_ON(x)
 #else
-# define WARN_ON_SMP(x)			do { } while (0)
+# define WARN_ON_SMP(x)			({0;})
 #endif
 
 #endif

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index f301cea..ebdaafa 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h

@@ -688,6 +688,28 @@
 	}
 
 /**
+ * PERCPU_INPUT - the percpu input sections
+ * @cacheline: cacheline size
+ *
+ * The core percpu section names and core symbols which do not rely
+ * directly upon load addresses.
+ *
+ * @cacheline is used to align subsections to avoid false cacheline
+ * sharing between subsections for different purposes.
+ */
+#define PERCPU_INPUT(cacheline)						\
+	VMLINUX_SYMBOL(__per_cpu_start) = .;				\
+	*(.data..percpu..first)						\
+	. = ALIGN(PAGE_SIZE);						\
+	*(.data..percpu..page_aligned)					\
+	. = ALIGN(cacheline);						\
+	*(.data..percpu..readmostly)					\
+	. = ALIGN(cacheline);						\
+	*(.data..percpu)						\
+	*(.data..percpu..shared_aligned)				\
+	VMLINUX_SYMBOL(__per_cpu_end) = .;
+
+/**
  * PERCPU_VADDR - define output section for percpu area
  * @cacheline: cacheline size
  * @vaddr: explicit base address (optional)
@@ -715,16 +737,7 @@
 	VMLINUX_SYMBOL(__per_cpu_load) = .;				\
 	.data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load)		\
 				- LOAD_OFFSET) {			\
-		VMLINUX_SYMBOL(__per_cpu_start) = .;			\
-		*(.data..percpu..first)					\
-		. = ALIGN(PAGE_SIZE);					\
-		*(.data..percpu..page_aligned)				\
-		. = ALIGN(cacheline);					\
-		*(.data..percpu..readmostly)				\
-		. = ALIGN(cacheline);					\
-		*(.data..percpu)					\
-		*(.data..percpu..shared_aligned)			\
-		VMLINUX_SYMBOL(__per_cpu_end) = .;			\
+		PERCPU_INPUT(cacheline)					\
 	} phdr								\
 	. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data..percpu);
 
@@ -744,16 +757,7 @@
 	. = ALIGN(PAGE_SIZE);						\
 	.data..percpu	: AT(ADDR(.data..percpu) - LOAD_OFFSET) {	\
 		VMLINUX_SYMBOL(__per_cpu_load) = .;			\
-		VMLINUX_SYMBOL(__per_cpu_start) = .;			\
-		*(.data..percpu..first)					\
-		. = ALIGN(PAGE_SIZE);					\
-		*(.data..percpu..page_aligned)				\
-		. = ALIGN(cacheline);					\
-		*(.data..percpu..readmostly)				\
-		. = ALIGN(cacheline);					\
-		*(.data..percpu)					\
-		*(.data..percpu..shared_aligned)			\
-		VMLINUX_SYMBOL(__per_cpu_end) = .;			\
+		PERCPU_INPUT(cacheline)					\
 	}
 
 

diff --git a/include/drm/drm.h b/include/drm/drm.h
index 9ac4313..4be33b4 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h

@@ -463,12 +463,15 @@
 enum drm_vblank_seq_type {
 	_DRM_VBLANK_ABSOLUTE = 0x0,	/**< Wait for specific vblank sequence number */
 	_DRM_VBLANK_RELATIVE = 0x1,	/**< Wait for given number of vblanks */
+	/* bits 1-6 are reserved for high crtcs */
+	_DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e,
 	_DRM_VBLANK_EVENT = 0x4000000,   /**< Send event instead of blocking */
 	_DRM_VBLANK_FLIP = 0x8000000,   /**< Scheduled buffer swap should flip */
 	_DRM_VBLANK_NEXTONMISS = 0x10000000,	/**< If missed, wait for next vblank */
 	_DRM_VBLANK_SECONDARY = 0x20000000,	/**< Secondary display controller */
 	_DRM_VBLANK_SIGNAL = 0x40000000	/**< Send signal instead of blocking, unsupported */
 };
+#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1
 
 #define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE)
 #define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \
@@ -753,6 +756,7 @@
 };
 
 #define DRM_CAP_DUMB_BUFFER 0x1
+#define DRM_CAP_VBLANK_HIGH_CRTC 0x2
 
 /* typedef area */
 #ifndef __KERNEL__

diff --git a/include/linux/bch.h b/include/linux/bch.h
new file mode 100644
index 0000000..295b4ef
--- /dev/null
+++ b/include/linux/bch.h

@@ -0,0 +1,79 @@
+/*
+ * Generic binary BCH encoding/decoding library
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Copyright © 2011 Parrot S.A.
+ *
+ * Author: Ivan Djelic <ivan.djelic@parrot.com>
+ *
+ * Description:
+ *
+ * This library provides runtime configurable encoding/decoding of binary
+ * Bose-Chaudhuri-Hocquenghem (BCH) codes.
+*/
+#ifndef _BCH_H
+#define _BCH_H
+
+#include <linux/types.h>
+
+/**
+ * struct bch_control - BCH control structure
+ * @m:          Galois field order
+ * @n:          maximum codeword size in bits (= 2^m-1)
+ * @t:          error correction capability in bits
+ * @ecc_bits:   ecc exact size in bits, i.e. generator polynomial degree (<=m*t)
+ * @ecc_bytes:  ecc max size (m*t bits) in bytes
+ * @a_pow_tab:  Galois field GF(2^m) exponentiation lookup table
+ * @a_log_tab:  Galois field GF(2^m) log lookup table
+ * @mod8_tab:   remainder generator polynomial lookup tables
+ * @ecc_buf:    ecc parity words buffer
+ * @ecc_buf2:   ecc parity words buffer
+ * @xi_tab:     GF(2^m) base for solving degree 2 polynomial roots
+ * @syn:        syndrome buffer
+ * @cache:      log-based polynomial representation buffer
+ * @elp:        error locator polynomial
+ * @poly_2t:    temporary polynomials of degree 2t
+ */
+struct bch_control {
+	unsigned int    m;
+	unsigned int    n;
+	unsigned int    t;
+	unsigned int    ecc_bits;
+	unsigned int    ecc_bytes;
+/* private: */
+	uint16_t       *a_pow_tab;
+	uint16_t       *a_log_tab;
+	uint32_t       *mod8_tab;
+	uint32_t       *ecc_buf;
+	uint32_t       *ecc_buf2;
+	unsigned int   *xi_tab;
+	unsigned int   *syn;
+	int            *cache;
+	struct gf_poly *elp;
+	struct gf_poly *poly_2t[4];
+};
+
+struct bch_control *init_bch(int m, int t, unsigned int prim_poly);
+
+void free_bch(struct bch_control *bch);
+
+void encode_bch(struct bch_control *bch, const uint8_t *data,
+		unsigned int len, uint8_t *ecc);
+
+int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len,
+	       const uint8_t *recv_ecc, const uint8_t *calc_ecc,
+	       const unsigned int *syn, unsigned int *errloc);
+
+#endif /* _BCH_H */

diff --git a/include/linux/davinci_emac.h b/include/linux/davinci_emac.h
index 5dd4285..5428885 100644
--- a/include/linux/davinci_emac.h
+++ b/include/linux/davinci_emac.h

@@ -36,6 +36,7 @@
 
 	u8 rmii_en;
 	u8 version;
+	bool no_bd_ram;
 	void (*interrupt_enable) (void);
 	void (*interrupt_disable) (void);
 };

diff --git a/include/linux/device.h b/include/linux/device.h
index 144ec13..ab8dfc0 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h

@@ -633,8 +633,12 @@
 /* drivers/base/power/shutdown.c */
 extern void device_shutdown(void);
 
+#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
 /* drivers/base/sys.c */
 extern void sysdev_shutdown(void);
+#else
+static inline void sysdev_shutdown(void) { }
+#endif
 
 /* debugging and troubleshooting/diagnostic helpers. */
 extern const char *dev_driver_string(const struct device *dev);

diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index 78bbf47..3708455 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h

@@ -267,9 +267,9 @@
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	19
-#define DM_VERSION_PATCHLEVEL	1
-#define DM_VERSION_EXTRA	"-ioctl (2011-01-07)"
+#define DM_VERSION_MINOR	20
+#define DM_VERSION_PATCHLEVEL	0
+#define DM_VERSION_EXTRA	"-ioctl (2011-02-02)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
@@ -328,4 +328,10 @@
  */
 #define DM_UUID_FLAG			(1 << 14) /* In */
 
+/*
+ * If set, all buffers are wiped after use. Use when sending
+ * or requesting sensitive data such as an encryption key.
+ */
+#define DM_SECURE_DATA_FLAG		(1 << 15) /* In */
+
 #endif				/* _LINUX_DM_IOCTL_H */

diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index ef44c7a..d18d673 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h

@@ -53,10 +53,10 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.9"
+#define REL_VERSION "8.3.10"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 95
+#define PRO_VERSION_MAX 96
 
 
 enum drbd_io_error_p {
@@ -96,8 +96,14 @@
 	OND_SUSPEND_IO
 };
 
+enum drbd_on_congestion {
+	OC_BLOCK,
+	OC_PULL_AHEAD,
+	OC_DISCONNECT,
+};
+
 /* KEEP the order, do not delete or insert. Only append. */
-enum drbd_ret_codes {
+enum drbd_ret_code {
 	ERR_CODE_BASE		= 100,
 	NO_ERROR		= 101,
 	ERR_LOCAL_ADDR		= 102,
@@ -146,6 +152,9 @@
 	ERR_PERM		= 152,
 	ERR_NEED_APV_93		= 153,
 	ERR_STONITH_AND_PROT_A  = 154,
+	ERR_CONG_NOT_PROTO_A	= 155,
+	ERR_PIC_AFTER_DEP	= 156,
+	ERR_PIC_PEER_DEP	= 157,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
@@ -199,6 +208,10 @@
 	C_VERIFY_T,
 	C_PAUSED_SYNC_S,
 	C_PAUSED_SYNC_T,
+
+	C_AHEAD,
+	C_BEHIND,
+
 	C_MASK = 31
 };
 
@@ -259,7 +272,7 @@
 	unsigned int i;
 };
 
-enum drbd_state_ret_codes {
+enum drbd_state_rv {
 	SS_CW_NO_NEED = 4,
 	SS_CW_SUCCESS = 3,
 	SS_NOTHING_TO_DO = 2,
@@ -290,7 +303,7 @@
 extern const char *drbd_conn_str(enum drbd_conns);
 extern const char *drbd_role_str(enum drbd_role);
 extern const char *drbd_disk_str(enum drbd_disk_state);
-extern const char *drbd_set_st_err_str(enum drbd_state_ret_codes);
+extern const char *drbd_set_st_err_str(enum drbd_state_rv);
 
 #define SHARED_SECRET_MAX 64
 

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 4ac33f3..bb264a5 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h

@@ -16,7 +16,8 @@
 #define DEBUG_RANGE_CHECK 0
 
 #define DRBD_MINOR_COUNT_MIN 1
-#define DRBD_MINOR_COUNT_MAX 255
+#define DRBD_MINOR_COUNT_MAX 256
+#define DRBD_MINOR_COUNT_DEF 32
 
 #define DRBD_DIALOG_REFRESH_MIN 0
 #define DRBD_DIALOG_REFRESH_MAX 600
@@ -129,6 +130,7 @@
 #define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT
 #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
 #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
+#define DRBD_ON_CONGESTION_DEF OC_BLOCK
 
 #define DRBD_MAX_BIO_BVECS_MIN 0
 #define DRBD_MAX_BIO_BVECS_MAX 128
@@ -154,5 +156,13 @@
 #define DRBD_C_MIN_RATE_MAX     (4 << 20)
 #define DRBD_C_MIN_RATE_DEF     4096
 
+#define DRBD_CONG_FILL_MIN	0
+#define DRBD_CONG_FILL_MAX	(10<<21) /* 10GByte in sectors */
+#define DRBD_CONG_FILL_DEF	0
+
+#define DRBD_CONG_EXTENTS_MIN	DRBD_AL_EXTENTS_MIN
+#define DRBD_CONG_EXTENTS_MAX	DRBD_AL_EXTENTS_MAX
+#define DRBD_CONG_EXTENTS_DEF	DRBD_AL_EXTENTS_DEF
+
 #undef RANGE
 #endif

diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index ade9110..ab6159e4 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h

@@ -56,6 +56,9 @@
 	NL_INTEGER(	39,	T_MAY_IGNORE,	rr_conflict)
 	NL_INTEGER(	40,	T_MAY_IGNORE,	ping_timeo)
 	NL_INTEGER(	67,	T_MAY_IGNORE,	rcvbuf_size)
+	NL_INTEGER(	81,	T_MAY_IGNORE,	on_congestion)
+	NL_INTEGER(	82,	T_MAY_IGNORE,	cong_fill)
+	NL_INTEGER(	83,	T_MAY_IGNORE,	cong_extents)
 	  /* 59 addr_family was available in GIT, never released */
 	NL_BIT(		60,	T_MANDATORY,	mind_af)
 	NL_BIT(		27,	T_MAY_IGNORE,	want_lose)
@@ -66,7 +69,9 @@
 	NL_BIT(		70,	T_MANDATORY,	dry_run)
 )
 
-NL_PACKET(disconnect, 6, )
+NL_PACKET(disconnect, 6,
+	NL_BIT(		84,	T_MAY_IGNORE,	force)
+)
 
 NL_PACKET(resize, 7,
 	NL_INT64(		29,	T_MAY_IGNORE,	resize_size)
@@ -143,9 +148,13 @@
        NL_BIT(		63,	T_MANDATORY,	clear_bm)
 )
 
+#ifdef NL_RESPONSE
+NL_RESPONSE(return_code_only, 27)
+#endif
+
 #undef NL_PACKET
 #undef NL_INTEGER
 #undef NL_INT64
 #undef NL_BIT
 #undef NL_STRING
-
+#undef NL_RESPONSE

diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h
index fcdff84..f14a165 100644
--- a/include/linux/drbd_tag_magic.h
+++ b/include/linux/drbd_tag_magic.h

@@ -7,6 +7,7 @@
 /* declare packet_type enums */
 enum packet_types {
 #define NL_PACKET(name, number, fields) P_ ## name = number,
+#define NL_RESPONSE(name, number) P_ ## name = number,
 #define NL_INTEGER(pn, pr, member)
 #define NL_INT64(pn, pr, member)
 #define NL_BIT(pn, pr, member)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index b297f28..ae757bc 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h

@@ -648,6 +648,9 @@
 
 #include <linux/rculist.h>
 
+/* needed by dev_disable_lro() */
+extern int __ethtool_set_flags(struct net_device *dev, u32 flags);
+
 struct ethtool_rx_ntuple_flow_spec_container {
 	struct ethtool_rx_ntuple_flow_spec fs;
 	struct list_head list;

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ce7e185..b677bd7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h

@@ -1636,7 +1636,7 @@
 };
 
 /*
- * Inode state bits.  Protected by inode_lock.
+ * Inode state bits.  Protected by inode->i_lock
  *
  * Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
  * I_DIRTY_DATASYNC and I_DIRTY_PAGES.

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 1d3577f..5d876c9 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h

@@ -28,6 +28,7 @@
 #include <asm/ptrace.h>
 #include <asm/irq_regs.h>
 
+struct seq_file;
 struct irq_desc;
 struct irq_data;
 typedef	void (*irq_flow_handler_t)(unsigned int irq,
@@ -270,6 +271,7 @@
  * @irq_set_wake:	enable/disable power-management wake-on of an IRQ
  * @irq_bus_lock:	function to lock access to slow bus (i2c) chips
  * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips
+ * @irq_print_chip:	optional to print special chip info in show_interrupts
  * @flags:		chip specific flags
  *
  * @release:		release function solely used by UML
@@ -317,6 +319,8 @@
 	void		(*irq_bus_lock)(struct irq_data *data);
 	void		(*irq_bus_sync_unlock)(struct irq_data *data);
 
+	void		(*irq_print_chip)(struct irq_data *data, struct seq_file *p);
+
 	unsigned long	flags;
 
 	/* Currently used only by UML, might disappear one day.*/

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 0021837..15e6c39 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h

@@ -100,13 +100,6 @@
 extern struct irq_desc irq_desc[NR_IRQS];
 #endif
 
-/* Will be removed once the last users in power and sh are gone */
-extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
-static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
-{
-	return desc;
-}
-
 #ifdef CONFIG_GENERIC_HARDIRQS
 
 static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc)
@@ -178,7 +171,36 @@
 	return desc->action != NULL;
 }
 
+/* caller has locked the irq_desc and both params are valid */
+static inline void __irq_set_handler_locked(unsigned int irq,
+					    irq_flow_handler_t handler)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	desc->handle_irq = handler;
+}
+
+/* caller has locked the irq_desc and both params are valid */
+static inline void
+__irq_set_chip_handler_name_locked(unsigned int irq, struct irq_chip *chip,
+				   irq_flow_handler_t handler, const char *name)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+	irq_desc_get_irq_data(desc)->chip = chip;
+	desc->handle_irq = handler;
+	desc->name = name;
+}
+
 #ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+static inline void __set_irq_handler_unlocked(int irq,
+					      irq_flow_handler_t handler)
+{
+	__irq_set_handler_locked(irq, handler);
+}
+
 static inline int irq_balancing_disabled(unsigned int irq)
 {
 	struct irq_desc *desc;
@@ -188,14 +210,13 @@
 }
 #endif
 
-/* caller has locked the irq_desc and both params are valid */
-static inline void __set_irq_handler_unlocked(int irq,
-					      irq_flow_handler_t handler)
+static inline void
+irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class)
 {
-	struct irq_desc *desc;
+	struct irq_desc *desc = irq_to_desc(irq);
 
-	desc = irq_to_desc(irq);
-	desc->handle_irq = handler;
+	if (desc)
+		lockdep_set_class(&desc->lock, class);
 }
 
 #ifdef CONFIG_IRQ_PREFLOW_FASTEOI

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 27e79c2..a32dcae 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h

@@ -432,13 +432,35 @@
 	int			h_err;
 
 	/* Flags [no locking] */
-	unsigned int	h_sync:		1;	/* sync-on-close */
-	unsigned int	h_jdata:	1;	/* force data journaling */
-	unsigned int	h_aborted:	1;	/* fatal error on handle */
+	unsigned int	h_sync:1;	/* sync-on-close */
+	unsigned int	h_jdata:1;	/* force data journaling */
+	unsigned int	h_aborted:1;	/* fatal error on handle */
+	unsigned int	h_cowing:1;	/* COWing block to snapshot */
+
+	/* Number of buffers requested by user:
+	 * (before adding the COW credits factor) */
+	unsigned int	h_base_credits:14;
+
+	/* Number of buffers the user is allowed to dirty:
+	 * (counts only buffers dirtied when !h_cowing) */
+	unsigned int	h_user_credits:14;
+
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	h_lockdep_map;
 #endif
+
+#ifdef CONFIG_JBD2_DEBUG
+	/* COW debugging counters: */
+	unsigned int h_cow_moved; /* blocks moved to snapshot */
+	unsigned int h_cow_copied; /* blocks copied to snapshot */
+	unsigned int h_cow_ok_jh; /* blocks already COWed during current
+				     transaction */
+	unsigned int h_cow_ok_bitmap; /* blocks not set in COW bitmap */
+	unsigned int h_cow_ok_mapped;/* blocks already mapped in snapshot */
+	unsigned int h_cow_bitmaps; /* COW bitmaps created */
+	unsigned int h_cow_excluded; /* blocks set in exclude bitmap */
+#endif
 };
 
 

diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h
index 525aac3..44e95d0 100644
--- a/include/linux/journal-head.h
+++ b/include/linux/journal-head.h

@@ -41,6 +41,13 @@
 	unsigned b_modified;
 
 	/*
+	 * This feild tracks the last transaction id in which this buffer
+	 * has been cowed
+	 * [jbd_lock_bh_state()]
+	 */
+	unsigned b_cow_tid;
+
+	/*
 	 * Copy of the buffer data frozen for writing to the log.
 	 * [jbd_lock_bh_state()]
 	 */

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index d8e9b3d..0df513b 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h

@@ -36,6 +36,7 @@
 
 /* Look up a kernel symbol and return it in a text buffer. */
 extern int sprint_symbol(char *buffer, unsigned long address);
+extern int sprint_backtrace(char *buffer, unsigned long address);
 
 /* Look up a kernel symbol and print it to the kernel messages. */
 extern void __print_symbol(const char *fmt, unsigned long address);
@@ -79,6 +80,12 @@
 	return 0;
 }
 
+static inline int sprint_backtrace(char *buffer, unsigned long addr)
+{
+	*buffer = '\0';
+	return 0;
+}
+
 static inline int lookup_symbol_name(unsigned long addr, char *symname)
 {
 	return -ERANGE;

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 383811d..61e0340 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h

@@ -145,6 +145,9 @@
 extern void led_trigger_unregister_simple(struct led_trigger *trigger);
 extern void led_trigger_event(struct led_trigger *trigger,
 				enum led_brightness event);
+extern void led_trigger_blink(struct led_trigger *trigger,
+			      unsigned long *delay_on,
+			      unsigned long *delay_off);
 
 #else
 

diff --git a/include/linux/mfd/ab8500.h b/include/linux/mfd/ab8500.h
index 56f8dea..6e4f77e 100644
--- a/include/linux/mfd/ab8500.h
+++ b/include/linux/mfd/ab8500.h

@@ -139,17 +139,23 @@
 	u8 oldmask[AB8500_NUM_IRQ_REGS];
 };
 
+struct regulator_reg_init;
 struct regulator_init_data;
 
 /**
  * struct ab8500_platform_data - AB8500 platform data
  * @irq_base: start of AB8500 IRQs, AB8500_NR_IRQS will be used
  * @init: board-specific initialization after detection of ab8500
+ * @num_regulator_reg_init: number of regulator init registers
+ * @regulator_reg_init: regulator init registers
+ * @num_regulator: number of regulators
  * @regulator: machine-specific constraints for regulators
  */
 struct ab8500_platform_data {
 	int irq_base;
 	void (*init) (struct ab8500 *);
+	int num_regulator_reg_init;
+	struct ab8500_regulator_reg_init *regulator_reg_init;
 	int num_regulator;
 	struct regulator_init_data *regulator;
 };

diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index 1408bf8..ad1b19a 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h

@@ -63,6 +63,24 @@
 extern int mfd_cell_disable(struct platform_device *pdev);
 
 /*
+ * "Clone" multiple platform devices for a single cell. This is to be used
+ * for devices that have multiple users of a cell.  For example, if an mfd
+ * driver wants the cell "foo" to be used by a GPIO driver, an MTD driver,
+ * and a platform driver, the following bit of code would be use after first
+ * calling mfd_add_devices():
+ *
+ * const char *fclones[] = { "foo-gpio", "foo-mtd" };
+ * err = mfd_clone_cells("foo", fclones, ARRAY_SIZE(fclones));
+ *
+ * Each driver (MTD, GPIO, and platform driver) would then register
+ * platform_drivers for "foo-mtd", "foo-gpio", and "foo", respectively.
+ * The cell's .enable/.disable hooks should be used to deal with hardware
+ * resource contention.
+ */
+extern int mfd_clone_cell(const char *cell, const char **clones,
+		size_t n_clones);
+
+/*
  * Given a platform device that's been created by mfd_add_devices(), fetch
  * the mfd_cell that created it.
  */
@@ -87,13 +105,4 @@
 
 extern void mfd_remove_devices(struct device *parent);
 
-/*
- * For MFD drivers with clients sharing access to resources, these create
- * multiple platform devices per cell.  Contention handling must still be
- * handled via drivers (ie, with enable/disable hooks).
- */
-extern int mfd_shared_platform_driver_register(struct platform_driver *drv,
-		const char *cellname);
-extern void mfd_shared_platform_driver_unregister(struct platform_driver *drv);
-
 #endif

diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h
index 93a9477..69d1010 100644
--- a/include/linux/mfd/max8997-private.h
+++ b/include/linux/mfd/max8997-private.h

@@ -24,6 +24,8 @@
 
 #include <linux/i2c.h>
 
+#define MAX8997_REG_INVALID	(0xff)
+
 enum max8997_pmic_reg {
 	MAX8997_REG_PMIC_ID0	= 0x00,
 	MAX8997_REG_PMIC_ID1	= 0x01,
@@ -313,6 +315,7 @@
 #define MAX8997_REG_BUCK2DVS(x)	(MAX8997_REG_BUCK2DVS1 + (x) - 1)
 #define MAX8997_REG_BUCK5DVS(x)	(MAX8997_REG_BUCK5DVS1 + (x) - 1)
 
+#define MAX8997_NUM_GPIO	12
 struct max8997_dev {
 	struct device *dev;
 	struct i2c_client *i2c; /* 0xcc / PMIC, Battery Control, and FLASH */
@@ -324,11 +327,19 @@
 	int type;
 	struct platform_device *battery; /* battery control (not fuel gauge) */
 
+	int irq;
+	int ono;
+	int irq_base;
 	bool wakeup;
+	struct mutex irqlock;
+	int irq_masks_cur[MAX8997_IRQ_GROUP_NR];
+	int irq_masks_cache[MAX8997_IRQ_GROUP_NR];
 
 	/* For hibernation */
 	u8 reg_dump[MAX8997_REG_PMIC_END + MAX8997_MUIC_REG_END +
 		MAX8997_HAPTIC_REG_END];
+
+	bool gpio_status[MAX8997_NUM_GPIO];
 };
 
 enum max8997_types {
@@ -336,6 +347,10 @@
 	TYPE_MAX8966,
 };
 
+extern int max8997_irq_init(struct max8997_dev *max8997);
+extern void max8997_irq_exit(struct max8997_dev *max8997);
+extern int max8997_irq_resume(struct max8997_dev *max8997);
+
 extern int max8997_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest);
 extern int max8997_bulk_read(struct i2c_client *i2c, u8 reg, int count,
 				u8 *buf);
@@ -344,4 +359,10 @@
 				u8 *buf);
 extern int max8997_update_reg(struct i2c_client *i2c, u8 reg, u8 val, u8 mask);
 
+#define MAX8997_GPIO_INT_BOTH	(0x3 << 4)
+#define MAX8997_GPIO_INT_RISE	(0x2 << 4)
+#define MAX8997_GPIO_INT_FALL	(0x1 << 4)
+
+#define MAX8997_GPIO_INT_MASK	(0x3 << 4)
+#define MAX8997_GPIO_DATA_MASK	(0x1 << 2)
 #endif /*  __LINUX_MFD_MAX8997_PRIV_H */

diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index cb671b3..60931d0 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h

@@ -78,8 +78,11 @@
 };
 
 struct max8997_platform_data {
-	bool wakeup;
-	/* IRQ: Not implemented */
+	/* IRQ */
+	int irq_base;
+	int ono;
+	int wakeup;
+
 	/* ---- PMIC ---- */
 	struct max8997_regulator_data *regulators;
 	int num_regulators;

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 0492146..8985768 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h

@@ -39,6 +39,11 @@
 
 #include <asm/atomic.h>
 
+#define MAX_MSIX_P_PORT		17
+#define MAX_MSIX		64
+#define MSIX_LEGACY_SZ		4
+#define MIN_MSIX_P_PORT		5
+
 enum {
 	MLX4_FLAG_MSI_X		= 1 << 0,
 	MLX4_FLAG_OLD_PORT_CMDS	= 1 << 1,
@@ -145,8 +150,10 @@
 };
 
 enum mlx4_protocol {
-	MLX4_PROTOCOL_IB,
-	MLX4_PROTOCOL_EN,
+	MLX4_PROT_IB_IPV6 = 0,
+	MLX4_PROT_ETH,
+	MLX4_PROT_IB_IPV4,
+	MLX4_PROT_FCOE
 };
 
 enum {
@@ -173,6 +180,12 @@
 	MLX4_VLAN_REGULAR
 };
 
+enum mlx4_steer_type {
+	MLX4_MC_STEER = 0,
+	MLX4_UC_STEER,
+	MLX4_NUM_STEERS
+};
+
 enum {
 	MLX4_NUM_FEXCH          = 64 * 1024,
 };
@@ -223,6 +236,7 @@
 	int			num_eqs;
 	int			reserved_eqs;
 	int			num_comp_vectors;
+	int			comp_pool;
 	int			num_mpts;
 	int			num_mtt_segs;
 	int			mtts_per_seg;
@@ -245,6 +259,9 @@
 	u16			stat_rate_support;
 	int			udp_rss;
 	int			loopback_support;
+	int			vep_uc_steering;
+	int			vep_mc_steering;
+	int			wol;
 	u8			port_width_cap[MLX4_MAX_PORTS + 1];
 	int			max_gso_sz;
 	int                     reserved_qps_cnt[MLX4_NUM_QP_REGION];
@@ -334,6 +351,17 @@
 struct mlx4_uar {
 	unsigned long		pfn;
 	int			index;
+	struct list_head	bf_list;
+	unsigned		free_bf_bmap;
+	void __iomem	       *map;
+	void __iomem	       *bf_map;
+};
+
+struct mlx4_bf {
+	unsigned long		offset;
+	int			buf_size;
+	struct mlx4_uar	       *uar;
+	void __iomem	       *reg;
 };
 
 struct mlx4_cq {
@@ -415,7 +443,7 @@
 	unsigned long		flags;
 	struct mlx4_caps	caps;
 	struct radix_tree_root	qp_table_tree;
-	u32			rev_id;
+	u8			rev_id;
 	char			board_id[MLX4_BOARD_ID_LEN];
 };
 
@@ -461,6 +489,8 @@
 
 int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar);
 void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar);
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf);
+void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf);
 
 int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
 		  struct mlx4_mtt *mtt);
@@ -508,9 +538,15 @@
 			  int block_mcast_loopback, enum mlx4_protocol protocol);
 int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 			  enum mlx4_protocol protocol);
+int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
+int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
+int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
+int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
+int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
 
-int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index);
-void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index);
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn, u8 wrap);
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int qpn);
+int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac, u8 wrap);
 
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
@@ -526,5 +562,10 @@
 int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_SYNC_TPT(struct mlx4_dev *dev);
 int mlx4_test_interrupts(struct mlx4_dev *dev);
+int mlx4_assign_eq(struct mlx4_dev *dev, char* name , int* vector);
+void mlx4_release_eq(struct mlx4_dev *dev, int vec);
+
+int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port);
+int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
 
 #endif /* MLX4_DEVICE_H */

diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 0eeb2a1..9e9eb21 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h

@@ -303,6 +303,7 @@
 
 enum {
 	MLX4_INLINE_ALIGN	= 64,
+	MLX4_INLINE_SEG		= 1 << 31,
 };
 
 struct mlx4_wqe_inline_seg {

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f9535b2..7606d7d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h

@@ -861,7 +861,7 @@
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
 /*
- * Flags passed to __show_mem() and __show_free_areas() to suppress output in
+ * Flags passed to show_mem() and __show_free_areas() to suppress output in
  * various contexts.
  */
 #define SHOW_MEM_FILTER_NODES	(0x0001u)	/* filter disallowed nodes */
@@ -1360,8 +1360,7 @@
 extern void calculate_zone_inactive_ratio(struct zone *zone);
 extern void mem_init(void);
 extern void __init mmap_init(void);
-extern void show_mem(void);
-extern void __show_mem(unsigned int flags);
+extern void show_mem(unsigned int flags);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern int after_bootmem;

diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index 26529eb..1bbd9f2 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h

@@ -36,6 +36,7 @@
 	struct mtd_info *mtd;
 	struct mutex lock;
 	int devnum;
+	bool bg_stop;
 	unsigned long size;
 	int readonly;
 	int open;
@@ -62,6 +63,7 @@
 		     unsigned long block, char *buffer);
 	int (*discard)(struct mtd_blktrans_dev *dev,
 		       unsigned long block, unsigned nr_blocks);
+	void (*background)(struct mtd_blktrans_dev *dev);
 
 	/* Block layer ioctls */
 	int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo);
@@ -85,6 +87,7 @@
 extern int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr);
 extern int add_mtd_blktrans_dev(struct mtd_blktrans_dev *dev);
 extern int del_mtd_blktrans_dev(struct mtd_blktrans_dev *dev);
+extern int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev);
 
 
 #endif /* __MTD_TRANS_H__ */

diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index a9baee6..0d823f2 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h

@@ -535,6 +535,7 @@
 #define CFI_MFR_CONTINUATION	0x007F
 
 #define CFI_MFR_AMD		0x0001
+#define CFI_MFR_AMIC		0x0037
 #define CFI_MFR_ATMEL		0x001F
 #define CFI_MFR_EON		0x001C
 #define CFI_MFR_FUJITSU		0x0004

diff --git a/include/linux/mtd/latch-addr-flash.h b/include/linux/mtd/latch-addr-flash.h
new file mode 100644
index 0000000..e94b8e1
--- /dev/null
+++ b/include/linux/mtd/latch-addr-flash.h

@@ -0,0 +1,29 @@
+/*
+ * Interface for NOR flash driver whose high address lines are latched
+ *
+ * Copyright © 2008 MontaVista Software, Inc. <source@mvista.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+#ifndef __LATCH_ADDR_FLASH__
+#define __LATCH_ADDR_FLASH__
+
+struct map_info;
+struct mtd_partition;
+
+struct latch_addr_flash_data {
+	unsigned int		width;
+	unsigned int		size;
+
+	int			(*init)(void *data, int cs);
+	void			(*done)(void *data);
+	void			(*set_window)(unsigned long offset, void *data);
+	void			*data;
+
+	unsigned int		nr_parts;
+	struct mtd_partition	*parts;
+};
+
+#endif

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 1f489b2..ae67ef5 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h

@@ -140,6 +140,7 @@
 	NAND_ECC_HW,
 	NAND_ECC_HW_SYNDROME,
 	NAND_ECC_HW_OOB_FIRST,
+	NAND_ECC_SOFT_BCH,
 } nand_ecc_modes_t;
 
 /*
@@ -339,6 +340,7 @@
  * @prepad:	padding information for syndrome based ecc generators
  * @postpad:	padding information for syndrome based ecc generators
  * @layout:	ECC layout control struct pointer
+ * @priv:	pointer to private ecc control data
  * @hwctl:	function to control hardware ecc generator. Must only
  *		be provided if an hardware ECC is available
  * @calculate:	function for ecc calculation or readback from ecc hardware
@@ -362,6 +364,7 @@
 	int prepad;
 	int postpad;
 	struct nand_ecclayout	*layout;
+	void *priv;
 	void (*hwctl)(struct mtd_info *mtd, int mode);
 	int (*calculate)(struct mtd_info *mtd, const uint8_t *dat,
 			uint8_t *ecc_code);

diff --git a/include/linux/mtd/nand_bch.h b/include/linux/mtd/nand_bch.h
new file mode 100644
index 0000000..74acf53
--- /dev/null
+++ b/include/linux/mtd/nand_bch.h

@@ -0,0 +1,72 @@
+/*
+ * Copyright © 2011 Ivan Djelic <ivan.djelic@parrot.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This file is the header for the NAND BCH ECC implementation.
+ */
+
+#ifndef __MTD_NAND_BCH_H__
+#define __MTD_NAND_BCH_H__
+
+struct mtd_info;
+struct nand_bch_control;
+
+#if defined(CONFIG_MTD_NAND_ECC_BCH)
+
+static inline int mtd_nand_has_bch(void) { return 1; }
+
+/*
+ * Calculate BCH ecc code
+ */
+int nand_bch_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
+			   u_char *ecc_code);
+
+/*
+ * Detect and correct bit errors
+ */
+int nand_bch_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc,
+			  u_char *calc_ecc);
+/*
+ * Initialize BCH encoder/decoder
+ */
+struct nand_bch_control *
+nand_bch_init(struct mtd_info *mtd, unsigned int eccsize,
+	      unsigned int eccbytes, struct nand_ecclayout **ecclayout);
+/*
+ * Release BCH encoder/decoder resources
+ */
+void nand_bch_free(struct nand_bch_control *nbc);
+
+#else /* !CONFIG_MTD_NAND_ECC_BCH */
+
+static inline int mtd_nand_has_bch(void) { return 0; }
+
+static inline int
+nand_bch_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
+		       u_char *ecc_code)
+{
+	return -1;
+}
+
+static inline int
+nand_bch_correct_data(struct mtd_info *mtd, unsigned char *buf,
+		      unsigned char *read_ecc, unsigned char *calc_ecc)
+{
+	return -1;
+}
+
+static inline struct nand_bch_control *
+nand_bch_init(struct mtd_info *mtd, unsigned int eccsize,
+	      unsigned int eccbytes, struct nand_ecclayout **ecclayout)
+{
+	return NULL;
+}
+
+static inline void nand_bch_free(struct nand_bch_control *nbc) {}
+
+#endif /* CONFIG_MTD_NAND_ECC_BCH */
+
+#endif /* __MTD_NAND_BCH_H__ */

diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index ae418e4..52b6f18 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h

@@ -198,6 +198,7 @@
 #define ONENAND_SKIP_UNLOCK_CHECK	(0x0100)
 #define ONENAND_PAGEBUF_ALLOC		(0x1000)
 #define ONENAND_OOBBUF_ALLOC		(0x2000)
+#define ONENAND_SKIP_INITIAL_UNLOCKING	(0x4000)
 
 #define ONENAND_IS_4KB_PAGE(this)					\
 	(this->options & ONENAND_HAS_4KB_PAGE)

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 134716e..b528f6d 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h

@@ -550,6 +550,7 @@
 	NFSPROC4_CLNT_SETACL,
 	NFSPROC4_CLNT_FS_LOCATIONS,
 	NFSPROC4_CLNT_RELEASE_LOCKOWNER,
+	NFSPROC4_CLNT_SECINFO,
 
 	/* nfs41 */
 	NFSPROC4_CLNT_EXCHANGE_ID,
@@ -560,6 +561,7 @@
 	NFSPROC4_CLNT_RECLAIM_COMPLETE,
 	NFSPROC4_CLNT_LAYOUTGET,
 	NFSPROC4_CLNT_GETDEVICEINFO,
+	NFSPROC4_CLNT_LAYOUTCOMMIT,
 };
 
 /* nfs41 types */

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f88522b..1b93b9c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h

@@ -33,6 +33,8 @@
 #define FLUSH_STABLE		4	/* commit to stable storage */
 #define FLUSH_LOWPRI		8	/* low priority background flush */
 #define FLUSH_HIGHPRI		16	/* high priority memory reclaim flush */
+#define FLUSH_COND_STABLE	32	/* conditional stable write - only stable
+					 * if everything fits in one RPC */
 
 #ifdef __KERNEL__
 
@@ -93,8 +95,13 @@
 	int error;
 
 	struct list_head list;
+};
 
+struct nfs_open_dir_context {
+	struct rpc_cred *cred;
 	__u64 dir_cookie;
+	__u64 dup_cookie;
+	int duped;
 };
 
 /*
@@ -191,6 +198,7 @@
 
 	/* pNFS layout information */
 	struct pnfs_layout_hdr *layout;
+	atomic_t		commits_outstanding;
 #endif /* CONFIG_NFS_V4*/
 #ifdef CONFIG_NFS_FSCACHE
 	struct fscache_cookie	*fscache;
@@ -219,6 +227,8 @@
 #define NFS_INO_FSCACHE		(5)		/* inode can be cached by FS-Cache */
 #define NFS_INO_FSCACHE_LOCK	(6)		/* FS-Cache cookie management lock */
 #define NFS_INO_COMMIT		(7)		/* inode is committing unstable writes */
+#define NFS_INO_PNFS_COMMIT	(8)		/* use pnfs code for commit */
+#define NFS_INO_LAYOUTCOMMIT	(9)		/* layoutcommit required */
 
 static inline struct nfs_inode *NFS_I(const struct inode *inode)
 {

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 90907ad..8023e4e 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h

@@ -33,11 +33,15 @@
 	PG_CLEAN,
 	PG_NEED_COMMIT,
 	PG_NEED_RESCHED,
+	PG_PNFS_COMMIT,
 };
 
 struct nfs_inode;
 struct nfs_page {
-	struct list_head	wb_list;	/* Defines state of page: */
+	union {
+		struct list_head	wb_list;	/* Defines state of page: */
+		struct pnfs_layout_segment *wb_commit_lseg; /* Used when PG_PNFS_COMMIT set */
+	};
 	struct page		*wb_page;	/* page to read in/write out */
 	struct nfs_open_context	*wb_context;	/* File state context info */
 	struct nfs_lock_context	*wb_lock_context;	/* lock context info */
@@ -57,6 +61,7 @@
 	size_t			pg_count;
 	size_t			pg_bsize;
 	unsigned int		pg_base;
+	char			pg_moreio;
 
 	struct inode		*pg_inode;
 	int			(*pg_doio)(struct nfs_pageio_descriptor *);

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2c2c67d..78b101e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h

@@ -3,6 +3,7 @@
 
 #include <linux/nfsacl.h>
 #include <linux/nfs3.h>
+#include <linux/sunrpc/gss_api.h>
 
 /*
  * To change the maximum rsize and wsize supported by the NFS client, adjust
@@ -14,6 +15,9 @@
 #define NFS_DEF_FILE_IO_SIZE	(4096U)
 #define NFS_MIN_FILE_IO_SIZE	(1024U)
 
+/* Forward declaration for NFS v3 */
+struct nfs4_secinfo_flavors;
+
 struct nfs_fsid {
 	uint64_t		major;
 	uint64_t		minor;
@@ -78,6 +82,7 @@
 #define NFS_ATTR_FATTR_CHANGE		(1U << 17)
 #define NFS_ATTR_FATTR_PRECHANGE	(1U << 18)
 #define NFS_ATTR_FATTR_V4_REFERRAL	(1U << 19)	/* NFSv4 referral */
+#define NFS_ATTR_FATTR_MOUNTPOINT	(1U << 20)	/* Treat as mountpoint */
 
 #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \
 		| NFS_ATTR_FATTR_MODE \
@@ -190,8 +195,9 @@
 #define PNFS_LAYOUT_MAXSIZE 4096
 
 struct nfs4_layoutdriver_data {
+	struct page **pages;
+	__u32 pglen;
 	__u32 len;
-	void *buf;
 };
 
 struct pnfs_layout_range {
@@ -209,6 +215,7 @@
 	struct nfs_open_context *ctx;
 	struct nfs4_sequence_args seq_args;
 	nfs4_stateid stateid;
+	struct nfs4_layoutdriver_data layout;
 };
 
 struct nfs4_layoutget_res {
@@ -216,8 +223,8 @@
 	struct pnfs_layout_range range;
 	__u32 type;
 	nfs4_stateid stateid;
-	struct nfs4_layoutdriver_data layout;
 	struct nfs4_sequence_res seq_res;
+	struct nfs4_layoutdriver_data *layoutp;
 };
 
 struct nfs4_layoutget {
@@ -236,6 +243,29 @@
 	struct nfs4_sequence_res seq_res;
 };
 
+struct nfs4_layoutcommit_args {
+	nfs4_stateid stateid;
+	__u64 lastbytewritten;
+	struct inode *inode;
+	const u32 *bitmask;
+	struct nfs4_sequence_args seq_args;
+};
+
+struct nfs4_layoutcommit_res {
+	struct nfs_fattr *fattr;
+	const struct nfs_server *server;
+	struct nfs4_sequence_res seq_res;
+};
+
+struct nfs4_layoutcommit_data {
+	struct rpc_task task;
+	struct nfs_fattr fattr;
+	struct pnfs_layout_segment *lseg;
+	struct rpc_cred *cred;
+	struct nfs4_layoutcommit_args args;
+	struct nfs4_layoutcommit_res res;
+};
+
 /*
  * Arguments to the open call.
  */
@@ -936,6 +966,38 @@
 	struct nfs4_sequence_res	seq_res;
 };
 
+struct nfs4_secinfo_oid {
+	unsigned int len;
+	char data[GSS_OID_MAX_LEN];
+};
+
+struct nfs4_secinfo_gss {
+	struct nfs4_secinfo_oid sec_oid4;
+	unsigned int qop4;
+	unsigned int service;
+};
+
+struct nfs4_secinfo_flavor {
+	unsigned int 		flavor;
+	struct nfs4_secinfo_gss	gss;
+};
+
+struct nfs4_secinfo_flavors {
+	unsigned int num_flavors;
+	struct nfs4_secinfo_flavor flavors[0];
+};
+
+struct nfs4_secinfo_arg {
+	const struct nfs_fh		*dir_fh;
+	const struct qstr		*name;
+	struct nfs4_sequence_args	seq_args;
+};
+
+struct nfs4_secinfo_res {
+	struct nfs4_secinfo_flavors	*flavors;
+	struct nfs4_sequence_res	seq_res;
+};
+
 #endif /* CONFIG_NFS_V4 */
 
 struct nfstime4 {
@@ -1040,6 +1102,7 @@
 	struct nfs_writeres	res;		/* result struct */
 	struct pnfs_layout_segment *lseg;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
+	int			ds_commit_index;
 	const struct rpc_call_ops *mds_ops;
 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 #ifdef CONFIG_NFS_V4
@@ -1071,7 +1134,7 @@
 			    struct nfs_fattr *);
 	int	(*setattr) (struct dentry *, struct nfs_fattr *,
 			    struct iattr *);
-	int	(*lookup)  (struct inode *, struct qstr *,
+	int	(*lookup)  (struct rpc_clnt *clnt, struct inode *, struct qstr *,
 			    struct nfs_fh *, struct nfs_fattr *);
 	int	(*access)  (struct inode *, struct nfs_access_entry *);
 	int	(*readlink)(struct inode *, struct page *, unsigned int,
@@ -1118,6 +1181,7 @@
 				struct iattr *iattr);
 	int	(*init_client) (struct nfs_client *, const struct rpc_timeout *,
 				const char *, rpc_authflavor_t, int);
+	int	(*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
 };
 
 /*

diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
index ce68105..67cb3ae 100644
--- a/include/linux/pci-aspm.h
+++ b/include/linux/pci-aspm.h

@@ -26,6 +26,7 @@
 extern void pcie_aspm_init_link_state(struct pci_dev *pdev);
 extern void pcie_aspm_exit_link_state(struct pci_dev *pdev);
 extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
+extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
 extern void pci_disable_link_state(struct pci_dev *pdev, int state);
 extern void pcie_clear_aspm(void);
 extern void pcie_no_aspm(void);
@@ -39,6 +40,9 @@
 static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev)
 {
 }
+static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
+{
+}
 static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
 }

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 16c9f2e..96f70d7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h

@@ -1002,12 +1002,11 @@
 #endif
 
 #ifndef CONFIG_PCIEASPM
-static inline int pcie_aspm_enabled(void)
-{
-	return 0;
-}
+static inline int pcie_aspm_enabled(void) { return 0; }
+static inline bool pcie_aspm_support_enabled(void) { return false; }
 #else
 extern int pcie_aspm_enabled(void);
+extern bool pcie_aspm_support_enabled(void);
 #endif
 
 #ifdef CONFIG_PCIEAER

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f495c01..311b4dc 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h

@@ -938,9 +938,7 @@
 	struct list_head		rotation_list;
 	int				jiffies_interval;
 	struct pmu			*active_pmu;
-#ifdef CONFIG_CGROUP_PERF
 	struct perf_cgroup		*cgrp;
-#endif
 };
 
 struct perf_output_handle {

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 6618216..512e091 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h

@@ -529,13 +529,19 @@
  */
 
 #ifdef CONFIG_PM_SLEEP
-extern void device_pm_lock(void);
+#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
+extern int sysdev_suspend(pm_message_t state);
 extern int sysdev_resume(void);
+#else
+static inline int sysdev_suspend(pm_message_t state) { return 0; }
+static inline int sysdev_resume(void) { return 0; }
+#endif
+
+extern void device_pm_lock(void);
 extern void dpm_resume_noirq(pm_message_t state);
 extern void dpm_resume_end(pm_message_t state);
 
 extern void device_pm_unlock(void);
-extern int sysdev_suspend(pm_message_t state);
 extern int dpm_suspend_noirq(pm_message_t state);
 extern int dpm_suspend_start(pm_message_t state);
 

diff --git a/include/linux/power/bq20z75.h b/include/linux/power/bq20z75.h
new file mode 100644
index 0000000..b0843b6
--- /dev/null
+++ b/include/linux/power/bq20z75.h

@@ -0,0 +1,39 @@
+/*
+ * Gas Gauge driver for TI's BQ20Z75
+ *
+ * Copyright (c) 2010, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __LINUX_POWER_BQ20Z75_H_
+#define __LINUX_POWER_BQ20Z75_H_
+
+#include <linux/power_supply.h>
+#include <linux/types.h>
+
+/**
+ * struct bq20z75_platform_data - platform data for bq20z75 devices
+ * @battery_detect:		GPIO which is used to detect battery presence
+ * @battery_detect_present:	gpio state when battery is present (0 / 1)
+ * @i2c_retry_count:		# of times to retry on i2c IO failure
+ */
+struct bq20z75_platform_data {
+	int battery_detect;
+	int battery_detect_present;
+	int i2c_retry_count;
+};
+
+#endif

diff --git a/include/linux/power/bq27x00_battery.h b/include/linux/power/bq27x00_battery.h
new file mode 100644
index 0000000..a857f71
--- /dev/null
+++ b/include/linux/power/bq27x00_battery.h

@@ -0,0 +1,19 @@
+#ifndef __LINUX_BQ27X00_BATTERY_H__
+#define __LINUX_BQ27X00_BATTERY_H__
+
+/**
+ * struct bq27000_plaform_data - Platform data for bq27000 devices
+ * @name: Name of the battery. If NULL the driver will fallback to "bq27000".
+ * @read: HDQ read callback.
+ *	This function should provide access to the HDQ bus the battery is
+ *	connected to.
+ *	The first parameter is a pointer to the battery device, the second the
+ *	register to be read. The return value should either be the content of
+ *	the passed register or an error value.
+ */
+struct bq27000_platform_data {
+	const char *name;
+	int (*read)(struct device *dev, unsigned int);
+};
+
+#endif

diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 7d73256..204c18d 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h

@@ -173,6 +173,8 @@
 	char *full_trig_name;
 	struct led_trigger *online_trig;
 	char *online_trig_name;
+	struct led_trigger *charging_blink_full_solid_trig;
+	char *charging_blink_full_solid_trig_name;
 #endif
 };
 
@@ -213,4 +215,49 @@
 /* For APM emulation, think legacy userspace. */
 extern struct class *power_supply_class;
 
+static inline bool power_supply_is_amp_property(enum power_supply_property psp)
+{
+	switch (psp) {
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+	case POWER_SUPPLY_PROP_CHARGE_EMPTY_DESIGN:
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+	case POWER_SUPPLY_PROP_CHARGE_EMPTY:
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+	case POWER_SUPPLY_PROP_CHARGE_AVG:
+	case POWER_SUPPLY_PROP_CHARGE_COUNTER:
+	case POWER_SUPPLY_PROP_CURRENT_MAX:
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+	case POWER_SUPPLY_PROP_CURRENT_AVG:
+		return 1;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static inline bool power_supply_is_watt_property(enum power_supply_property psp)
+{
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+	case POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN:
+	case POWER_SUPPLY_PROP_ENERGY_FULL:
+	case POWER_SUPPLY_PROP_ENERGY_EMPTY:
+	case POWER_SUPPLY_PROP_ENERGY_NOW:
+	case POWER_SUPPLY_PROP_ENERGY_AVG:
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX:
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN:
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+	case POWER_SUPPLY_PROP_VOLTAGE_AVG:
+	case POWER_SUPPLY_PROP_POWER_NOW:
+		return 1;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 #endif /* __LINUX_POWER_SUPPLY_H__ */

diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index eb354f6..26f9e36 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h

@@ -277,7 +277,7 @@
 		/*
 		 * Mark inode fully dirty. Since we are allocating blocks, inode
 		 * would become fully dirty soon anyway and it reportedly
-		 * reduces inode_lock contention.
+		 * reduces lock contention.
 		 */
 		mark_inode_dirty(inode);
 	}

diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h
index 6a210f1..76579f9 100644
--- a/include/linux/regulator/ab8500.h
+++ b/include/linux/regulator/ab8500.h

@@ -3,8 +3,8 @@
  *
  * License Terms: GNU General Public License v2
  *
- * Author: Sundar Iyer <sundar.iyer@stericsson.com> for ST-Ericsson
- *
+ * Authors: Sundar Iyer <sundar.iyer@stericsson.com> for ST-Ericsson
+ *          Bengt Jonsson <bengt.g.jonsson@stericsson.com> for ST-Ericsson
  */
 
 #ifndef __LINUX_MFD_AB8500_REGULATOR_H
@@ -17,6 +17,7 @@
 	AB8500_LDO_AUX3,
 	AB8500_LDO_INTCORE,
 	AB8500_LDO_TVOUT,
+	AB8500_LDO_USB,
 	AB8500_LDO_AUDIO,
 	AB8500_LDO_ANAMIC1,
 	AB8500_LDO_ANAMIC2,
@@ -24,4 +25,50 @@
 	AB8500_LDO_ANA,
 	AB8500_NUM_REGULATORS,
 };
+
+/* AB8500 register initialization */
+struct ab8500_regulator_reg_init {
+	int id;
+	u8 value;
+};
+
+#define INIT_REGULATOR_REGISTER(_id, _value)	\
+	{					\
+		.id = _id,			\
+		.value = _value,		\
+	}
+
+/* AB8500 registers */
+enum ab8500_regulator_reg {
+	AB8500_REGUREQUESTCTRL2,
+	AB8500_REGUREQUESTCTRL3,
+	AB8500_REGUREQUESTCTRL4,
+	AB8500_REGUSYSCLKREQ1HPVALID1,
+	AB8500_REGUSYSCLKREQ1HPVALID2,
+	AB8500_REGUHWHPREQ1VALID1,
+	AB8500_REGUHWHPREQ1VALID2,
+	AB8500_REGUHWHPREQ2VALID1,
+	AB8500_REGUHWHPREQ2VALID2,
+	AB8500_REGUSWHPREQVALID1,
+	AB8500_REGUSWHPREQVALID2,
+	AB8500_REGUSYSCLKREQVALID1,
+	AB8500_REGUSYSCLKREQVALID2,
+	AB8500_REGUMISC1,
+	AB8500_VAUDIOSUPPLY,
+	AB8500_REGUCTRL1VAMIC,
+	AB8500_VPLLVANAREGU,
+	AB8500_VREFDDR,
+	AB8500_EXTSUPPLYREGU,
+	AB8500_VAUX12REGU,
+	AB8500_VRF1VAUX3REGU,
+	AB8500_VAUX1SEL,
+	AB8500_VAUX2SEL,
+	AB8500_VRF1VAUX3SEL,
+	AB8500_REGUCTRL2SPARE,
+	AB8500_REGUCTRLDISCH,
+	AB8500_REGUCTRLDISCH2,
+	AB8500_VSMPS1SEL1,
+	AB8500_NUM_REGULATOR_REGISTERS,
+};
+
 #endif

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 7954f6b..9e87c1c 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h

@@ -153,6 +153,8 @@
 int regulator_is_supported_voltage(struct regulator *regulator,
 				   int min_uV, int max_uV);
 int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV);
+int regulator_set_voltage_time(struct regulator *regulator,
+			       int old_uV, int new_uV);
 int regulator_get_voltage(struct regulator *regulator);
 int regulator_sync_voltage(struct regulator *regulator);
 int regulator_set_current_limit(struct regulator *regulator,

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index b8ed16a..6c433b8 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h

@@ -63,7 +63,11 @@
  *                    when running with the specified parameters.
  *
  * @enable_time: Time taken for the regulator voltage output voltage to
- *               stabalise after being enabled, in microseconds.
+ *               stabilise after being enabled, in microseconds.
+ * @set_voltage_time_sel: Time taken for the regulator voltage output voltage
+ *               to stabilise after being set to a new value, in microseconds.
+ *               The function provides the from and to voltage selector, the
+ *               function should return the worst case.
  *
  * @set_suspend_voltage: Set the voltage for the regulator when the system
  *                       is suspended.
@@ -103,8 +107,11 @@
 	int (*set_mode) (struct regulator_dev *, unsigned int mode);
 	unsigned int (*get_mode) (struct regulator_dev *);
 
-	/* Time taken to enable the regulator */
+	/* Time taken to enable or set voltage on the regulator */
 	int (*enable_time) (struct regulator_dev *);
+	int (*set_voltage_time_sel) (struct regulator_dev *,
+				     unsigned int old_selector,
+				     unsigned int new_selector);
 
 	/* report regulator status ... most other accessors report
 	 * control inputs, this reports results of combining inputs

diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 761c745..c4c4fc4 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h

@@ -186,6 +186,7 @@
 };
 
 int regulator_suspend_prepare(suspend_state_t state);
+int regulator_suspend_finish(void);
 
 #ifdef CONFIG_REGULATOR
 void regulator_has_full_constraints(void);

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b8369d5..83bd2e2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h

@@ -517,7 +517,7 @@
 struct autogroup;
 
 /*
- * NOTE! "signal_struct" does not have it's own
+ * NOTE! "signal_struct" does not have its own
  * locking, because a shared signal_struct always
  * implies a shared sighand_struct, so locking
  * sighand_struct is always a proper superset of

diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
index 5d8048b..332da61 100644
--- a/include/linux/sunrpc/gss_api.h
+++ b/include/linux/sunrpc/gss_api.h

@@ -126,6 +126,9 @@
 /* Similar, but get by pseudoflavor. */
 struct gss_api_mech *gss_mech_get_by_pseudoflavor(u32);
 
+/* Fill in an array with a list of supported pseudoflavors */
+int gss_mech_list_pseudoflavors(u32 *);
+
 /* Just increments the mechanism's reference count and returns its input: */
 struct gss_api_mech * gss_mech_get(struct gss_api_mech *);
 

diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h
index 1154c29..8a75da5 100644
--- a/include/linux/sysdev.h
+++ b/include/linux/sysdev.h

@@ -33,12 +33,13 @@
 	const char *name;
 	struct list_head	drivers;
 	struct sysdev_class_attribute **attrs;
-
+	struct kset		kset;
+#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
 	/* Default operations for these types of devices */
 	int	(*shutdown)(struct sys_device *);
 	int	(*suspend)(struct sys_device *, pm_message_t state);
 	int	(*resume)(struct sys_device *);
-	struct kset		kset;
+#endif
 };
 
 struct sysdev_class_attribute {
@@ -76,9 +77,11 @@
 	struct list_head	entry;
 	int	(*add)(struct sys_device *);
 	int	(*remove)(struct sys_device *);
+#ifndef CONFIG_ARCH_NO_SYSDEV_OPS
 	int	(*shutdown)(struct sys_device *);
 	int	(*suspend)(struct sys_device *, pm_message_t state);
 	int	(*resume)(struct sys_device *);
+#endif
 };
 
 

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 0ead399..17e7ccc 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h

@@ -9,7 +9,7 @@
 
 struct backing_dev_info;
 
-extern spinlock_t inode_lock;
+extern spinlock_t inode_wb_list_lock;
 
 /*
  * fs/fs-writeback.c

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 60f7876..b2b9d28 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h

@@ -486,7 +486,8 @@
  * @plink_state: mesh peer link state
  * @signal: signal strength of last received packet in dBm
  * @signal_avg: signal strength average in dBm
- * @txrate: current unicast bitrate to this station
+ * @txrate: current unicast bitrate from this station
+ * @rxrate: current unicast bitrate to this station
  * @rx_packets: packets received from this station
  * @tx_packets: packets transmitted to this station
  * @tx_retries: cumulative retry counts

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 642a80b..c850e5f 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h

@@ -70,7 +70,7 @@
 extern void			ip6_route_input(struct sk_buff *skb);
 
 extern struct dst_entry *	ip6_route_output(struct net *net,
-						 struct sock *sk,
+						 const struct sock *sk,
 						 struct flowi6 *fl6);
 
 extern int			ip6_route_init(void);

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a1a8580..e5d66ec 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h

@@ -51,7 +51,6 @@
 	struct fib_info		*nh_parent;
 	unsigned		nh_flags;
 	unsigned char		nh_scope;
-	unsigned char		nh_cfg_scope;
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int			nh_weight;
 	int			nh_power;
@@ -62,6 +61,7 @@
 	int			nh_oif;
 	__be32			nh_gw;
 	__be32			nh_saddr;
+	int			nh_saddr_genid;
 };
 
 /*
@@ -74,9 +74,10 @@
 	struct net		*fib_net;
 	int			fib_treeref;
 	atomic_t		fib_clntref;
-	int			fib_dead;
 	unsigned		fib_flags;
-	int			fib_protocol;
+	unsigned char		fib_dead;
+	unsigned char		fib_protocol;
+	unsigned char		fib_scope;
 	__be32			fib_prefsrc;
 	u32			fib_priority;
 	u32			*fib_metrics;
@@ -141,12 +142,19 @@
 
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
 
-#define FIB_RES_SADDR(res)		(FIB_RES_NH(res).nh_saddr)
+extern __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
+
+#define FIB_RES_SADDR(net, res)				\
+	((FIB_RES_NH(res).nh_saddr_genid ==		\
+	  atomic_read(&(net)->ipv4.dev_addr_genid)) ?	\
+	 FIB_RES_NH(res).nh_saddr :			\
+	 fib_info_update_nh_saddr((net), &FIB_RES_NH(res)))
 #define FIB_RES_GW(res)			(FIB_RES_NH(res).nh_gw)
 #define FIB_RES_DEV(res)		(FIB_RES_NH(res).nh_dev)
 #define FIB_RES_OIF(res)		(FIB_RES_NH(res).nh_oif)
 
-#define FIB_RES_PREFSRC(res)		((res).fi->fib_prefsrc ? : FIB_RES_SADDR(res))
+#define FIB_RES_PREFSRC(net, res)	((res).fi->fib_prefsrc ? : \
+					 FIB_RES_SADDR(net, res))
 
 struct fib_table {
 	struct hlist_node tb_hlist;

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 8650e7b..cefe1b3 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h

@@ -1160,7 +1160,7 @@
  * @napi_weight: weight used for NAPI polling.  You must specify an
  *	appropriate value here if a napi_poll operation is provided
  *	by your driver.
-
+ *
  * @max_rx_aggregation_subframes: maximum buffer size (number of
  *	sub-frames) to be used for A-MPDU block ack receiver
  *	aggregation.

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index e2e2ef5..542195d 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h

@@ -55,6 +55,7 @@
 	int current_rt_cache_rebuild_count;
 
 	atomic_t rt_genid;
+	atomic_t dev_addr_genid;
 
 #ifdef CONFIG_IP_MROUTE
 #ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES

diff --git a/include/net/route.h b/include/net/route.h
index 30d6cae..f88429c 100644
--- a/include/net/route.h
+++ b/include/net/route.h

@@ -207,6 +207,7 @@
 
 struct in_ifaddr;
 extern void fib_add_ifaddr(struct in_ifaddr *);
+extern void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *);
 
 static inline void ip_rt_put(struct rtable * rt)
 {
@@ -269,8 +270,8 @@
 		struct flowi4 fl4 = {
 			.flowi4_oif = rt->rt_oif,
 			.flowi4_mark = rt->rt_mark,
-			.daddr = rt->rt_key_dst,
-			.saddr = rt->rt_key_src,
+			.daddr = rt->rt_dst,
+			.saddr = rt->rt_src,
 			.flowi4_tos = rt->rt_tos,
 			.flowi4_proto = protocol,
 			.fl4_sport = sport,

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a9505b6..b931f02 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h

@@ -25,6 +25,7 @@
 enum qdisc_state_t {
 	__QDISC_STATE_SCHED,
 	__QDISC_STATE_DEACTIVATED,
+	__QDISC_STATE_THROTTLED,
 };
 
 /*
@@ -32,7 +33,6 @@
  */
 enum qdisc___state_t {
 	__QDISC___STATE_RUNNING = 1,
-	__QDISC___STATE_THROTTLED = 2,
 };
 
 struct qdisc_size_table {
@@ -106,17 +106,17 @@
 
 static inline bool qdisc_is_throttled(const struct Qdisc *qdisc)
 {
-	return (qdisc->__state & __QDISC___STATE_THROTTLED) ? true : false;
+	return test_bit(__QDISC_STATE_THROTTLED, &qdisc->state) ? true : false;
 }
 
 static inline void qdisc_throttled(struct Qdisc *qdisc)
 {
-	qdisc->__state |= __QDISC___STATE_THROTTLED;
+	set_bit(__QDISC_STATE_THROTTLED, &qdisc->state);
 }
 
 static inline void qdisc_unthrottled(struct Qdisc *qdisc)
 {
-	qdisc->__state &= ~__QDISC___STATE_THROTTLED;
+	clear_bit(__QDISC_STATE_THROTTLED, &qdisc->state);
 }
 
 struct Qdisc_class_ops {

diff --git a/include/scsi/libiscsi_tcp.h b/include/scsi/libiscsi_tcp.h
index 741ae7e..e6b9fd2 100644
--- a/include/scsi/libiscsi_tcp.h
+++ b/include/scsi/libiscsi_tcp.h

@@ -47,6 +47,7 @@
 	struct scatterlist	*sg;
 	void			*sg_mapped;
 	unsigned int		sg_offset;
+	bool			atomic_mapped;
 
 	iscsi_segment_done_fn_t	*done;
 };

diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index f171c65..2d3ec50 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h

@@ -462,7 +462,7 @@
 }
 static inline int scsi_device_enclosure(struct scsi_device *sdev)
 {
-	return sdev->inquiry[6] & (1<<6);
+	return sdev->inquiry ? (sdev->inquiry[6] & (1<<6)) : 1;
 }
 
 static inline int scsi_device_protection(struct scsi_device *sdev)

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 430a9cc..e1bad11 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h

@@ -1031,9 +1031,7 @@
 #define snd_pcm_lib_mmap_iomem	NULL
 #endif
 
-int snd_pcm_lib_mmap_noncached(struct snd_pcm_substream *substream,
-			       struct vm_area_struct *area);
-#define snd_pcm_lib_mmap_vmalloc	snd_pcm_lib_mmap_noncached
+#define snd_pcm_lib_mmap_vmalloc NULL
 
 static inline void snd_pcm_limit_isa_dma_size(int dma, size_t *max)
 {

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 0828b6c8..c15ed50 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h

@@ -9,7 +9,7 @@
 #include <net/sock.h>
 #include <net/tcp.h>
 
-#define TARGET_CORE_MOD_VERSION		"v4.0.0-rc6"
+#define TARGET_CORE_MOD_VERSION		"v4.0.0-rc7-ml"
 #define SHUTDOWN_SIGS	(sigmask(SIGKILL)|sigmask(SIGINT)|sigmask(SIGABRT))
 
 /* Used by transport_generic_allocate_iovecs() */
@@ -239,7 +239,7 @@
 } ____cacheline_aligned;
 
 struct t10_alua_lu_gp_member {
-	int lu_gp_assoc:1;
+	bool lu_gp_assoc;
 	atomic_t lu_gp_mem_ref_cnt;
 	spinlock_t lu_gp_mem_lock;
 	struct t10_alua_lu_gp *lu_gp;
@@ -271,7 +271,7 @@
 } ____cacheline_aligned;
 
 struct t10_alua_tg_pt_gp_member {
-	int tg_pt_gp_assoc:1;
+	bool tg_pt_gp_assoc;
 	atomic_t tg_pt_gp_mem_ref_cnt;
 	spinlock_t tg_pt_gp_mem_lock;
 	struct t10_alua_tg_pt_gp *tg_pt_gp;
@@ -336,7 +336,7 @@
 	int pr_res_type;
 	int pr_res_scope;
 	/* Used for fabric initiator WWPNs using a ISID */
-	int isid_present_at_reg:1;
+	bool isid_present_at_reg;
 	u32 pr_res_mapped_lun;
 	u32 pr_aptpl_target_lun;
 	u32 pr_res_generation;
@@ -418,7 +418,7 @@
 	unsigned long long	t_task_lba;
 	int			t_tasks_failed;
 	int			t_tasks_fua;
-	int			t_tasks_bidi:1;
+	bool			t_tasks_bidi;
 	u32			t_task_cdbs;
 	u32			t_tasks_check;
 	u32			t_tasks_no;
@@ -470,7 +470,7 @@
 	u8		task_flags;
 	int		task_error_status;
 	int		task_state_flags;
-	int		task_padded_sg:1;
+	bool		task_padded_sg;
 	unsigned long long	task_lba;
 	u32		task_no;
 	u32		task_sectors;
@@ -494,8 +494,8 @@
 	struct list_head t_state_list;
 } ____cacheline_aligned;
 
-#define TASK_CMD(task)	((struct se_cmd *)task->task_se_cmd)
-#define TASK_DEV(task)	((struct se_device *)task->se_dev)
+#define TASK_CMD(task)	((task)->task_se_cmd)
+#define TASK_DEV(task)	((task)->se_dev)
 
 struct se_cmd {
 	/* SAM response code being sent to initiator */
@@ -551,8 +551,8 @@
 	void (*transport_complete_callback)(struct se_cmd *);
 } ____cacheline_aligned;
 
-#define T_TASK(cmd)     ((struct se_transport_task *)(cmd->t_task))
-#define CMD_TFO(cmd) ((struct target_core_fabric_ops *)cmd->se_tfo)
+#define T_TASK(cmd)     ((cmd)->t_task)
+#define CMD_TFO(cmd)	((cmd)->se_tfo)
 
 struct se_tmr_req {
 	/* Task Management function to be preformed */
@@ -583,7 +583,7 @@
 struct se_node_acl {
 	char			initiatorname[TRANSPORT_IQN_LEN];
 	/* Used to signal demo mode created ACL, disabled by default */
-	int			dynamic_node_acl:1;
+	bool			dynamic_node_acl;
 	u32			queue_depth;
 	u32			acl_index;
 	u64			num_cmds;
@@ -601,7 +601,8 @@
 	struct config_group	acl_attrib_group;
 	struct config_group	acl_auth_group;
 	struct config_group	acl_param_group;
-	struct config_group	*acl_default_groups[4];
+	struct config_group	acl_fabric_stat_group;
+	struct config_group	*acl_default_groups[5];
 	struct list_head	acl_list;
 	struct list_head	acl_sess_list;
 } ____cacheline_aligned;
@@ -615,13 +616,19 @@
 	struct list_head	sess_acl_list;
 } ____cacheline_aligned;
 
-#define SE_SESS(cmd)		((struct se_session *)(cmd)->se_sess)
-#define SE_NODE_ACL(sess)	((struct se_node_acl *)(sess)->se_node_acl)
+#define SE_SESS(cmd)		((cmd)->se_sess)
+#define SE_NODE_ACL(sess)	((sess)->se_node_acl)
 
 struct se_device;
 struct se_transform_info;
 struct scatterlist;
 
+struct se_ml_stat_grps {
+	struct config_group	stat_group;
+	struct config_group	scsi_auth_intr_group;
+	struct config_group	scsi_att_intr_port_group;
+};
+
 struct se_lun_acl {
 	char			initiatorname[TRANSPORT_IQN_LEN];
 	u32			mapped_lun;
@@ -629,10 +636,13 @@
 	struct se_lun		*se_lun;
 	struct list_head	lacl_list;
 	struct config_group	se_lun_group;
+	struct se_ml_stat_grps	ml_stat_grps;
 }  ____cacheline_aligned;
 
+#define ML_STAT_GRPS(lacl)	(&(lacl)->ml_stat_grps)
+
 struct se_dev_entry {
-	int			def_pr_registered:1;
+	bool			def_pr_registered;
 	/* See transport_lunflags_table */
 	u32			lun_flags;
 	u32			deve_cmds;
@@ -693,6 +703,13 @@
 	struct config_group da_group;
 } ____cacheline_aligned;
 
+struct se_dev_stat_grps {
+	struct config_group stat_group;
+	struct config_group scsi_dev_group;
+	struct config_group scsi_tgt_dev_group;
+	struct config_group scsi_lu_group;
+};
+
 struct se_subsystem_dev {
 /* Used for struct se_subsystem_dev-->se_dev_alias, must be less than PAGE_SIZE */
 #define SE_DEV_ALIAS_LEN		512
@@ -716,11 +733,14 @@
 	struct config_group se_dev_group;
 	/* For T10 Reservations */
 	struct config_group se_dev_pr_group;
+	/* For target_core_stat.c groups */
+	struct se_dev_stat_grps dev_stat_grps;
 } ____cacheline_aligned;
 
 #define T10_ALUA(su_dev)	(&(su_dev)->t10_alua)
 #define T10_RES(su_dev)		(&(su_dev)->t10_reservation)
 #define T10_PR_OPS(su_dev)	(&(su_dev)->t10_reservation.pr_ops)
+#define DEV_STAT_GRP(dev)	(&(dev)->dev_stat_grps)
 
 struct se_device {
 	/* Set to 1 if thread is NOT sleeping on thread_sem */
@@ -803,8 +823,8 @@
 	struct list_head	g_se_dev_list;
 }  ____cacheline_aligned;
 
-#define SE_DEV(cmd)		((struct se_device *)(cmd)->se_lun->lun_se_dev)
-#define SU_DEV(dev)		((struct se_subsystem_dev *)(dev)->se_sub_dev)
+#define SE_DEV(cmd)		((cmd)->se_lun->lun_se_dev)
+#define SU_DEV(dev)		((dev)->se_sub_dev)
 #define DEV_ATTRIB(dev)		(&(dev)->se_sub_dev->se_dev_attrib)
 #define DEV_T10_WWN(dev)	(&(dev)->se_sub_dev->t10_wwn)
 
@@ -832,7 +852,14 @@
 	struct se_subsystem_api *transport;
 }  ____cacheline_aligned;
 
-#define SE_HBA(d)		((struct se_hba *)(d)->se_hba)
+#define SE_HBA(dev)		((dev)->se_hba)
+
+struct se_port_stat_grps {
+	struct config_group stat_group;
+	struct config_group scsi_port_group;
+	struct config_group scsi_tgt_port_group;
+	struct config_group scsi_transport_group;
+};
 
 struct se_lun {
 	/* See transport_lun_status_table */
@@ -848,11 +875,13 @@
 	struct list_head	lun_cmd_list;
 	struct list_head	lun_acl_list;
 	struct se_device	*lun_se_dev;
+	struct se_port		*lun_sep;
 	struct config_group	lun_group;
-	struct se_port	*lun_sep;
+	struct se_port_stat_grps port_stat_grps;
 } ____cacheline_aligned;
 
-#define SE_LUN(c)		((struct se_lun *)(c)->se_lun)
+#define SE_LUN(cmd)		((cmd)->se_lun)
+#define PORT_STAT_GRP(lun)	(&(lun)->port_stat_grps)
 
 struct scsi_port_stats {
        u64     cmd_pdus;
@@ -919,11 +948,13 @@
 	struct config_group	tpg_param_group;
 } ____cacheline_aligned;
 
-#define TPG_TFO(se_tpg)	((struct target_core_fabric_ops *)(se_tpg)->se_tpg_tfo)
+#define TPG_TFO(se_tpg)	((se_tpg)->se_tpg_tfo)
 
 struct se_wwn {
 	struct target_fabric_configfs *wwn_tf;
 	struct config_group	wwn_group;
+	struct config_group	*wwn_default_groups[2];
+	struct config_group	fabric_stat_group;
 } ____cacheline_aligned;
 
 struct se_global {

diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h
index 40e6e74..6125095 100644
--- a/include/target/target_core_configfs.h
+++ b/include/target/target_core_configfs.h

@@ -14,10 +14,12 @@
 struct target_fabric_configfs_template {
 	struct config_item_type tfc_discovery_cit;
 	struct config_item_type	tfc_wwn_cit;
+	struct config_item_type tfc_wwn_fabric_stats_cit;
 	struct config_item_type tfc_tpg_cit;
 	struct config_item_type tfc_tpg_base_cit;
 	struct config_item_type tfc_tpg_lun_cit;
 	struct config_item_type tfc_tpg_port_cit;
+	struct config_item_type tfc_tpg_port_stat_cit;
 	struct config_item_type tfc_tpg_np_cit;
 	struct config_item_type tfc_tpg_np_base_cit;
 	struct config_item_type tfc_tpg_attrib_cit;
@@ -27,7 +29,9 @@
 	struct config_item_type tfc_tpg_nacl_attrib_cit;
 	struct config_item_type tfc_tpg_nacl_auth_cit;
 	struct config_item_type tfc_tpg_nacl_param_cit;
+	struct config_item_type tfc_tpg_nacl_stat_cit;
 	struct config_item_type tfc_tpg_mappedlun_cit;
+	struct config_item_type tfc_tpg_mappedlun_stat_cit;
 };
 
 struct target_fabric_configfs {

diff --git a/include/target/target_core_fabric_ops.h b/include/target/target_core_fabric_ops.h
index f3ac12b..5eb8b1a 100644
--- a/include/target/target_core_fabric_ops.h
+++ b/include/target/target_core_fabric_ops.h

@@ -8,7 +8,7 @@
 	 * for scatterlist chaining using transport_do_task_sg_link(),
 	 * disabled by default
 	 */
-	int task_sg_chaining:1;
+	bool task_sg_chaining;
 	char *(*get_fabric_name)(void);
 	u8 (*get_fabric_proto_ident)(struct se_portal_group *);
 	char *(*tpg_get_wwn)(struct se_portal_group *);

diff --git a/include/target/target_core_tmr.h b/include/target/target_core_tmr.h
index 6c8248b..bd55968 100644
--- a/include/target/target_core_tmr.h
+++ b/include/target/target_core_tmr.h

@@ -1,37 +1,29 @@
 #ifndef TARGET_CORE_TMR_H
 #define TARGET_CORE_TMR_H
 
-/* task management function values */
-#ifdef ABORT_TASK
-#undef ABORT_TASK
-#endif /* ABORT_TASK */
-#define ABORT_TASK				1
-#ifdef ABORT_TASK_SET
-#undef ABORT_TASK_SET
-#endif /* ABORT_TASK_SET */
-#define ABORT_TASK_SET				2
-#ifdef CLEAR_ACA
-#undef CLEAR_ACA
-#endif /* CLEAR_ACA */
-#define CLEAR_ACA				3
-#ifdef CLEAR_TASK_SET
-#undef CLEAR_TASK_SET
-#endif /* CLEAR_TASK_SET */
-#define CLEAR_TASK_SET				4
-#define LUN_RESET				5
-#define TARGET_WARM_RESET			6
-#define TARGET_COLD_RESET			7
-#define TASK_REASSIGN				8
+/* fabric independent task management function values */
+enum tcm_tmreq_table {
+	TMR_ABORT_TASK		= 1,
+	TMR_ABORT_TASK_SET	= 2,
+	TMR_CLEAR_ACA		= 3,
+	TMR_CLEAR_TASK_SET	= 4,
+	TMR_LUN_RESET		= 5,
+	TMR_TARGET_WARM_RESET	= 6,
+	TMR_TARGET_COLD_RESET	= 7,
+	TMR_FABRIC_TMR		= 255,
+};
 
-/* task management response values */
-#define TMR_FUNCTION_COMPLETE			0
-#define TMR_TASK_DOES_NOT_EXIST			1
-#define TMR_LUN_DOES_NOT_EXIST			2
-#define TMR_TASK_STILL_ALLEGIANT		3
-#define TMR_TASK_FAILOVER_NOT_SUPPORTED		4
-#define TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED	5
-#define TMR_FUNCTION_AUTHORIZATION_FAILED	6
-#define TMR_FUNCTION_REJECTED			255
+/* fabric independent task management response values */
+enum tcm_tmrsp_table {
+	TMR_FUNCTION_COMPLETE		= 0,
+	TMR_TASK_DOES_NOT_EXIST		= 1,
+	TMR_LUN_DOES_NOT_EXIST		= 2,
+	TMR_TASK_STILL_ALLEGIANT	= 3,
+	TMR_TASK_FAILOVER_NOT_SUPPORTED	= 4,
+	TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED	= 5,
+	TMR_FUNCTION_AUTHORIZATION_FAILED = 6,
+	TMR_FUNCTION_REJECTED		= 255,
+};
 
 extern struct kmem_cache *se_tmr_req_cache;
 

diff --git a/include/target/target_core_transport.h b/include/target/target_core_transport.h
index 2e8ec51..59aa464 100644
--- a/include/target/target_core_transport.h
+++ b/include/target/target_core_transport.h

@@ -109,6 +109,8 @@
 struct se_mem;
 struct se_subsystem_api;
 
+extern struct kmem_cache *se_mem_cache;
+
 extern int init_se_global(void);
 extern void release_se_global(void);
 extern void init_scsi_index_table(void);
@@ -190,6 +192,8 @@
 extern int transport_generic_do_tmr(struct se_cmd *);
 /* From target_core_alua.c */
 extern int core_alua_check_nonop_delay(struct se_cmd *);
+/* From target_core_cdb.c */
+extern int transport_emulate_control_cdb(struct se_task *);
 
 /*
  * Each se_transport_task_t can have N number of possible struct se_task's

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index e5e345f..e09592d 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h

@@ -21,8 +21,7 @@
 	TP_ARGS(inode),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	umode_t, mode			)
 		__field(	uid_t,	uid			)
@@ -31,8 +30,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->mode	= inode->i_mode;
 		__entry->uid	= inode->i_uid;
@@ -41,9 +39,9 @@
 	),
 
 	TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu",
-		  __entry->dev_major, __entry->dev_minor,
-		  (unsigned long) __entry->ino, __entry->mode,
-		  __entry->uid, __entry->gid,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->mode, __entry->uid, __entry->gid,
 		  (unsigned long long) __entry->blocks)
 );
 
@@ -53,21 +51,19 @@
 	TP_ARGS(dir, mode),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	dir			)
 		__field(	umode_t, mode			)
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(dir->i_sb->s_dev);
-		__entry->dev_minor = MINOR(dir->i_sb->s_dev);
+		__entry->dev	= dir->i_sb->s_dev;
 		__entry->dir	= dir->i_ino;
 		__entry->mode	= mode;
 	),
 
 	TP_printk("dev %d,%d dir %lu mode 0%o",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->dir, __entry->mode)
 );
 
@@ -77,23 +73,21 @@
 	TP_ARGS(inode, dir, mode),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	ino_t,	dir			)
 		__field(	umode_t, mode			)
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->dir	= dir->i_ino;
 		__entry->mode	= mode;
 	),
 
 	TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  (unsigned long) __entry->dir, __entry->mode)
 );
@@ -104,21 +98,19 @@
 	TP_ARGS(inode),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	int,	nlink			)
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->nlink	= inode->i_nlink;
 	),
 
 	TP_printk("dev %d,%d ino %lu nlink %d",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, __entry->nlink)
 );
 
@@ -128,21 +120,19 @@
 	TP_ARGS(inode, drop),
 
 	TP_STRUCT__entry(
-		__field(	int,	dev_major		)
-		__field(	int,	dev_minor		)
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	int,	drop			)
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->drop	= drop;
 	),
 
 	TP_printk("dev %d,%d ino %lu drop %d",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, __entry->drop)
 );
 
@@ -152,21 +142,19 @@
 	TP_ARGS(inode, IP),
 
 	TP_STRUCT__entry(
-		__field(	int,	dev_major		)
-		__field(	int,	dev_minor		)
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(unsigned long,	ip			)
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->ip	= IP;
 	),
 
 	TP_printk("dev %d,%d ino %lu caller %pF",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, (void *)__entry->ip)
 );
 
@@ -176,21 +164,19 @@
 	TP_ARGS(inode, new_size),
 
 	TP_STRUCT__entry(
-		__field(	int,	dev_major		)
-		__field(	int,	dev_minor		)
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	loff_t,	new_size		)
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor	= MINOR(inode->i_sb->s_dev);
+		__entry->dev		= inode->i_sb->s_dev;
 		__entry->ino		= inode->i_ino;
 		__entry->new_size	= new_size;
 	),
 
 	TP_printk("dev %d,%d ino %lu new_size %lld",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  (long long) __entry->new_size)
 );
@@ -203,8 +189,7 @@
 	TP_ARGS(inode, pos, len, flags),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	loff_t,	pos			)
 		__field(	unsigned int, len		)
@@ -212,8 +197,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->pos	= pos;
 		__entry->len	= len;
@@ -221,7 +205,7 @@
 	),
 
 	TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->pos, __entry->len, __entry->flags)
 );
@@ -249,8 +233,7 @@
 	TP_ARGS(inode, pos, len, copied),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	loff_t,	pos			)
 		__field(	unsigned int, len		)
@@ -258,8 +241,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->pos	= pos;
 		__entry->len	= len;
@@ -267,9 +249,9 @@
 	),
 
 	TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
-		  __entry->dev_major, __entry->dev_minor,
-		  (unsigned long) __entry->ino, __entry->pos,
-		  __entry->len, __entry->copied)
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->pos, __entry->len, __entry->copied)
 );
 
 DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end,
@@ -310,22 +292,20 @@
 	TP_ARGS(inode, page),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	pgoff_t, index			)
 
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->index	= page->index;
 	),
 
 	TP_printk("dev %d,%d ino %lu page_index %lu",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, __entry->index)
 );
 
@@ -335,43 +315,39 @@
 	TP_ARGS(inode, wbc),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	long,	nr_to_write		)
 		__field(	long,	pages_skipped		)
 		__field(	loff_t,	range_start		)
 		__field(	loff_t,	range_end		)
+		__field(	int,	sync_mode		)
 		__field(	char,	for_kupdate		)
-		__field(	char,	for_reclaim		)
 		__field(	char,	range_cyclic		)
 		__field(       pgoff_t,	writeback_index		)
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor	= MINOR(inode->i_sb->s_dev);
+		__entry->dev		= inode->i_sb->s_dev;
 		__entry->ino		= inode->i_ino;
 		__entry->nr_to_write	= wbc->nr_to_write;
 		__entry->pages_skipped	= wbc->pages_skipped;
 		__entry->range_start	= wbc->range_start;
 		__entry->range_end	= wbc->range_end;
+		__entry->sync_mode	= wbc->sync_mode;
 		__entry->for_kupdate	= wbc->for_kupdate;
-		__entry->for_reclaim	= wbc->for_reclaim;
 		__entry->range_cyclic	= wbc->range_cyclic;
 		__entry->writeback_index = inode->i_mapping->writeback_index;
 	),
 
 	TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld "
-		  "range_start %llu range_end %llu "
-		  "for_kupdate %d for_reclaim %d "
-		  "range_cyclic %d writeback_index %lu",
-		  __entry->dev_major, __entry->dev_minor,
+		  "range_start %llu range_end %llu sync_mode %d"
+		  "for_kupdate %d range_cyclic %d writeback_index %lu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, __entry->nr_to_write,
 		  __entry->pages_skipped, __entry->range_start,
-		  __entry->range_end,
-		  __entry->for_kupdate, __entry->for_reclaim,
-		  __entry->range_cyclic,
+		  __entry->range_end, __entry->sync_mode,
+		  __entry->for_kupdate, __entry->range_cyclic,
 		  (unsigned long) __entry->writeback_index)
 );
 
@@ -381,8 +357,7 @@
 	TP_ARGS(inode, mpd),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	__u64,	b_blocknr		)
 		__field(	__u32,	b_size			)
@@ -390,11 +365,11 @@
 		__field(	unsigned long,	first_page	)
 		__field(	int,	io_done			)
 		__field(	int,	pages_written		)
+		__field(	int,	sync_mode		)
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor	= MINOR(inode->i_sb->s_dev);
+		__entry->dev		= inode->i_sb->s_dev;
 		__entry->ino		= inode->i_ino;
 		__entry->b_blocknr	= mpd->b_blocknr;
 		__entry->b_size		= mpd->b_size;
@@ -402,14 +377,18 @@
 		__entry->first_page	= mpd->first_page;
 		__entry->io_done	= mpd->io_done;
 		__entry->pages_written	= mpd->pages_written;
+		__entry->sync_mode	= mpd->wbc->sync_mode;
 	),
 
-	TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d",
-		  __entry->dev_major, __entry->dev_minor,
+	TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x "
+		  "first_page %lu io_done %d pages_written %d sync_mode %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->b_blocknr, __entry->b_size,
 		  __entry->b_state, __entry->first_page,
-		  __entry->io_done, __entry->pages_written)
+		  __entry->io_done, __entry->pages_written,
+		  __entry->sync_mode
+                  )
 );
 
 TRACE_EVENT(ext4_da_writepages_result,
@@ -419,35 +398,100 @@
 	TP_ARGS(inode, wbc, ret, pages_written),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	int,	ret			)
 		__field(	int,	pages_written		)
 		__field(	long,	pages_skipped		)
+		__field(	int,	sync_mode		)
 		__field(	char,	more_io			)	
 		__field(       pgoff_t,	writeback_index		)
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor	= MINOR(inode->i_sb->s_dev);
+		__entry->dev		= inode->i_sb->s_dev;
 		__entry->ino		= inode->i_ino;
 		__entry->ret		= ret;
 		__entry->pages_written	= pages_written;
 		__entry->pages_skipped	= wbc->pages_skipped;
+		__entry->sync_mode	= wbc->sync_mode;
 		__entry->more_io	= wbc->more_io;
 		__entry->writeback_index = inode->i_mapping->writeback_index;
 	),
 
-	TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu",
-		  __entry->dev_major, __entry->dev_minor,
+	TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld "
+		  " more_io %d sync_mode %d writeback_index %lu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, __entry->ret,
 		  __entry->pages_written, __entry->pages_skipped,
-		  __entry->more_io,
+		  __entry->more_io, __entry->sync_mode,
 		  (unsigned long) __entry->writeback_index)
 );
 
+DECLARE_EVENT_CLASS(ext4__page_op,
+	TP_PROTO(struct page *page),
+
+	TP_ARGS(page),
+
+	TP_STRUCT__entry(
+		__field(	pgoff_t, index			)
+		__field(	ino_t,	ino			)
+		__field(	dev_t,	dev			)
+
+	),
+
+	TP_fast_assign(
+		__entry->index	= page->index;
+		__entry->ino	= page->mapping->host->i_ino;
+		__entry->dev	= page->mapping->host->i_sb->s_dev;
+	),
+
+	TP_printk("dev %d,%d ino %lu page_index %lu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->index)
+);
+
+DEFINE_EVENT(ext4__page_op, ext4_readpage,
+
+	TP_PROTO(struct page *page),
+
+	TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext4__page_op, ext4_releasepage,
+
+	TP_PROTO(struct page *page),
+
+	TP_ARGS(page)
+);
+
+TRACE_EVENT(ext4_invalidatepage,
+	TP_PROTO(struct page *page, unsigned long offset),
+
+	TP_ARGS(page, offset),
+
+	TP_STRUCT__entry(
+		__field(	pgoff_t, index			)
+		__field(	unsigned long, offset		)
+		__field(	ino_t,	ino			)
+		__field(	dev_t,	dev			)
+
+	),
+
+	TP_fast_assign(
+		__entry->index	= page->index;
+		__entry->offset	= offset;
+		__entry->ino	= page->mapping->host->i_ino;
+		__entry->dev	= page->mapping->host->i_sb->s_dev;
+	),
+
+	TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->index, __entry->offset)
+);
+
 TRACE_EVENT(ext4_discard_blocks,
 	TP_PROTO(struct super_block *sb, unsigned long long blk,
 			unsigned long long count),
@@ -455,22 +499,20 @@
 	TP_ARGS(sb, blk, count),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	__u64,	blk			)
 		__field(	__u64,	count			)
 
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(sb->s_dev);
-		__entry->dev_minor = MINOR(sb->s_dev);
+		__entry->dev	= sb->s_dev;
 		__entry->blk	= blk;
 		__entry->count	= count;
 	),
 
 	TP_printk("dev %d,%d blk %llu count %llu",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->blk, __entry->count)
 );
 
@@ -481,8 +523,7 @@
 	TP_ARGS(ac, pa),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	__u64,	pa_pstart		)
 		__field(	__u32,	pa_len			)
@@ -491,8 +532,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(ac->ac_sb->s_dev);
-		__entry->dev_minor	= MINOR(ac->ac_sb->s_dev);
+		__entry->dev		= ac->ac_sb->s_dev;
 		__entry->ino		= ac->ac_inode->i_ino;
 		__entry->pa_pstart	= pa->pa_pstart;
 		__entry->pa_len		= pa->pa_len;
@@ -500,9 +540,9 @@
 	),
 
 	TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu",
-		  __entry->dev_major, __entry->dev_minor,
-		  (unsigned long) __entry->ino, __entry->pa_pstart,
-		  __entry->pa_len, __entry->pa_lstart)
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
 );
 
 DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa,
@@ -530,8 +570,7 @@
 	TP_ARGS(sb, inode, pa, block, count),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	__u64,	block			)
 		__field(	__u32,	count			)
@@ -539,16 +578,16 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(sb->s_dev);
-		__entry->dev_minor	= MINOR(sb->s_dev);
+		__entry->dev		= sb->s_dev;
 		__entry->ino		= inode->i_ino;
 		__entry->block		= block;
 		__entry->count		= count;
 	),
 
 	TP_printk("dev %d,%d ino %lu block %llu count %u",
-		  __entry->dev_major, __entry->dev_minor,
-		  (unsigned long) __entry->ino, __entry->block, __entry->count)
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->block, __entry->count)
 );
 
 TRACE_EVENT(ext4_mb_release_group_pa,
@@ -558,22 +597,20 @@
 	TP_ARGS(sb, pa),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	__u64,	pa_pstart		)
 		__field(	__u32,	pa_len			)
 
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(sb->s_dev);
-		__entry->dev_minor	= MINOR(sb->s_dev);
+		__entry->dev		= sb->s_dev;
 		__entry->pa_pstart	= pa->pa_pstart;
 		__entry->pa_len		= pa->pa_len;
 	),
 
 	TP_printk("dev %d,%d pstart %llu len %u",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->pa_pstart, __entry->pa_len)
 );
 
@@ -583,20 +620,18 @@
 	TP_ARGS(inode),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 	),
 
 	TP_printk("dev %d,%d ino %lu",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino)
 );
 
@@ -606,20 +641,19 @@
 	TP_ARGS(sb, needed),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	int,	needed			)
 
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(sb->s_dev);
-		__entry->dev_minor = MINOR(sb->s_dev);
+		__entry->dev	= sb->s_dev;
 		__entry->needed	= needed;
 	),
 
 	TP_printk("dev %d,%d needed %d",
-		  __entry->dev_major, __entry->dev_minor, __entry->needed)
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->needed)
 );
 
 TRACE_EVENT(ext4_request_blocks,
@@ -628,8 +662,7 @@
 	TP_ARGS(ar),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	unsigned int, flags		)
 		__field(	unsigned int, len		)
@@ -642,8 +675,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(ar->inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(ar->inode->i_sb->s_dev);
+		__entry->dev	= ar->inode->i_sb->s_dev;
 		__entry->ino	= ar->inode->i_ino;
 		__entry->flags	= ar->flags;
 		__entry->len	= ar->len;
@@ -655,8 +687,9 @@
 		__entry->pright	= ar->pright;
 	),
 
-	TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
-		  __entry->dev_major, __entry->dev_minor,
+	TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu "
+		  "lleft %llu lright %llu pleft %llu pright %llu ",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->flags, __entry->len,
 		  (unsigned long long) __entry->logical,
@@ -673,8 +706,7 @@
 	TP_ARGS(ar, block),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	__u64,	block			)
 		__field(	unsigned int, flags		)
@@ -688,8 +720,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(ar->inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(ar->inode->i_sb->s_dev);
+		__entry->dev	= ar->inode->i_sb->s_dev;
 		__entry->ino	= ar->inode->i_ino;
 		__entry->block	= block;
 		__entry->flags	= ar->flags;
@@ -702,10 +733,11 @@
 		__entry->pright	= ar->pright;
 	),
 
-	TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
-		  __entry->dev_major, __entry->dev_minor,
-		  (unsigned long) __entry->ino, __entry->flags,
-		  __entry->len, __entry->block,
+	TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu "
+		  "goal %llu lleft %llu lright %llu pleft %llu pright %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->flags, __entry->len, __entry->block,
 		  (unsigned long long) __entry->logical,
 		  (unsigned long long) __entry->goal,
 		  (unsigned long long) __entry->lleft,
@@ -721,8 +753,7 @@
 	TP_ARGS(inode, block, count, flags),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(      umode_t, mode			)
 		__field(	__u64,	block			)
@@ -731,8 +762,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor	= MINOR(inode->i_sb->s_dev);
+		__entry->dev		= inode->i_sb->s_dev;
 		__entry->ino		= inode->i_ino;
 		__entry->mode		= inode->i_mode;
 		__entry->block		= block;
@@ -741,20 +771,19 @@
 	),
 
 	TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %d",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->mode, __entry->block, __entry->count,
 		  __entry->flags)
 );
 
-TRACE_EVENT(ext4_sync_file,
+TRACE_EVENT(ext4_sync_file_enter,
 	TP_PROTO(struct file *file, int datasync),
 
 	TP_ARGS(file, datasync),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	ino_t,	parent			)
 		__field(	int,	datasync		)
@@ -763,39 +792,60 @@
 	TP_fast_assign(
 		struct dentry *dentry = file->f_path.dentry;
 
-		__entry->dev_major	= MAJOR(dentry->d_inode->i_sb->s_dev);
-		__entry->dev_minor	= MINOR(dentry->d_inode->i_sb->s_dev);
+		__entry->dev		= dentry->d_inode->i_sb->s_dev;
 		__entry->ino		= dentry->d_inode->i_ino;
 		__entry->datasync	= datasync;
 		__entry->parent		= dentry->d_parent->d_inode->i_ino;
 	),
 
 	TP_printk("dev %d,%d ino %ld parent %ld datasync %d ",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  (unsigned long) __entry->parent, __entry->datasync)
 );
 
+TRACE_EVENT(ext4_sync_file_exit,
+	TP_PROTO(struct inode *inode, int ret),
+
+	TP_ARGS(inode, ret),
+
+	TP_STRUCT__entry(
+		__field(	int,	ret			)
+		__field(	ino_t,	ino			)
+		__field(	dev_t,	dev			)
+	),
+
+	TP_fast_assign(
+		__entry->ret		= ret;
+		__entry->ino		= inode->i_ino;
+		__entry->dev		= inode->i_sb->s_dev;
+	),
+
+	TP_printk("dev %d,%d ino %ld ret %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->ret)
+);
+
 TRACE_EVENT(ext4_sync_fs,
 	TP_PROTO(struct super_block *sb, int wait),
 
 	TP_ARGS(sb, wait),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	int,	wait			)
 
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(sb->s_dev);
-		__entry->dev_minor = MINOR(sb->s_dev);
+		__entry->dev	= sb->s_dev;
 		__entry->wait	= wait;
 	),
 
-	TP_printk("dev %d,%d wait %d", __entry->dev_major,
-		  __entry->dev_minor, __entry->wait)
+	TP_printk("dev %d,%d wait %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->wait)
 );
 
 TRACE_EVENT(ext4_alloc_da_blocks,
@@ -804,23 +854,21 @@
 	TP_ARGS(inode),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field( unsigned int,	data_blocks	)
 		__field( unsigned int,	meta_blocks	)
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
 		__entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
 	),
 
 	TP_printk("dev %d,%d ino %lu data_blocks %u meta_blocks %u",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->data_blocks, __entry->meta_blocks)
 );
@@ -831,8 +879,7 @@
 	TP_ARGS(ac),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	__u16,	found			)
 		__field(	__u16,	groups			)
@@ -855,8 +902,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(ac->ac_inode->i_sb->s_dev);
-		__entry->dev_minor	= MINOR(ac->ac_inode->i_sb->s_dev);
+		__entry->dev		= ac->ac_inode->i_sb->s_dev;
 		__entry->ino		= ac->ac_inode->i_ino;
 		__entry->found		= ac->ac_found;
 		__entry->flags		= ac->ac_flags;
@@ -881,7 +927,7 @@
 	TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
 		  "result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x "
 		  "tail %u broken %u",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->orig_group, __entry->orig_start,
 		  __entry->orig_len, __entry->orig_logical,
@@ -900,8 +946,7 @@
 	TP_ARGS(ac),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	__u32, 	orig_logical		)
 		__field(	  int,	orig_start		)
@@ -914,8 +959,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(ac->ac_inode->i_sb->s_dev);
-		__entry->dev_minor	= MINOR(ac->ac_inode->i_sb->s_dev);
+		__entry->dev		= ac->ac_inode->i_sb->s_dev;
 		__entry->ino		= ac->ac_inode->i_ino;
 		__entry->orig_logical	= ac->ac_o_ex.fe_logical;
 		__entry->orig_start	= ac->ac_o_ex.fe_start;
@@ -928,7 +972,7 @@
 	),
 
 	TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->orig_group, __entry->orig_start,
 		  __entry->orig_len, __entry->orig_logical,
@@ -946,8 +990,7 @@
 	TP_ARGS(sb, inode, group, start, len),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	  int,	result_start		)
 		__field(	__u32, 	result_group		)
@@ -955,8 +998,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(sb->s_dev);
-		__entry->dev_minor	= MINOR(sb->s_dev);
+		__entry->dev		= sb->s_dev;
 		__entry->ino		= inode ? inode->i_ino : 0;
 		__entry->result_start	= start;
 		__entry->result_group	= group;
@@ -964,7 +1006,7 @@
 	),
 
 	TP_printk("dev %d,%d inode %lu extent %u/%d/%u ",
-		  __entry->dev_major, __entry->dev_minor,
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->result_group, __entry->result_start,
 		  __entry->result_len)
@@ -998,8 +1040,7 @@
 	TP_ARGS(inode, is_metadata, block),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	umode_t, mode			)
 		__field(	int,	is_metadata		)
@@ -1007,8 +1048,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->mode	= inode->i_mode;
 		__entry->is_metadata = is_metadata;
@@ -1016,9 +1056,9 @@
 	),
 
 	TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu",
-		  __entry->dev_major, __entry->dev_minor,
-		  (unsigned long) __entry->ino, __entry->mode,
-		  __entry->is_metadata, __entry->block)
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->mode, __entry->is_metadata, __entry->block)
 );
 
 TRACE_EVENT(ext4_da_update_reserve_space,
@@ -1027,8 +1067,7 @@
 	TP_ARGS(inode, used_blocks),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	umode_t, mode			)
 		__field(	__u64,	i_blocks		)
@@ -1039,8 +1078,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->mode	= inode->i_mode;
 		__entry->i_blocks = inode->i_blocks;
@@ -1050,10 +1088,12 @@
 		__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
 	),
 
-	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
-		  __entry->dev_major, __entry->dev_minor,
-		  (unsigned long) __entry->ino, __entry->mode,
-		  (unsigned long long) __entry->i_blocks,
+	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d "
+		  "reserved_data_blocks %d reserved_meta_blocks %d "
+		  "allocated_meta_blocks %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->mode,  (unsigned long long) __entry->i_blocks,
 		  __entry->used_blocks, __entry->reserved_data_blocks,
 		  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
 );
@@ -1064,8 +1104,7 @@
 	TP_ARGS(inode, md_needed),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	umode_t, mode			)
 		__field(	__u64,	i_blocks		)
@@ -1075,8 +1114,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->mode	= inode->i_mode;
 		__entry->i_blocks = inode->i_blocks;
@@ -1085,8 +1123,9 @@
 		__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
 	),
 
-	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d",
-		  __entry->dev_major, __entry->dev_minor,
+	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d "
+		  "reserved_data_blocks %d reserved_meta_blocks %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->mode, (unsigned long long) __entry->i_blocks,
 		  __entry->md_needed, __entry->reserved_data_blocks,
@@ -1099,8 +1138,7 @@
 	TP_ARGS(inode, freed_blocks),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 		__field(	umode_t, mode			)
 		__field(	__u64,	i_blocks		)
@@ -1111,8 +1149,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->mode	= inode->i_mode;
 		__entry->i_blocks = inode->i_blocks;
@@ -1122,8 +1159,10 @@
 		__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
 	),
 
-	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
-		  __entry->dev_major, __entry->dev_minor,
+	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d "
+		  "reserved_data_blocks %d reserved_meta_blocks %d "
+		  "allocated_meta_blocks %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->mode, (unsigned long long) __entry->i_blocks,
 		  __entry->freed_blocks, __entry->reserved_data_blocks,
@@ -1136,20 +1175,19 @@
 	TP_ARGS(sb, group),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	__u32,	group			)
 
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(sb->s_dev);
-		__entry->dev_minor = MINOR(sb->s_dev);
+		__entry->dev	= sb->s_dev;
 		__entry->group	= group;
 	),
 
 	TP_printk("dev %d,%d group %u",
-		  __entry->dev_major, __entry->dev_minor, __entry->group)
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->group)
 );
 
 DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load,
@@ -1166,6 +1204,349 @@
 	TP_ARGS(sb, group)
 );
 
+DEFINE_EVENT(ext4__bitmap_load, ext4_read_block_bitmap_load,
+
+	TP_PROTO(struct super_block *sb, unsigned long group),
+
+	TP_ARGS(sb, group)
+);
+
+DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap,
+
+	TP_PROTO(struct super_block *sb, unsigned long group),
+
+	TP_ARGS(sb, group)
+);
+
+TRACE_EVENT(ext4_direct_IO_enter,
+	TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
+
+	TP_ARGS(inode, offset, len, rw),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,	ino			)
+		__field(	dev_t,	dev			)
+		__field(	loff_t,	pos			)
+		__field(	unsigned long,	len		)
+		__field(	int,	rw			)
+	),
+
+	TP_fast_assign(
+		__entry->ino	= inode->i_ino;
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->pos	= offset;
+		__entry->len	= len;
+		__entry->rw	= rw;
+	),
+
+	TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned long long) __entry->pos, __entry->len, __entry->rw)
+);
+
+TRACE_EVENT(ext4_direct_IO_exit,
+	TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw, int ret),
+
+	TP_ARGS(inode, offset, len, rw, ret),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,	ino			)
+		__field(	dev_t,	dev			)
+		__field(	loff_t,	pos			)
+		__field(	unsigned long,	len		)
+		__field(	int,	rw			)
+		__field(	int,	ret			)
+	),
+
+	TP_fast_assign(
+		__entry->ino	= inode->i_ino;
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->pos	= offset;
+		__entry->len	= len;
+		__entry->rw	= rw;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned long long) __entry->pos, __entry->len,
+		  __entry->rw, __entry->ret)
+);
+
+TRACE_EVENT(ext4_fallocate_enter,
+	TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode),
+
+	TP_ARGS(inode, offset, len, mode),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,	ino			)
+		__field(	dev_t,	dev			)
+		__field(	loff_t,	pos			)
+		__field(	loff_t,	len			)
+		__field(	int,	mode			)
+	),
+
+	TP_fast_assign(
+		__entry->ino	= inode->i_ino;
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->pos	= offset;
+		__entry->len	= len;
+		__entry->mode	= mode;
+	),
+
+	TP_printk("dev %d,%d ino %ld pos %llu len %llu mode %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned long long) __entry->pos,
+		  (unsigned long long) __entry->len, __entry->mode)
+);
+
+TRACE_EVENT(ext4_fallocate_exit,
+	TP_PROTO(struct inode *inode, loff_t offset, unsigned int max_blocks, int ret),
+
+	TP_ARGS(inode, offset, max_blocks, ret),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,	ino			)
+		__field(	dev_t,	dev			)
+		__field(	loff_t,	pos			)
+		__field(	unsigned,	blocks		)
+		__field(	int, 	ret			)
+	),
+
+	TP_fast_assign(
+		__entry->ino	= inode->i_ino;
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->pos	= offset;
+		__entry->blocks	= max_blocks;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("dev %d,%d ino %ld pos %llu blocks %d ret %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned long long) __entry->pos, __entry->blocks,
+		  __entry->ret)
+);
+
+TRACE_EVENT(ext4_unlink_enter,
+	TP_PROTO(struct inode *parent, struct dentry *dentry),
+
+	TP_ARGS(parent, dentry),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,	parent			)
+		__field(	ino_t,	ino			)
+		__field(	loff_t,	size			)
+		__field(	dev_t,	dev			)
+	),
+
+	TP_fast_assign(
+		__entry->parent		= parent->i_ino;
+		__entry->ino		= dentry->d_inode->i_ino;
+		__entry->size		= dentry->d_inode->i_size;
+		__entry->dev		= dentry->d_inode->i_sb->s_dev;
+	),
+
+	TP_printk("dev %d,%d ino %ld size %lld parent %ld",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino, __entry->size,
+		  (unsigned long) __entry->parent)
+);
+
+TRACE_EVENT(ext4_unlink_exit,
+	TP_PROTO(struct dentry *dentry, int ret),
+
+	TP_ARGS(dentry, ret),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,	ino			)
+		__field(	dev_t,	dev			)
+		__field(	int,	ret			)
+	),
+
+	TP_fast_assign(
+		__entry->ino		= dentry->d_inode->i_ino;
+		__entry->dev		= dentry->d_inode->i_sb->s_dev;
+		__entry->ret		= ret;
+	),
+
+	TP_printk("dev %d,%d ino %ld ret %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  __entry->ret)
+);
+
+DECLARE_EVENT_CLASS(ext4__truncate,
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,  	ino		)
+		__field(	dev_t,  	dev		)
+		__field(	blkcnt_t,	blocks		)
+	),
+
+	TP_fast_assign(
+		__entry->ino    = inode->i_ino;
+		__entry->dev    = inode->i_sb->s_dev;
+		__entry->blocks	= inode->i_blocks;
+	),
+
+	TP_printk("dev %d,%d ino %lu blocks %lu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino, (unsigned long) __entry->blocks)
+);
+
+DEFINE_EVENT(ext4__truncate, ext4_truncate_enter,
+
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode)
+);
+
+DEFINE_EVENT(ext4__truncate, ext4_truncate_exit,
+
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode)
+);
+
+DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
+	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+		 unsigned len, unsigned flags),
+
+	TP_ARGS(inode, lblk, len, flags),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,  	ino		)
+		__field(	dev_t,  	dev		)
+		__field(	ext4_lblk_t,	lblk		)
+		__field(	unsigned,	len		)
+		__field(	unsigned,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->ino    = inode->i_ino;
+		__entry->dev    = inode->i_sb->s_dev;
+		__entry->lblk	= lblk;
+		__entry->len	= len;
+		__entry->flags	= flags;
+	),
+
+	TP_printk("dev %d,%d ino %lu lblk %u len %u flags %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->lblk, __entry->len, __entry->flags)
+);
+
+DEFINE_EVENT(ext4__map_blocks_enter, ext4_ext_map_blocks_enter,
+	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+		 unsigned len, unsigned flags),
+
+	TP_ARGS(inode, lblk, len, flags)
+);
+
+DEFINE_EVENT(ext4__map_blocks_enter, ext4_ind_map_blocks_enter,
+	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+		 unsigned len, unsigned flags),
+
+	TP_ARGS(inode, lblk, len, flags)
+);
+
+DECLARE_EVENT_CLASS(ext4__map_blocks_exit,
+	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+		 ext4_fsblk_t pblk, unsigned len, int ret),
+
+	TP_ARGS(inode, lblk, pblk, len, ret),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino		)
+		__field(	dev_t,		dev		)
+		__field(	ext4_lblk_t,	lblk		)
+		__field(	ext4_fsblk_t,	pblk		)
+		__field(	unsigned,	len		)
+		__field(	int,		ret		)
+	),
+
+	TP_fast_assign(
+		__entry->ino    = inode->i_ino;
+		__entry->dev    = inode->i_sb->s_dev;
+		__entry->lblk	= lblk;
+		__entry->pblk	= pblk;
+		__entry->len	= len;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u ret %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->lblk, (unsigned long long) __entry->pblk,
+		  __entry->len, __entry->ret)
+);
+
+DEFINE_EVENT(ext4__map_blocks_exit, ext4_ext_map_blocks_exit,
+	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+		 ext4_fsblk_t pblk, unsigned len, int ret),
+
+	TP_ARGS(inode, lblk, pblk, len, ret)
+);
+
+DEFINE_EVENT(ext4__map_blocks_exit, ext4_ind_map_blocks_exit,
+	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+		 ext4_fsblk_t pblk, unsigned len, int ret),
+
+	TP_ARGS(inode, lblk, pblk, len, ret)
+);
+
+TRACE_EVENT(ext4_ext_load_extent,
+	TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk),
+
+	TP_ARGS(inode, lblk, pblk),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,		ino		)
+		__field(	dev_t,		dev		)
+		__field(	ext4_lblk_t,	lblk		)
+		__field(	ext4_fsblk_t,	pblk		)
+	),
+
+	TP_fast_assign(
+		__entry->ino    = inode->i_ino;
+		__entry->dev    = inode->i_sb->s_dev;
+		__entry->lblk	= lblk;
+		__entry->pblk	= pblk;
+	),
+
+	TP_printk("dev %d,%d ino %lu lblk %u pblk %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino,
+		  (unsigned) __entry->lblk, (unsigned long long) __entry->pblk)
+);
+
+TRACE_EVENT(ext4_load_inode,
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode),
+
+	TP_STRUCT__entry(
+		__field(	ino_t,	ino		)
+		__field(	dev_t,	dev		)
+	),
+
+	TP_fast_assign(
+		__entry->ino		= inode->i_ino;
+		__entry->dev		= inode->i_sb->s_dev;
+	),
+
+	TP_printk("dev %d,%d ino %ld",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */

diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index 7447ea9..bf16545 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h

@@ -17,19 +17,17 @@
 	TP_ARGS(journal, result),
 
 	TP_STRUCT__entry(
-		__field(	int,	dev_major		)
-		__field(	int,	dev_minor		)
+		__field(	dev_t,	dev			)
 		__field(	int,	result			)
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(journal->j_fs_dev->bd_dev);
-		__entry->dev_minor	= MINOR(journal->j_fs_dev->bd_dev);
+		__entry->dev		= journal->j_fs_dev->bd_dev;
 		__entry->result		= result;
 	),
 
-	TP_printk("dev %d,%d result %d",
-		  __entry->dev_major, __entry->dev_minor, __entry->result)
+	TP_printk("dev %s result %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->result)
 );
 
 DECLARE_EVENT_CLASS(jbd2_commit,
@@ -39,22 +37,20 @@
 	TP_ARGS(journal, commit_transaction),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	char,	sync_commit		  )
 		__field(	int,	transaction		  )
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(journal->j_fs_dev->bd_dev);
-		__entry->dev_minor	= MINOR(journal->j_fs_dev->bd_dev);
+		__entry->dev		= journal->j_fs_dev->bd_dev;
 		__entry->sync_commit = commit_transaction->t_synchronous_commit;
 		__entry->transaction	= commit_transaction->t_tid;
 	),
 
-	TP_printk("dev %d,%d transaction %d sync %d",
-		  __entry->dev_major, __entry->dev_minor,
-		  __entry->transaction, __entry->sync_commit)
+	TP_printk("dev %s transaction %d sync %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
+		  __entry->sync_commit)
 );
 
 DEFINE_EVENT(jbd2_commit, jbd2_start_commit,
@@ -91,24 +87,22 @@
 	TP_ARGS(journal, commit_transaction),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	char,	sync_commit		  )
 		__field(	int,	transaction		  )
 		__field(	int,	head		  	  )
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(journal->j_fs_dev->bd_dev);
-		__entry->dev_minor	= MINOR(journal->j_fs_dev->bd_dev);
+		__entry->dev		= journal->j_fs_dev->bd_dev;
 		__entry->sync_commit = commit_transaction->t_synchronous_commit;
 		__entry->transaction	= commit_transaction->t_tid;
 		__entry->head		= journal->j_tail_sequence;
 	),
 
-	TP_printk("dev %d,%d transaction %d sync %d head %d",
-		  __entry->dev_major, __entry->dev_minor,
-		  __entry->transaction, __entry->sync_commit, __entry->head)
+	TP_printk("dev %s transaction %d sync %d head %d",
+		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
+		  __entry->sync_commit, __entry->head)
 );
 
 TRACE_EVENT(jbd2_submit_inode_data,
@@ -117,20 +111,17 @@
 	TP_ARGS(inode),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
 	),
 
 	TP_fast_assign(
-		__entry->dev_major = MAJOR(inode->i_sb->s_dev);
-		__entry->dev_minor = MINOR(inode->i_sb->s_dev);
+		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 	),
 
-	TP_printk("dev %d,%d ino %lu",
-		  __entry->dev_major, __entry->dev_minor,
-		  (unsigned long) __entry->ino)
+	TP_printk("dev %s ino %lu",
+		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
 );
 
 TRACE_EVENT(jbd2_run_stats,
@@ -140,8 +131,7 @@
 	TP_ARGS(dev, tid, stats),
 
 	TP_STRUCT__entry(
-		__field(		  int,	dev_major	)
-		__field(		  int,	dev_minor	)
+		__field(		dev_t,	dev		)
 		__field(	unsigned long,	tid		)
 		__field(	unsigned long,	wait		)
 		__field(	unsigned long,	running		)
@@ -154,8 +144,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(dev);
-		__entry->dev_minor	= MINOR(dev);
+		__entry->dev		= dev;
 		__entry->tid		= tid;
 		__entry->wait		= stats->rs_wait;
 		__entry->running	= stats->rs_running;
@@ -167,9 +156,9 @@
 		__entry->blocks_logged	= stats->rs_blocks_logged;
 	),
 
-	TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u "
+	TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u "
 		  "logging %u handle_count %u blocks %u blocks_logged %u",
-		  __entry->dev_major, __entry->dev_minor, __entry->tid,
+		  jbd2_dev_to_name(__entry->dev), __entry->tid,
 		  jiffies_to_msecs(__entry->wait),
 		  jiffies_to_msecs(__entry->running),
 		  jiffies_to_msecs(__entry->locked),
@@ -186,8 +175,7 @@
 	TP_ARGS(dev, tid, stats),
 
 	TP_STRUCT__entry(
-		__field(		  int,	dev_major	)
-		__field(		  int,	dev_minor	)
+		__field(		dev_t,	dev		)
 		__field(	unsigned long,	tid		)
 		__field(	unsigned long,	chp_time	)
 		__field(		__u32,	forced_to_close	)
@@ -196,8 +184,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(dev);
-		__entry->dev_minor	= MINOR(dev);
+		__entry->dev		= dev;
 		__entry->tid		= tid;
 		__entry->chp_time	= stats->cs_chp_time;
 		__entry->forced_to_close= stats->cs_forced_to_close;
@@ -205,9 +192,9 @@
 		__entry->dropped	= stats->cs_dropped;
 	),
 
-	TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u "
+	TP_printk("dev %s tid %lu chp_time %u forced_to_close %u "
 		  "written %u dropped %u",
-		  __entry->dev_major, __entry->dev_minor, __entry->tid,
+		  jbd2_dev_to_name(__entry->dev), __entry->tid,
 		  jiffies_to_msecs(__entry->chp_time),
 		  __entry->forced_to_close, __entry->written, __entry->dropped)
 );
@@ -220,8 +207,7 @@
 	TP_ARGS(journal, first_tid, block_nr, freed),
 
 	TP_STRUCT__entry(
-		__field(	int,   dev_major                )
-		__field(	int,   dev_minor                )
+		__field(	dev_t,	dev			)
 		__field(	tid_t,	tail_sequence		)
 		__field(	tid_t,	first_tid		)
 		__field(unsigned long,	block_nr		)
@@ -229,18 +215,16 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev_major	= MAJOR(journal->j_fs_dev->bd_dev);
-		__entry->dev_minor	= MINOR(journal->j_fs_dev->bd_dev);
+		__entry->dev		= journal->j_fs_dev->bd_dev;
 		__entry->tail_sequence	= journal->j_tail_sequence;
 		__entry->first_tid	= first_tid;
 		__entry->block_nr	= block_nr;
 		__entry->freed		= freed;
 	),
 
-	TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
-		  __entry->dev_major, __entry->dev_minor,
-		  __entry->tail_sequence, __entry->first_tid,
-		  __entry->block_nr, __entry->freed)
+	TP_printk("dev %s from %u to %u offset %lu freed %lu",
+		  jbd2_dev_to_name(__entry->dev), __entry->tail_sequence,
+		  __entry->first_tid, __entry->block_nr, __entry->freed)
 );
 
 #endif /* _TRACE_JBD2_H */

diff --git a/ipc/namespace.c b/ipc/namespace.c
index 3c3e522..8054c8e 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c

@@ -104,7 +104,6 @@
 	sem_exit_ns(ns);
 	msg_exit_ns(ns);
 	shm_exit_ns(ns);
-	kfree(ns);
 	atomic_dec(&nr_ipc_ns);
 
 	/*
@@ -113,6 +112,7 @@
 	 */
 	ipcns_notify(IPCNS_REMOVED);
 	put_user_ns(ns->user_ns);
+	kfree(ns);
 }
 
 /*

diff --git a/ipc/util.c b/ipc/util.c
index 8fd1b89..5c0d289 100644
--- a/ipc/util.c
+++ b/ipc/util.c

@@ -317,6 +317,7 @@
 
 /**
  *	ipc_check_perms	-	check security and permissions for an IPC
+ *	@ns: IPC namespace
  *	@ipcp: ipc permission set
  *	@ops: the actual security routine to call
  *	@params: its parameters
@@ -607,6 +608,7 @@
 
 /**
  *	ipcperms	-	check IPC permissions
+ *	@ns: IPC namespace
  *	@ipcp: IPC permission set
  *	@flag: desired permission set.
  *
@@ -769,7 +771,7 @@
 
 /**
  * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd
- * @ids:  the ipc namespace
+ * @ns:  the ipc namespace
  * @ids:  the table of ids where to look for the ipc
  * @id:   the id of the ipc to retrieve
  * @cmd:  the cmd to check

diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index bd3e8e2..6bc6e3b 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c

@@ -78,7 +78,7 @@
 static kdbtab_t *kdb_commands;
 #define KDB_BASE_CMD_MAX 50
 static int kdb_max_commands = KDB_BASE_CMD_MAX;
-static kdbtab_t kdb_base_commands[50];
+static kdbtab_t kdb_base_commands[KDB_BASE_CMD_MAX];
 #define for_each_kdbcmd(cmd, num)					\
 	for ((cmd) = kdb_base_commands, (num) = 0;			\
 	     num < kdb_max_commands;					\
@@ -2892,7 +2892,7 @@
 	  "Send a signal to a process", 0, KDB_REPEAT_NONE);
 	kdb_register_repeat("summary", kdb_summary, "",
 	  "Summarize the system", 4, KDB_REPEAT_NONE);
-	kdb_register_repeat("per_cpu", kdb_per_cpu, "",
+	kdb_register_repeat("per_cpu", kdb_per_cpu, "<sym> [<bytes>] [<cpu>]",
 	  "Display per_cpu variables", 3, KDB_REPEAT_NONE);
 	kdb_register_repeat("grephelp", kdb_grep_help, "",
 	  "Display help on | grep", 0, KDB_REPEAT_NONE);

diff --git a/kernel/futex.c b/kernel/futex.c
index 6570c459f..dfb924f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c

@@ -782,8 +782,8 @@
 {
 	struct futex_hash_bucket *hb;
 
-	if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr)
-			|| plist_node_empty(&q->list)))
+	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
+	    || WARN_ON(plist_node_empty(&q->list)))
 		return;
 
 	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);

diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 09bef82..00f2c03 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig

@@ -31,6 +31,10 @@
 config GENERIC_IRQ_SHOW
        bool
 
+# Print level/edge extra information
+config GENERIC_IRQ_SHOW_LEVEL
+       bool
+
 # Support for delayed migration from interrupt context
 config GENERIC_PENDING_IRQ
 	bool

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index dbccc79..6fb014f 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c

@@ -198,15 +198,6 @@
 	return -ENOMEM;
 }
 
-struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
-{
-	int res = irq_alloc_descs(irq, irq, 1, node);
-
-	if (res == -EEXIST || res == irq)
-		return irq_to_desc(irq);
-	return NULL;
-}
-
 static int irq_expand_nr_irqs(unsigned int nr)
 {
 	if (nr > IRQ_BITMAP_BITS)
@@ -283,11 +274,6 @@
 	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
-{
-	return irq_to_desc(irq);
-}
-
 static void free_desc(unsigned int irq)
 {
 	dynamic_irq_cleanup(irq);

diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 760248d..626d092 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c

@@ -404,7 +404,20 @@
 	seq_printf(p, "%*d: ", prec, i);
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-	seq_printf(p, " %8s", desc->irq_data.chip->name);
+
+	if (desc->irq_data.chip) {
+		if (desc->irq_data.chip->irq_print_chip)
+			desc->irq_data.chip->irq_print_chip(&desc->irq_data, p);
+		else if (desc->irq_data.chip->name)
+			seq_printf(p, " %8s", desc->irq_data.chip->name);
+		else
+			seq_printf(p, " %8s", "-");
+	} else {
+		seq_printf(p, " %8s", "None");
+	}
+#ifdef CONFIG_GENIRC_IRQ_SHOW_LEVEL
+	seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
+#endif
 	if (desc->name)
 		seq_printf(p, "-%-8s", desc->name);
 

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index a56aa58..079f1d3 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c

@@ -342,13 +342,15 @@
 }
 
 /* Look up a kernel symbol and return it in a text buffer. */
-int sprint_symbol(char *buffer, unsigned long address)
+static int __sprint_symbol(char *buffer, unsigned long address,
+			   int symbol_offset)
 {
 	char *modname;
 	const char *name;
 	unsigned long offset, size;
 	int len;
 
+	address += symbol_offset;
 	name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
 	if (!name)
 		return sprintf(buffer, "0x%lx", address);
@@ -357,17 +359,53 @@
 		strcpy(buffer, name);
 	len = strlen(buffer);
 	buffer += len;
+	offset -= symbol_offset;
 
 	if (modname)
-		len += sprintf(buffer, "+%#lx/%#lx [%s]",
-						offset, size, modname);
+		len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname);
 	else
 		len += sprintf(buffer, "+%#lx/%#lx", offset, size);
 
 	return len;
 }
+
+/**
+ * sprint_symbol - Look up a kernel symbol and return it in a text buffer
+ * @buffer: buffer to be stored
+ * @address: address to lookup
+ *
+ * This function looks up a kernel symbol with @address and stores its name,
+ * offset, size and module name to @buffer if possible. If no symbol was found,
+ * just saves its @address as is.
+ *
+ * This function returns the number of bytes stored in @buffer.
+ */
+int sprint_symbol(char *buffer, unsigned long address)
+{
+	return __sprint_symbol(buffer, address, 0);
+}
+
 EXPORT_SYMBOL_GPL(sprint_symbol);
 
+/**
+ * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer
+ * @buffer: buffer to be stored
+ * @address: address to lookup
+ *
+ * This function is for stack backtrace and does the same thing as
+ * sprint_symbol() but with modified/decreased @address. If there is a
+ * tail-call to the function marked "noreturn", gcc optimized out code after
+ * the call so that the stack-saved return address could point outside of the
+ * caller. This function ensures that kallsyms will find the original caller
+ * by decreasing @address.
+ *
+ * This function returns the number of bytes stored in @buffer.
+ */
+int sprint_backtrace(char *buffer, unsigned long address)
+{
+	return __sprint_symbol(buffer, address, -1);
+}
+
 /* Look up a kernel symbol and print it to the kernel messages. */
 void __print_symbol(const char *fmt, unsigned long address)
 {

diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 1969d2f..71edd2f 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c

@@ -225,7 +225,7 @@
 		      nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
 		      nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
 		      nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
-		      sum_forward_deps = 0, factor = 0;
+		      sum_forward_deps = 0;
 
 	list_for_each_entry(class, &all_lock_classes, lock_entry) {
 
@@ -283,13 +283,6 @@
 			nr_hardirq_unsafe * nr_hardirq_safe +
 			nr_list_entries);
 
-	/*
-	 * Estimated factor between direct and indirect
-	 * dependencies:
-	 */
-	if (nr_list_entries)
-		factor = sum_forward_deps / nr_list_entries;
-
 #ifdef CONFIG_PROVE_LOCKING
 	seq_printf(m, " dependency chains:             %11lu [max: %lu]\n",
 			nr_lock_chains, MAX_LOCKDEP_CHAINS);

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3472bb1..c75925c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c

@@ -145,7 +145,8 @@
  */
 int sysctl_perf_event_paranoid __read_mostly = 1;
 
-int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */
+/* Minimum for 128 pages + 1 for the user control page */
+int sysctl_perf_event_mlock __read_mostly = 516; /* 'free' kb per user */
 
 /*
  * max perf event sample rate
@@ -941,6 +942,7 @@
 static void
 list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 {
+	struct perf_cpu_context *cpuctx;
 	/*
 	 * We can have double detach due to exit/hot-unplug + close.
 	 */
@@ -949,8 +951,17 @@
 
 	event->attach_state &= ~PERF_ATTACH_CONTEXT;
 
-	if (is_cgroup_event(event))
+	if (is_cgroup_event(event)) {
 		ctx->nr_cgroups--;
+		cpuctx = __get_cpu_context(ctx);
+		/*
+		 * if there are no more cgroup events
+		 * then cler cgrp to avoid stale pointer
+		 * in update_cgrp_time_from_cpuctx()
+		 */
+		if (!ctx->nr_cgroups)
+			cpuctx->cgrp = NULL;
+	}
 
 	ctx->nr_events--;
 	if (event->attr.inherit_stat)

diff --git a/kernel/sched.c b/kernel/sched.c
index ae659b9..f592ce6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c

@@ -5473,6 +5473,8 @@
  * yield_to - yield the current processor to another thread in
  * your thread group, or accelerate that thread toward the
  * processor it's on.
+ * @p: target task
+ * @preempt: whether task preemption is allowed or not
  *
  * It's the caller's job to ensure that the target task struct
  * can't go away on us before we can do any checks.
@@ -8449,7 +8451,6 @@
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
-	struct rq *rq;
 	int i;
 
 	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8462,8 +8463,6 @@
 	tg->shares = NICE_0_LOAD;
 
 	for_each_possible_cpu(i) {
-		rq = cpu_rq(i);
-
 		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
 				      GFP_KERNEL, cpu_to_node(i));
 		if (!cfs_rq)

diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index c82f26c..a776a63 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c

@@ -94,6 +94,4 @@
 
 	.prio_changed		= prio_changed_idle,
 	.switched_to		= switched_to_idle,
-
-	/* no .task_new for idle tasks */
 };

diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 84ec9bc..1ba2bd4 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c

@@ -102,6 +102,4 @@
 
 	.prio_changed		= prio_changed_stop,
 	.switched_to		= switched_to_stop,
-
-	/* no .task_new for stop tasks */
 };

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3bd7e3d..8ad5d57 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c

@@ -14,7 +14,7 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/clocksource.h>
 #include <linux/jiffies.h>
 #include <linux/time.h>
@@ -597,13 +597,12 @@
 
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
- * @dev:	unused
  *
  * This is for the generic clocksource timekeeping.
  * xtime/wall_to_monotonic/jiffies/etc are
  * still managed by arch specific suspend/resume code.
  */
-static int timekeeping_resume(struct sys_device *dev)
+static void timekeeping_resume(void)
 {
 	unsigned long flags;
 	struct timespec ts;
@@ -632,11 +631,9 @@
 
 	/* Resume hrtimers */
 	hres_timers_resume();
-
-	return 0;
 }
 
-static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
+static int timekeeping_suspend(void)
 {
 	unsigned long flags;
 
@@ -654,26 +651,18 @@
 }
 
 /* sysfs resume/suspend bits for timekeeping */
-static struct sysdev_class timekeeping_sysclass = {
-	.name		= "timekeeping",
+static struct syscore_ops timekeeping_syscore_ops = {
 	.resume		= timekeeping_resume,
 	.suspend	= timekeeping_suspend,
 };
 
-static struct sys_device device_timer = {
-	.id		= 0,
-	.cls		= &timekeeping_sysclass,
-};
-
-static int __init timekeeping_init_device(void)
+static int __init timekeeping_init_ops(void)
 {
-	int error = sysdev_class_register(&timekeeping_sysclass);
-	if (!error)
-		error = sysdev_register(&device_timer);
-	return error;
+	register_syscore_ops(&timekeeping_syscore_ops);
+	return 0;
 }
 
-device_initcall(timekeeping_init_device);
+device_initcall(timekeeping_init_ops);
 
 /*
  * If the error is already larger, we look ahead even further

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 888b611..c075f4e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c

@@ -1467,7 +1467,7 @@
 		return t_hash_next(m, pos);
 
 	(*pos)++;
-	iter->pos = *pos;
+	iter->pos = iter->func_pos = *pos;
 
 	if (iter->flags & FTRACE_ITER_PRINTALL)
 		return t_hash_start(m, pos);
@@ -1502,7 +1502,6 @@
 	if (!rec)
 		return t_hash_start(m, pos);
 
-	iter->func_pos = *pos;
 	iter->func = rec;
 
 	return iter;

diff --git a/lib/Kconfig b/lib/Kconfig
index 23fa7a3..9c10e38 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig

@@ -158,6 +158,45 @@
 	boolean
 
 #
+# BCH support is selected if needed
+#
+config BCH
+	tristate
+
+config BCH_CONST_PARAMS
+	boolean
+	help
+	  Drivers may select this option to force specific constant
+	  values for parameters 'm' (Galois field order) and 't'
+	  (error correction capability). Those specific values must
+	  be set by declaring default values for symbols BCH_CONST_M
+	  and BCH_CONST_T.
+	  Doing so will enable extra compiler optimizations,
+	  improving encoding and decoding performance up to 2x for
+	  usual (m,t) values (typically such that m*t < 200).
+	  When this option is selected, the BCH library supports
+	  only a single (m,t) configuration. This is mainly useful
+	  for NAND flash board drivers requiring known, fixed BCH
+	  parameters.
+
+config BCH_CONST_M
+	int
+	range 5 15
+	help
+	  Constant value for Galois field order 'm'. If 'k' is the
+	  number of data bits to protect, 'm' should be chosen such
+	  that (k + m*t) <= 2**m - 1.
+	  Drivers should declare a default value for this symbol if
+	  they select option BCH_CONST_PARAMS.
+
+config BCH_CONST_T
+	int
+	help
+	  Constant value for error correction capability in bits 't'.
+	  Drivers should declare a default value for this symbol if
+	  they select option BCH_CONST_PARAMS.
+
+#
 # Textsearch support is select'ed if needed
 #
 config TEXTSEARCH

diff --git a/lib/Makefile b/lib/Makefile
index d7872b5..ef0f285 100644
--- a/lib/Makefile
+++ b/lib/Makefile

@@ -69,6 +69,7 @@
 obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
 obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
 obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
+obj-$(CONFIG_BCH) += bch.o
 obj-$(CONFIG_LZO_COMPRESS) += lzo/
 obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
 obj-$(CONFIG_XZ_DEC) += xz/

diff --git a/lib/bch.c b/lib/bch.c
new file mode 100644
index 0000000..bc89dfe
--- /dev/null
+++ b/lib/bch.c

@@ -0,0 +1,1368 @@
+/*
+ * Generic binary BCH encoding/decoding library
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Copyright © 2011 Parrot S.A.
+ *
+ * Author: Ivan Djelic <ivan.djelic@parrot.com>
+ *
+ * Description:
+ *
+ * This library provides runtime configurable encoding/decoding of binary
+ * Bose-Chaudhuri-Hocquenghem (BCH) codes.
+ *
+ * Call init_bch to get a pointer to a newly allocated bch_control structure for
+ * the given m (Galois field order), t (error correction capability) and
+ * (optional) primitive polynomial parameters.
+ *
+ * Call encode_bch to compute and store ecc parity bytes to a given buffer.
+ * Call decode_bch to detect and locate errors in received data.
+ *
+ * On systems supporting hw BCH features, intermediate results may be provided
+ * to decode_bch in order to skip certain steps. See decode_bch() documentation
+ * for details.
+ *
+ * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
+ * parameters m and t; thus allowing extra compiler optimizations and providing
+ * better (up to 2x) encoding performance. Using this option makes sense when
+ * (m,t) are fixed and known in advance, e.g. when using BCH error correction
+ * on a particular NAND flash device.
+ *
+ * Algorithmic details:
+ *
+ * Encoding is performed by processing 32 input bits in parallel, using 4
+ * remainder lookup tables.
+ *
+ * The final stage of decoding involves the following internal steps:
+ * a. Syndrome computation
+ * b. Error locator polynomial computation using Berlekamp-Massey algorithm
+ * c. Error locator root finding (by far the most expensive step)
+ *
+ * In this implementation, step c is not performed using the usual Chien search.
+ * Instead, an alternative approach described in [1] is used. It consists in
+ * factoring the error locator polynomial using the Berlekamp Trace algorithm
+ * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
+ * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
+ * much better performance than Chien search for usual (m,t) values (typically
+ * m >= 13, t < 32, see [1]).
+ *
+ * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
+ * of characteristic 2, in: Western European Workshop on Research in Cryptology
+ * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
+ * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
+ * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <asm/byteorder.h>
+#include <linux/bch.h>
+
+#if defined(CONFIG_BCH_CONST_PARAMS)
+#define GF_M(_p)               (CONFIG_BCH_CONST_M)
+#define GF_T(_p)               (CONFIG_BCH_CONST_T)
+#define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
+#else
+#define GF_M(_p)               ((_p)->m)
+#define GF_T(_p)               ((_p)->t)
+#define GF_N(_p)               ((_p)->n)
+#endif
+
+#define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
+#define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
+
+#ifndef dbg
+#define dbg(_fmt, args...)     do {} while (0)
+#endif
+
+/*
+ * represent a polynomial over GF(2^m)
+ */
+struct gf_poly {
+	unsigned int deg;    /* polynomial degree */
+	unsigned int c[0];   /* polynomial terms */
+};
+
+/* given its degree, compute a polynomial size in bytes */
+#define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
+
+/* polynomial of degree 1 */
+struct gf_poly_deg1 {
+	struct gf_poly poly;
+	unsigned int   c[2];
+};
+
+/*
+ * same as encode_bch(), but process input data one byte at a time
+ */
+static void encode_bch_unaligned(struct bch_control *bch,
+				 const unsigned char *data, unsigned int len,
+				 uint32_t *ecc)
+{
+	int i;
+	const uint32_t *p;
+	const int l = BCH_ECC_WORDS(bch)-1;
+
+	while (len--) {
+		p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(*data++)) & 0xff);
+
+		for (i = 0; i < l; i++)
+			ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
+
+		ecc[l] = (ecc[l] << 8)^(*p);
+	}
+}
+
+/*
+ * convert ecc bytes to aligned, zero-padded 32-bit ecc words
+ */
+static void load_ecc8(struct bch_control *bch, uint32_t *dst,
+		      const uint8_t *src)
+{
+	uint8_t pad[4] = {0, 0, 0, 0};
+	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
+
+	for (i = 0; i < nwords; i++, src += 4)
+		dst[i] = (src[0] << 24)|(src[1] << 16)|(src[2] << 8)|src[3];
+
+	memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
+	dst[nwords] = (pad[0] << 24)|(pad[1] << 16)|(pad[2] << 8)|pad[3];
+}
+
+/*
+ * convert 32-bit ecc words to ecc bytes
+ */
+static void store_ecc8(struct bch_control *bch, uint8_t *dst,
+		       const uint32_t *src)
+{
+	uint8_t pad[4];
+	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
+
+	for (i = 0; i < nwords; i++) {
+		*dst++ = (src[i] >> 24);
+		*dst++ = (src[i] >> 16) & 0xff;
+		*dst++ = (src[i] >>  8) & 0xff;
+		*dst++ = (src[i] >>  0) & 0xff;
+	}
+	pad[0] = (src[nwords] >> 24);
+	pad[1] = (src[nwords] >> 16) & 0xff;
+	pad[2] = (src[nwords] >>  8) & 0xff;
+	pad[3] = (src[nwords] >>  0) & 0xff;
+	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
+}
+
+/**
+ * encode_bch - calculate BCH ecc parity of data
+ * @bch:   BCH control structure
+ * @data:  data to encode
+ * @len:   data length in bytes
+ * @ecc:   ecc parity data, must be initialized by caller
+ *
+ * The @ecc parity array is used both as input and output parameter, in order to
+ * allow incremental computations. It should be of the size indicated by member
+ * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
+ *
+ * The exact number of computed ecc parity bits is given by member @ecc_bits of
+ * @bch; it may be less than m*t for large values of t.
+ */
+void encode_bch(struct bch_control *bch, const uint8_t *data,
+		unsigned int len, uint8_t *ecc)
+{
+	const unsigned int l = BCH_ECC_WORDS(bch)-1;
+	unsigned int i, mlen;
+	unsigned long m;
+	uint32_t w, r[l+1];
+	const uint32_t * const tab0 = bch->mod8_tab;
+	const uint32_t * const tab1 = tab0 + 256*(l+1);
+	const uint32_t * const tab2 = tab1 + 256*(l+1);
+	const uint32_t * const tab3 = tab2 + 256*(l+1);
+	const uint32_t *pdata, *p0, *p1, *p2, *p3;
+
+	if (ecc) {
+		/* load ecc parity bytes into internal 32-bit buffer */
+		load_ecc8(bch, bch->ecc_buf, ecc);
+	} else {
+		memset(bch->ecc_buf, 0, sizeof(r));
+	}
+
+	/* process first unaligned data bytes */
+	m = ((unsigned long)data) & 3;
+	if (m) {
+		mlen = (len < (4-m)) ? len : 4-m;
+		encode_bch_unaligned(bch, data, mlen, bch->ecc_buf);
+		data += mlen;
+		len  -= mlen;
+	}
+
+	/* process 32-bit aligned data words */
+	pdata = (uint32_t *)data;
+	mlen  = len/4;
+	data += 4*mlen;
+	len  -= 4*mlen;
+	memcpy(r, bch->ecc_buf, sizeof(r));
+
+	/*
+	 * split each 32-bit word into 4 polynomials of weight 8 as follows:
+	 *
+	 * 31 ...24  23 ...16  15 ... 8  7 ... 0
+	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt
+	 *                               tttttttt  mod g = r0 (precomputed)
+	 *                     zzzzzzzz  00000000  mod g = r1 (precomputed)
+	 *           yyyyyyyy  00000000  00000000  mod g = r2 (precomputed)
+	 * xxxxxxxx  00000000  00000000  00000000  mod g = r3 (precomputed)
+	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt  mod g = r0^r1^r2^r3
+	 */
+	while (mlen--) {
+		/* input data is read in big-endian format */
+		w = r[0]^cpu_to_be32(*pdata++);
+		p0 = tab0 + (l+1)*((w >>  0) & 0xff);
+		p1 = tab1 + (l+1)*((w >>  8) & 0xff);
+		p2 = tab2 + (l+1)*((w >> 16) & 0xff);
+		p3 = tab3 + (l+1)*((w >> 24) & 0xff);
+
+		for (i = 0; i < l; i++)
+			r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
+
+		r[l] = p0[l]^p1[l]^p2[l]^p3[l];
+	}
+	memcpy(bch->ecc_buf, r, sizeof(r));
+
+	/* process last unaligned bytes */
+	if (len)
+		encode_bch_unaligned(bch, data, len, bch->ecc_buf);
+
+	/* store ecc parity bytes into original parity buffer */
+	if (ecc)
+		store_ecc8(bch, ecc, bch->ecc_buf);
+}
+EXPORT_SYMBOL_GPL(encode_bch);
+
+static inline int modulo(struct bch_control *bch, unsigned int v)
+{
+	const unsigned int n = GF_N(bch);
+	while (v >= n) {
+		v -= n;
+		v = (v & n) + (v >> GF_M(bch));
+	}
+	return v;
+}
+
+/*
+ * shorter and faster modulo function, only works when v < 2N.
+ */
+static inline int mod_s(struct bch_control *bch, unsigned int v)
+{
+	const unsigned int n = GF_N(bch);
+	return (v < n) ? v : v-n;
+}
+
+static inline int deg(unsigned int poly)
+{
+	/* polynomial degree is the most-significant bit index */
+	return fls(poly)-1;
+}
+
+static inline int parity(unsigned int x)
+{
+	/*
+	 * public domain code snippet, lifted from
+	 * http://www-graphics.stanford.edu/~seander/bithacks.html
+	 */
+	x ^= x >> 1;
+	x ^= x >> 2;
+	x = (x & 0x11111111U) * 0x11111111U;
+	return (x >> 28) & 1;
+}
+
+/* Galois field basic operations: multiply, divide, inverse, etc. */
+
+static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
+				  unsigned int b)
+{
+	return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
+					       bch->a_log_tab[b])] : 0;
+}
+
+static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
+{
+	return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
+}
+
+static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
+				  unsigned int b)
+{
+	return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
+					GF_N(bch)-bch->a_log_tab[b])] : 0;
+}
+
+static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
+{
+	return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
+}
+
+static inline unsigned int a_pow(struct bch_control *bch, int i)
+{
+	return bch->a_pow_tab[modulo(bch, i)];
+}
+
+static inline int a_log(struct bch_control *bch, unsigned int x)
+{
+	return bch->a_log_tab[x];
+}
+
+static inline int a_ilog(struct bch_control *bch, unsigned int x)
+{
+	return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
+}
+
+/*
+ * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
+ */
+static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
+			      unsigned int *syn)
+{
+	int i, j, s;
+	unsigned int m;
+	uint32_t poly;
+	const int t = GF_T(bch);
+
+	s = bch->ecc_bits;
+
+	/* make sure extra bits in last ecc word are cleared */
+	m = ((unsigned int)s) & 31;
+	if (m)
+		ecc[s/32] &= ~((1u << (32-m))-1);
+	memset(syn, 0, 2*t*sizeof(*syn));
+
+	/* compute v(a^j) for j=1 .. 2t-1 */
+	do {
+		poly = *ecc++;
+		s -= 32;
+		while (poly) {
+			i = deg(poly);
+			for (j = 0; j < 2*t; j += 2)
+				syn[j] ^= a_pow(bch, (j+1)*(i+s));
+
+			poly ^= (1 << i);
+		}
+	} while (s > 0);
+
+	/* v(a^(2j)) = v(a^j)^2 */
+	for (j = 0; j < t; j++)
+		syn[2*j+1] = gf_sqr(bch, syn[j]);
+}
+
+static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
+{
+	memcpy(dst, src, GF_POLY_SZ(src->deg));
+}
+
+static int compute_error_locator_polynomial(struct bch_control *bch,
+					    const unsigned int *syn)
+{
+	const unsigned int t = GF_T(bch);
+	const unsigned int n = GF_N(bch);
+	unsigned int i, j, tmp, l, pd = 1, d = syn[0];
+	struct gf_poly *elp = bch->elp;
+	struct gf_poly *pelp = bch->poly_2t[0];
+	struct gf_poly *elp_copy = bch->poly_2t[1];
+	int k, pp = -1;
+
+	memset(pelp, 0, GF_POLY_SZ(2*t));
+	memset(elp, 0, GF_POLY_SZ(2*t));
+
+	pelp->deg = 0;
+	pelp->c[0] = 1;
+	elp->deg = 0;
+	elp->c[0] = 1;
+
+	/* use simplified binary Berlekamp-Massey algorithm */
+	for (i = 0; (i < t) && (elp->deg <= t); i++) {
+		if (d) {
+			k = 2*i-pp;
+			gf_poly_copy(elp_copy, elp);
+			/* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
+			tmp = a_log(bch, d)+n-a_log(bch, pd);
+			for (j = 0; j <= pelp->deg; j++) {
+				if (pelp->c[j]) {
+					l = a_log(bch, pelp->c[j]);
+					elp->c[j+k] ^= a_pow(bch, tmp+l);
+				}
+			}
+			/* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
+			tmp = pelp->deg+k;
+			if (tmp > elp->deg) {
+				elp->deg = tmp;
+				gf_poly_copy(pelp, elp_copy);
+				pd = d;
+				pp = 2*i;
+			}
+		}
+		/* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
+		if (i < t-1) {
+			d = syn[2*i+2];
+			for (j = 1; j <= elp->deg; j++)
+				d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
+		}
+	}
+	dbg("elp=%s\n", gf_poly_str(elp));
+	return (elp->deg > t) ? -1 : (int)elp->deg;
+}
+
+/*
+ * solve a m x m linear system in GF(2) with an expected number of solutions,
+ * and return the number of found solutions
+ */
+static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
+			       unsigned int *sol, int nsol)
+{
+	const int m = GF_M(bch);
+	unsigned int tmp, mask;
+	int rem, c, r, p, k, param[m];
+
+	k = 0;
+	mask = 1 << m;
+
+	/* Gaussian elimination */
+	for (c = 0; c < m; c++) {
+		rem = 0;
+		p = c-k;
+		/* find suitable row for elimination */
+		for (r = p; r < m; r++) {
+			if (rows[r] & mask) {
+				if (r != p) {
+					tmp = rows[r];
+					rows[r] = rows[p];
+					rows[p] = tmp;
+				}
+				rem = r+1;
+				break;
+			}
+		}
+		if (rem) {
+			/* perform elimination on remaining rows */
+			tmp = rows[p];
+			for (r = rem; r < m; r++) {
+				if (rows[r] & mask)
+					rows[r] ^= tmp;
+			}
+		} else {
+			/* elimination not needed, store defective row index */
+			param[k++] = c;
+		}
+		mask >>= 1;
+	}
+	/* rewrite system, inserting fake parameter rows */
+	if (k > 0) {
+		p = k;
+		for (r = m-1; r >= 0; r--) {
+			if ((r > m-1-k) && rows[r])
+				/* system has no solution */
+				return 0;
+
+			rows[r] = (p && (r == param[p-1])) ?
+				p--, 1u << (m-r) : rows[r-p];
+		}
+	}
+
+	if (nsol != (1 << k))
+		/* unexpected number of solutions */
+		return 0;
+
+	for (p = 0; p < nsol; p++) {
+		/* set parameters for p-th solution */
+		for (c = 0; c < k; c++)
+			rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
+
+		/* compute unique solution */
+		tmp = 0;
+		for (r = m-1; r >= 0; r--) {
+			mask = rows[r] & (tmp|1);
+			tmp |= parity(mask) << (m-r);
+		}
+		sol[p] = tmp >> 1;
+	}
+	return nsol;
+}
+
+/*
+ * this function builds and solves a linear system for finding roots of a degree
+ * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
+ */
+static int find_affine4_roots(struct bch_control *bch, unsigned int a,
+			      unsigned int b, unsigned int c,
+			      unsigned int *roots)
+{
+	int i, j, k;
+	const int m = GF_M(bch);
+	unsigned int mask = 0xff, t, rows[16] = {0,};
+
+	j = a_log(bch, b);
+	k = a_log(bch, a);
+	rows[0] = c;
+
+	/* buid linear system to solve X^4+aX^2+bX+c = 0 */
+	for (i = 0; i < m; i++) {
+		rows[i+1] = bch->a_pow_tab[4*i]^
+			(a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
+			(b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
+		j++;
+		k += 2;
+	}
+	/*
+	 * transpose 16x16 matrix before passing it to linear solver
+	 * warning: this code assumes m < 16
+	 */
+	for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
+		for (k = 0; k < 16; k = (k+j+1) & ~j) {
+			t = ((rows[k] >> j)^rows[k+j]) & mask;
+			rows[k] ^= (t << j);
+			rows[k+j] ^= t;
+		}
+	}
+	return solve_linear_system(bch, rows, roots, 4);
+}
+
+/*
+ * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
+ */
+static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
+				unsigned int *roots)
+{
+	int n = 0;
+
+	if (poly->c[0])
+		/* poly[X] = bX+c with c!=0, root=c/b */
+		roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
+				   bch->a_log_tab[poly->c[1]]);
+	return n;
+}
+
+/*
+ * compute roots of a degree 2 polynomial over GF(2^m)
+ */
+static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
+				unsigned int *roots)
+{
+	int n = 0, i, l0, l1, l2;
+	unsigned int u, v, r;
+
+	if (poly->c[0] && poly->c[1]) {
+
+		l0 = bch->a_log_tab[poly->c[0]];
+		l1 = bch->a_log_tab[poly->c[1]];
+		l2 = bch->a_log_tab[poly->c[2]];
+
+		/* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
+		u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
+		/*
+		 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
+		 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
+		 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
+		 * i.e. r and r+1 are roots iff Tr(u)=0
+		 */
+		r = 0;
+		v = u;
+		while (v) {
+			i = deg(v);
+			r ^= bch->xi_tab[i];
+			v ^= (1 << i);
+		}
+		/* verify root */
+		if ((gf_sqr(bch, r)^r) == u) {
+			/* reverse z=a/bX transformation and compute log(1/r) */
+			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
+					    bch->a_log_tab[r]+l2);
+			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
+					    bch->a_log_tab[r^1]+l2);
+		}
+	}
+	return n;
+}
+
+/*
+ * compute roots of a degree 3 polynomial over GF(2^m)
+ */
+static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
+				unsigned int *roots)
+{
+	int i, n = 0;
+	unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
+
+	if (poly->c[0]) {
+		/* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
+		e3 = poly->c[3];
+		c2 = gf_div(bch, poly->c[0], e3);
+		b2 = gf_div(bch, poly->c[1], e3);
+		a2 = gf_div(bch, poly->c[2], e3);
+
+		/* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
+		c = gf_mul(bch, a2, c2);           /* c = a2c2      */
+		b = gf_mul(bch, a2, b2)^c2;        /* b = a2b2 + c2 */
+		a = gf_sqr(bch, a2)^b2;            /* a = a2^2 + b2 */
+
+		/* find the 4 roots of this affine polynomial */
+		if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
+			/* remove a2 from final list of roots */
+			for (i = 0; i < 4; i++) {
+				if (tmp[i] != a2)
+					roots[n++] = a_ilog(bch, tmp[i]);
+			}
+		}
+	}
+	return n;
+}
+
+/*
+ * compute roots of a degree 4 polynomial over GF(2^m)
+ */
+static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
+				unsigned int *roots)
+{
+	int i, l, n = 0;
+	unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
+
+	if (poly->c[0] == 0)
+		return 0;
+
+	/* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
+	e4 = poly->c[4];
+	d = gf_div(bch, poly->c[0], e4);
+	c = gf_div(bch, poly->c[1], e4);
+	b = gf_div(bch, poly->c[2], e4);
+	a = gf_div(bch, poly->c[3], e4);
+
+	/* use Y=1/X transformation to get an affine polynomial */
+	if (a) {
+		/* first, eliminate cX by using z=X+e with ae^2+c=0 */
+		if (c) {
+			/* compute e such that e^2 = c/a */
+			f = gf_div(bch, c, a);
+			l = a_log(bch, f);
+			l += (l & 1) ? GF_N(bch) : 0;
+			e = a_pow(bch, l/2);
+			/*
+			 * use transformation z=X+e:
+			 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
+			 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
+			 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
+			 * z^4 + az^3 +     b'z^2 + d'
+			 */
+			d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
+			b = gf_mul(bch, a, e)^b;
+		}
+		/* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
+		if (d == 0)
+			/* assume all roots have multiplicity 1 */
+			return 0;
+
+		c2 = gf_inv(bch, d);
+		b2 = gf_div(bch, a, d);
+		a2 = gf_div(bch, b, d);
+	} else {
+		/* polynomial is already affine */
+		c2 = d;
+		b2 = c;
+		a2 = b;
+	}
+	/* find the 4 roots of this affine polynomial */
+	if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
+		for (i = 0; i < 4; i++) {
+			/* post-process roots (reverse transformations) */
+			f = a ? gf_inv(bch, roots[i]) : roots[i];
+			roots[i] = a_ilog(bch, f^e);
+		}
+		n = 4;
+	}
+	return n;
+}
+
+/*
+ * build monic, log-based representation of a polynomial
+ */
+static void gf_poly_logrep(struct bch_control *bch,
+			   const struct gf_poly *a, int *rep)
+{
+	int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
+
+	/* represent 0 values with -1; warning, rep[d] is not set to 1 */
+	for (i = 0; i < d; i++)
+		rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
+}
+
+/*
+ * compute polynomial Euclidean division remainder in GF(2^m)[X]
+ */
+static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
+			const struct gf_poly *b, int *rep)
+{
+	int la, p, m;
+	unsigned int i, j, *c = a->c;
+	const unsigned int d = b->deg;
+
+	if (a->deg < d)
+		return;
+
+	/* reuse or compute log representation of denominator */
+	if (!rep) {
+		rep = bch->cache;
+		gf_poly_logrep(bch, b, rep);
+	}
+
+	for (j = a->deg; j >= d; j--) {
+		if (c[j]) {
+			la = a_log(bch, c[j]);
+			p = j-d;
+			for (i = 0; i < d; i++, p++) {
+				m = rep[i];
+				if (m >= 0)
+					c[p] ^= bch->a_pow_tab[mod_s(bch,
+								     m+la)];
+			}
+		}
+	}
+	a->deg = d-1;
+	while (!c[a->deg] && a->deg)
+		a->deg--;
+}
+
+/*
+ * compute polynomial Euclidean division quotient in GF(2^m)[X]
+ */
+static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
+			const struct gf_poly *b, struct gf_poly *q)
+{
+	if (a->deg >= b->deg) {
+		q->deg = a->deg-b->deg;
+		/* compute a mod b (modifies a) */
+		gf_poly_mod(bch, a, b, NULL);
+		/* quotient is stored in upper part of polynomial a */
+		memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
+	} else {
+		q->deg = 0;
+		q->c[0] = 0;
+	}
+}
+
+/*
+ * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
+ */
+static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
+				   struct gf_poly *b)
+{
+	struct gf_poly *tmp;
+
+	dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
+
+	if (a->deg < b->deg) {
+		tmp = b;
+		b = a;
+		a = tmp;
+	}
+
+	while (b->deg > 0) {
+		gf_poly_mod(bch, a, b, NULL);
+		tmp = b;
+		b = a;
+		a = tmp;
+	}
+
+	dbg("%s\n", gf_poly_str(a));
+
+	return a;
+}
+
+/*
+ * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
+ * This is used in Berlekamp Trace algorithm for splitting polynomials
+ */
+static void compute_trace_bk_mod(struct bch_control *bch, int k,
+				 const struct gf_poly *f, struct gf_poly *z,
+				 struct gf_poly *out)
+{
+	const int m = GF_M(bch);
+	int i, j;
+
+	/* z contains z^2j mod f */
+	z->deg = 1;
+	z->c[0] = 0;
+	z->c[1] = bch->a_pow_tab[k];
+
+	out->deg = 0;
+	memset(out, 0, GF_POLY_SZ(f->deg));
+
+	/* compute f log representation only once */
+	gf_poly_logrep(bch, f, bch->cache);
+
+	for (i = 0; i < m; i++) {
+		/* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
+		for (j = z->deg; j >= 0; j--) {
+			out->c[j] ^= z->c[j];
+			z->c[2*j] = gf_sqr(bch, z->c[j]);
+			z->c[2*j+1] = 0;
+		}
+		if (z->deg > out->deg)
+			out->deg = z->deg;
+
+		if (i < m-1) {
+			z->deg *= 2;
+			/* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
+			gf_poly_mod(bch, z, f, bch->cache);
+		}
+	}
+	while (!out->c[out->deg] && out->deg)
+		out->deg--;
+
+	dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
+}
+
+/*
+ * factor a polynomial using Berlekamp Trace algorithm (BTA)
+ */
+static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
+			      struct gf_poly **g, struct gf_poly **h)
+{
+	struct gf_poly *f2 = bch->poly_2t[0];
+	struct gf_poly *q  = bch->poly_2t[1];
+	struct gf_poly *tk = bch->poly_2t[2];
+	struct gf_poly *z  = bch->poly_2t[3];
+	struct gf_poly *gcd;
+
+	dbg("factoring %s...\n", gf_poly_str(f));
+
+	*g = f;
+	*h = NULL;
+
+	/* tk = Tr(a^k.X) mod f */
+	compute_trace_bk_mod(bch, k, f, z, tk);
+
+	if (tk->deg > 0) {
+		/* compute g = gcd(f, tk) (destructive operation) */
+		gf_poly_copy(f2, f);
+		gcd = gf_poly_gcd(bch, f2, tk);
+		if (gcd->deg < f->deg) {
+			/* compute h=f/gcd(f,tk); this will modify f and q */
+			gf_poly_div(bch, f, gcd, q);
+			/* store g and h in-place (clobbering f) */
+			*h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
+			gf_poly_copy(*g, gcd);
+			gf_poly_copy(*h, q);
+		}
+	}
+}
+
+/*
+ * find roots of a polynomial, using BTZ algorithm; see the beginning of this
+ * file for details
+ */
+static int find_poly_roots(struct bch_control *bch, unsigned int k,
+			   struct gf_poly *poly, unsigned int *roots)
+{
+	int cnt;
+	struct gf_poly *f1, *f2;
+
+	switch (poly->deg) {
+		/* handle low degree polynomials with ad hoc techniques */
+	case 1:
+		cnt = find_poly_deg1_roots(bch, poly, roots);
+		break;
+	case 2:
+		cnt = find_poly_deg2_roots(bch, poly, roots);
+		break;
+	case 3:
+		cnt = find_poly_deg3_roots(bch, poly, roots);
+		break;
+	case 4:
+		cnt = find_poly_deg4_roots(bch, poly, roots);
+		break;
+	default:
+		/* factor polynomial using Berlekamp Trace Algorithm (BTA) */
+		cnt = 0;
+		if (poly->deg && (k <= GF_M(bch))) {
+			factor_polynomial(bch, k, poly, &f1, &f2);
+			if (f1)
+				cnt += find_poly_roots(bch, k+1, f1, roots);
+			if (f2)
+				cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
+		}
+		break;
+	}
+	return cnt;
+}
+
+#if defined(USE_CHIEN_SEARCH)
+/*
+ * exhaustive root search (Chien) implementation - not used, included only for
+ * reference/comparison tests
+ */
+static int chien_search(struct bch_control *bch, unsigned int len,
+			struct gf_poly *p, unsigned int *roots)
+{
+	int m;
+	unsigned int i, j, syn, syn0, count = 0;
+	const unsigned int k = 8*len+bch->ecc_bits;
+
+	/* use a log-based representation of polynomial */
+	gf_poly_logrep(bch, p, bch->cache);
+	bch->cache[p->deg] = 0;
+	syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
+
+	for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
+		/* compute elp(a^i) */
+		for (j = 1, syn = syn0; j <= p->deg; j++) {
+			m = bch->cache[j];
+			if (m >= 0)
+				syn ^= a_pow(bch, m+j*i);
+		}
+		if (syn == 0) {
+			roots[count++] = GF_N(bch)-i;
+			if (count == p->deg)
+				break;
+		}
+	}
+	return (count == p->deg) ? count : 0;
+}
+#define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
+#endif /* USE_CHIEN_SEARCH */
+
+/**
+ * decode_bch - decode received codeword and find bit error locations
+ * @bch:      BCH control structure
+ * @data:     received data, ignored if @calc_ecc is provided
+ * @len:      data length in bytes, must always be provided
+ * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
+ * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
+ * @syn:      hw computed syndrome data (if NULL, syndrome is calculated)
+ * @errloc:   output array of error locations
+ *
+ * Returns:
+ *  The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
+ *  invalid parameters were provided
+ *
+ * Depending on the available hw BCH support and the need to compute @calc_ecc
+ * separately (using encode_bch()), this function should be called with one of
+ * the following parameter configurations -
+ *
+ * by providing @data and @recv_ecc only:
+ *   decode_bch(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
+ *
+ * by providing @recv_ecc and @calc_ecc:
+ *   decode_bch(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
+ *
+ * by providing ecc = recv_ecc XOR calc_ecc:
+ *   decode_bch(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
+ *
+ * by providing syndrome results @syn:
+ *   decode_bch(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
+ *
+ * Once decode_bch() has successfully returned with a positive value, error
+ * locations returned in array @errloc should be interpreted as follows -
+ *
+ * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
+ * data correction)
+ *
+ * if (errloc[n] < 8*len), then n-th error is located in data and can be
+ * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
+ *
+ * Note that this function does not perform any data correction by itself, it
+ * merely indicates error locations.
+ */
+int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len,
+	       const uint8_t *recv_ecc, const uint8_t *calc_ecc,
+	       const unsigned int *syn, unsigned int *errloc)
+{
+	const unsigned int ecc_words = BCH_ECC_WORDS(bch);
+	unsigned int nbits;
+	int i, err, nroots;
+	uint32_t sum;
+
+	/* sanity check: make sure data length can be handled */
+	if (8*len > (bch->n-bch->ecc_bits))
+		return -EINVAL;
+
+	/* if caller does not provide syndromes, compute them */
+	if (!syn) {
+		if (!calc_ecc) {
+			/* compute received data ecc into an internal buffer */
+			if (!data || !recv_ecc)
+				return -EINVAL;
+			encode_bch(bch, data, len, NULL);
+		} else {
+			/* load provided calculated ecc */
+			load_ecc8(bch, bch->ecc_buf, calc_ecc);
+		}
+		/* load received ecc or assume it was XORed in calc_ecc */
+		if (recv_ecc) {
+			load_ecc8(bch, bch->ecc_buf2, recv_ecc);
+			/* XOR received and calculated ecc */
+			for (i = 0, sum = 0; i < (int)ecc_words; i++) {
+				bch->ecc_buf[i] ^= bch->ecc_buf2[i];
+				sum |= bch->ecc_buf[i];
+			}
+			if (!sum)
+				/* no error found */
+				return 0;
+		}
+		compute_syndromes(bch, bch->ecc_buf, bch->syn);
+		syn = bch->syn;
+	}
+
+	err = compute_error_locator_polynomial(bch, syn);
+	if (err > 0) {
+		nroots = find_poly_roots(bch, 1, bch->elp, errloc);
+		if (err != nroots)
+			err = -1;
+	}
+	if (err > 0) {
+		/* post-process raw error locations for easier correction */
+		nbits = (len*8)+bch->ecc_bits;
+		for (i = 0; i < err; i++) {
+			if (errloc[i] >= nbits) {
+				err = -1;
+				break;
+			}
+			errloc[i] = nbits-1-errloc[i];
+			errloc[i] = (errloc[i] & ~7)|(7-(errloc[i] & 7));
+		}
+	}
+	return (err >= 0) ? err : -EBADMSG;
+}
+EXPORT_SYMBOL_GPL(decode_bch);
+
+/*
+ * generate Galois field lookup tables
+ */
+static int build_gf_tables(struct bch_control *bch, unsigned int poly)
+{
+	unsigned int i, x = 1;
+	const unsigned int k = 1 << deg(poly);
+
+	/* primitive polynomial must be of degree m */
+	if (k != (1u << GF_M(bch)))
+		return -1;
+
+	for (i = 0; i < GF_N(bch); i++) {
+		bch->a_pow_tab[i] = x;
+		bch->a_log_tab[x] = i;
+		if (i && (x == 1))
+			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
+			return -1;
+		x <<= 1;
+		if (x & k)
+			x ^= poly;
+	}
+	bch->a_pow_tab[GF_N(bch)] = 1;
+	bch->a_log_tab[0] = 0;
+
+	return 0;
+}
+
+/*
+ * compute generator polynomial remainder tables for fast encoding
+ */
+static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
+{
+	int i, j, b, d;
+	uint32_t data, hi, lo, *tab;
+	const int l = BCH_ECC_WORDS(bch);
+	const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
+	const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
+
+	memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
+
+	for (i = 0; i < 256; i++) {
+		/* p(X)=i is a small polynomial of weight <= 8 */
+		for (b = 0; b < 4; b++) {
+			/* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
+			tab = bch->mod8_tab + (b*256+i)*l;
+			data = i << (8*b);
+			while (data) {
+				d = deg(data);
+				/* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
+				data ^= g[0] >> (31-d);
+				for (j = 0; j < ecclen; j++) {
+					hi = (d < 31) ? g[j] << (d+1) : 0;
+					lo = (j+1 < plen) ?
+						g[j+1] >> (31-d) : 0;
+					tab[j] ^= hi|lo;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * build a base for factoring degree 2 polynomials
+ */
+static int build_deg2_base(struct bch_control *bch)
+{
+	const int m = GF_M(bch);
+	int i, j, r;
+	unsigned int sum, x, y, remaining, ak = 0, xi[m];
+
+	/* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
+	for (i = 0; i < m; i++) {
+		for (j = 0, sum = 0; j < m; j++)
+			sum ^= a_pow(bch, i*(1 << j));
+
+		if (sum) {
+			ak = bch->a_pow_tab[i];
+			break;
+		}
+	}
+	/* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
+	remaining = m;
+	memset(xi, 0, sizeof(xi));
+
+	for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
+		y = gf_sqr(bch, x)^x;
+		for (i = 0; i < 2; i++) {
+			r = a_log(bch, y);
+			if (y && (r < m) && !xi[r]) {
+				bch->xi_tab[r] = x;
+				xi[r] = 1;
+				remaining--;
+				dbg("x%d = %x\n", r, x);
+				break;
+			}
+			y ^= ak;
+		}
+	}
+	/* should not happen but check anyway */
+	return remaining ? -1 : 0;
+}
+
+static void *bch_alloc(size_t size, int *err)
+{
+	void *ptr;
+
+	ptr = kmalloc(size, GFP_KERNEL);
+	if (ptr == NULL)
+		*err = 1;
+	return ptr;
+}
+
+/*
+ * compute generator polynomial for given (m,t) parameters.
+ */
+static uint32_t *compute_generator_polynomial(struct bch_control *bch)
+{
+	const unsigned int m = GF_M(bch);
+	const unsigned int t = GF_T(bch);
+	int n, err = 0;
+	unsigned int i, j, nbits, r, word, *roots;
+	struct gf_poly *g;
+	uint32_t *genpoly;
+
+	g = bch_alloc(GF_POLY_SZ(m*t), &err);
+	roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
+	genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
+
+	if (err) {
+		kfree(genpoly);
+		genpoly = NULL;
+		goto finish;
+	}
+
+	/* enumerate all roots of g(X) */
+	memset(roots , 0, (bch->n+1)*sizeof(*roots));
+	for (i = 0; i < t; i++) {
+		for (j = 0, r = 2*i+1; j < m; j++) {
+			roots[r] = 1;
+			r = mod_s(bch, 2*r);
+		}
+	}
+	/* build generator polynomial g(X) */
+	g->deg = 0;
+	g->c[0] = 1;
+	for (i = 0; i < GF_N(bch); i++) {
+		if (roots[i]) {
+			/* multiply g(X) by (X+root) */
+			r = bch->a_pow_tab[i];
+			g->c[g->deg+1] = 1;
+			for (j = g->deg; j > 0; j--)
+				g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
+
+			g->c[0] = gf_mul(bch, g->c[0], r);
+			g->deg++;
+		}
+	}
+	/* store left-justified binary representation of g(X) */
+	n = g->deg+1;
+	i = 0;
+
+	while (n > 0) {
+		nbits = (n > 32) ? 32 : n;
+		for (j = 0, word = 0; j < nbits; j++) {
+			if (g->c[n-1-j])
+				word |= 1u << (31-j);
+		}
+		genpoly[i++] = word;
+		n -= nbits;
+	}
+	bch->ecc_bits = g->deg;
+
+finish:
+	kfree(g);
+	kfree(roots);
+
+	return genpoly;
+}
+
+/**
+ * init_bch - initialize a BCH encoder/decoder
+ * @m:          Galois field order, should be in the range 5-15
+ * @t:          maximum error correction capability, in bits
+ * @prim_poly:  user-provided primitive polynomial (or 0 to use default)
+ *
+ * Returns:
+ *  a newly allocated BCH control structure if successful, NULL otherwise
+ *
+ * This initialization can take some time, as lookup tables are built for fast
+ * encoding/decoding; make sure not to call this function from a time critical
+ * path. Usually, init_bch() should be called on module/driver init and
+ * free_bch() should be called to release memory on exit.
+ *
+ * You may provide your own primitive polynomial of degree @m in argument
+ * @prim_poly, or let init_bch() use its default polynomial.
+ *
+ * Once init_bch() has successfully returned a pointer to a newly allocated
+ * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
+ * the structure.
+ */
+struct bch_control *init_bch(int m, int t, unsigned int prim_poly)
+{
+	int err = 0;
+	unsigned int i, words;
+	uint32_t *genpoly;
+	struct bch_control *bch = NULL;
+
+	const int min_m = 5;
+	const int max_m = 15;
+
+	/* default primitive polynomials */
+	static const unsigned int prim_poly_tab[] = {
+		0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
+		0x402b, 0x8003,
+	};
+
+#if defined(CONFIG_BCH_CONST_PARAMS)
+	if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
+		printk(KERN_ERR "bch encoder/decoder was configured to support "
+		       "parameters m=%d, t=%d only!\n",
+		       CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
+		goto fail;
+	}
+#endif
+	if ((m < min_m) || (m > max_m))
+		/*
+		 * values of m greater than 15 are not currently supported;
+		 * supporting m > 15 would require changing table base type
+		 * (uint16_t) and a small patch in matrix transposition
+		 */
+		goto fail;
+
+	/* sanity checks */
+	if ((t < 1) || (m*t >= ((1 << m)-1)))
+		/* invalid t value */
+		goto fail;
+
+	/* select a primitive polynomial for generating GF(2^m) */
+	if (prim_poly == 0)
+		prim_poly = prim_poly_tab[m-min_m];
+
+	bch = kzalloc(sizeof(*bch), GFP_KERNEL);
+	if (bch == NULL)
+		goto fail;
+
+	bch->m = m;
+	bch->t = t;
+	bch->n = (1 << m)-1;
+	words  = DIV_ROUND_UP(m*t, 32);
+	bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
+	bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
+	bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
+	bch->mod8_tab  = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
+	bch->ecc_buf   = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
+	bch->ecc_buf2  = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
+	bch->xi_tab    = bch_alloc(m*sizeof(*bch->xi_tab), &err);
+	bch->syn       = bch_alloc(2*t*sizeof(*bch->syn), &err);
+	bch->cache     = bch_alloc(2*t*sizeof(*bch->cache), &err);
+	bch->elp       = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
+
+	for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
+		bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
+
+	if (err)
+		goto fail;
+
+	err = build_gf_tables(bch, prim_poly);
+	if (err)
+		goto fail;
+
+	/* use generator polynomial for computing encoding tables */
+	genpoly = compute_generator_polynomial(bch);
+	if (genpoly == NULL)
+		goto fail;
+
+	build_mod8_tables(bch, genpoly);
+	kfree(genpoly);
+
+	err = build_deg2_base(bch);
+	if (err)
+		goto fail;
+
+	return bch;
+
+fail:
+	free_bch(bch);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(init_bch);
+
+/**
+ *  free_bch - free the BCH control structure
+ *  @bch:    BCH control structure to release
+ */
+void free_bch(struct bch_control *bch)
+{
+	unsigned int i;
+
+	if (bch) {
+		kfree(bch->a_pow_tab);
+		kfree(bch->a_log_tab);
+		kfree(bch->mod8_tab);
+		kfree(bch->ecc_buf);
+		kfree(bch->ecc_buf2);
+		kfree(bch->xi_tab);
+		kfree(bch->syn);
+		kfree(bch->cache);
+		kfree(bch->elp);
+
+		for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
+			kfree(bch->poly_2t[i]);
+
+		kfree(bch);
+	}
+}
+EXPORT_SYMBOL_GPL(free_bch);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
+MODULE_DESCRIPTION("Binary BCH encoder/decoder");

diff --git a/lib/show_mem.c b/lib/show_mem.c
index d8d602b..90cbe4b 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c

@@ -9,7 +9,7 @@
 #include <linux/nmi.h>
 #include <linux/quicklist.h>
 
-void __show_mem(unsigned int filter)
+void show_mem(unsigned int filter)
 {
 	pg_data_t *pgdat;
 	unsigned long total = 0, reserved = 0, shared = 0,
@@ -61,8 +61,3 @@
 		quicklist_total_size());
 #endif
 }
-
-void show_mem(void)
-{
-	__show_mem(0);
-}

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 02bcdd5..bc0ac6b 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c

@@ -433,7 +433,9 @@
 	unsigned long value = (unsigned long) ptr;
 #ifdef CONFIG_KALLSYMS
 	char sym[KSYM_SYMBOL_LEN];
-	if (ext != 'f' && ext != 's')
+	if (ext == 'B')
+		sprint_backtrace(sym, value);
+	else if (ext != 'f' && ext != 's')
 		sprint_symbol(sym, value);
 	else
 		kallsyms_lookup(value, NULL, NULL, NULL, sym);
@@ -808,6 +810,7 @@
  * - 'f' For simple symbolic function names without offset
  * - 'S' For symbolic direct pointers with offset
  * - 's' For symbolic direct pointers without offset
+ * - 'B' For backtraced symbolic direct pointers with offset
  * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref]
  * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201]
  * - 'M' For a 6-byte MAC address, it prints the address in the
@@ -867,6 +870,7 @@
 		/* Fallthrough */
 	case 'S':
 	case 's':
+	case 'B':
 		return symbol_string(buf, end, ptr, spec, *fmt);
 	case 'R':
 	case 'r':
@@ -1134,6 +1138,7 @@
  * %ps output the name of a text symbol without offset
  * %pF output the name of a function pointer with its offset
  * %pf output the name of a function pointer without its offset
+ * %pB output the name of a backtrace symbol with its offset
  * %pR output the address range in a struct resource with decoded flags
  * %pr output the address range in a struct resource with raw flags
  * %pM output a 6-byte MAC address with colons

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8fe9d34..0d9a036 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c

@@ -67,14 +67,14 @@
 	struct inode *inode;
 
 	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
 	list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
 		nr_dirty++;
 	list_for_each_entry(inode, &wb->b_io, i_wb_list)
 		nr_io++;
 	list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
 		nr_more_io++;
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_wb_list_lock);
 
 	global_dirty_limits(&background_thresh, &dirty_thresh);
 	bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
@@ -676,11 +676,11 @@
 	if (bdi_has_dirty_io(bdi)) {
 		struct bdi_writeback *dst = &default_backing_dev_info.wb;
 
-		spin_lock(&inode_lock);
+		spin_lock(&inode_wb_list_lock);
 		list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
 		list_splice(&bdi->wb.b_io, &dst->b_io);
 		list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode_wb_list_lock);
 	}
 
 	bdi_unregister(bdi);

diff --git a/mm/filemap.c b/mm/filemap.c
index 04d1992..c641edf 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c

@@ -80,8 +80,8 @@
  *  ->i_mutex
  *    ->i_alloc_sem             (various)
  *
- *  ->inode_lock
- *    ->sb_lock			(fs/fs-writeback.c)
+ *  inode_wb_list_lock
+ *    sb_lock			(fs/fs-writeback.c)
  *    ->mapping->tree_lock	(__sync_single_inode)
  *
  *  ->i_mmap_lock
@@ -98,8 +98,10 @@
  *    ->zone.lru_lock		(check_pte_range->isolate_lru_page)
  *    ->private_lock		(page_remove_rmap->set_page_dirty)
  *    ->tree_lock		(page_remove_rmap->set_page_dirty)
- *    ->inode_lock		(page_remove_rmap->set_page_dirty)
- *    ->inode_lock		(zap_pte_range->set_page_dirty)
+ *    inode_wb_list_lock	(page_remove_rmap->set_page_dirty)
+ *    ->inode->i_lock		(page_remove_rmap->set_page_dirty)
+ *    inode_wb_list_lock	(zap_pte_range->set_page_dirty)
+ *    ->inode->i_lock		(zap_pte_range->set_page_dirty)
  *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
  *
  *  (code doesn't rely on that order, so you could switch it around)

diff --git a/mm/memory.c b/mm/memory.c
index 51a5c23..9da8cab 100644
--- a/mm/memory.c
+++ b/mm/memory.c

@@ -3715,7 +3715,7 @@
 }
 
 /**
- * @access_remote_vm - access another process' address space
+ * access_remote_vm - access another process' address space
  * @mm:		the mm_struct of the target address space
  * @addr:	start address to access
  * @buf:	source or destination buffer

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 62a5cec..6a819d1 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c

@@ -406,7 +406,7 @@
 	task_unlock(current);
 	dump_stack();
 	mem_cgroup_print_oom_info(mem, p);
-	__show_mem(SHOW_MEM_FILTER_NODES);
+	show_mem(SHOW_MEM_FILTER_NODES);
 	if (sysctl_oom_dump_tasks)
 		dump_tasks(mem, nodemask);
 }

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8e5726a..d6e7ba7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c

@@ -2195,7 +2195,7 @@
 			current->comm, order, gfp_mask);
 		dump_stack();
 		if (!should_suppress_show_mem())
-			__show_mem(filter);
+			show_mem(filter);
 	}
 	return page;
 got_pg:

diff --git a/mm/percpu.c b/mm/percpu.c
index c5feb79..8eb5366 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c

@@ -1648,8 +1648,8 @@
 	/* warn if maximum distance is further than 75% of vmalloc space */
 	if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) {
 		pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc "
-			   "space 0x%lx\n",
-			   max_distance, VMALLOC_END - VMALLOC_START);
+			   "space 0x%lx\n", max_distance,
+			   (unsigned long)(VMALLOC_END - VMALLOC_START));
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
 		/* and fail if we have fallback */
 		rc = -EINVAL;

diff --git a/mm/rmap.c b/mm/rmap.c
index 4a8e99a..8da044a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c

@@ -31,11 +31,12 @@
  *             swap_lock (in swap_duplicate, swap_info_get)
  *               mmlist_lock (in mmput, drain_mmlist and others)
  *               mapping->private_lock (in __set_page_dirty_buffers)
- *               inode_lock (in set_page_dirty's __mark_inode_dirty)
+ *               inode->i_lock (in set_page_dirty's __mark_inode_dirty)
+ *               inode_wb_list_lock (in set_page_dirty's __mark_inode_dirty)
  *                 sb_lock (within inode_lock in fs/fs-writeback.c)
  *                 mapping->tree_lock (widely used, in set_page_dirty,
  *                           in arch-dependent flush_dcache_mmap_lock,
- *                           within inode_lock in __sync_single_inode)
+ *                           within inode_wb_list_lock in __sync_single_inode)
  *
  * (code doesn't rely on that order so it could be switched around)
  * ->tasklist_lock

diff --git a/mm/slub.c b/mm/slub.c
index 93de30d..f881874 100644
--- a/mm/slub.c
+++ b/mm/slub.c

@@ -849,11 +849,11 @@
 		local_irq_save(flags);
 		kmemcheck_slab_free(s, x, s->objsize);
 		debug_check_no_locks_freed(x, s->objsize);
-		if (!(s->flags & SLAB_DEBUG_OBJECTS))
-			debug_check_no_obj_freed(x, s->objsize);
 		local_irq_restore(flags);
 	}
 #endif
+	if (!(s->flags & SLAB_DEBUG_OBJECTS))
+		debug_check_no_obj_freed(x, s->objsize);
 }
 
 /*
@@ -1604,7 +1604,7 @@
 
 void init_kmem_cache_cpus(struct kmem_cache *s)
 {
-#if defined(CONFIG_CMPXCHG_LOCAL) && defined(CONFIG_PREEMPT)
+#ifdef CONFIG_CMPXCHG_LOCAL
 	int cpu;
 
 	for_each_possible_cpu(cpu)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 030a002..f61eb2e 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c

@@ -445,9 +445,9 @@
 	ip6h->payload_len = htons(8 + sizeof(*mldq));
 	ip6h->nexthdr = IPPROTO_HOPOPTS;
 	ip6h->hop_limit = 1;
+	ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
 	ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
 			   &ip6h->saddr);
-	ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
 	ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
 
 	hopopt = (u8 *)(ip6h + 1);

diff --git a/net/core/dev.c b/net/core/dev.c
index 0b88eba..f453370 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c

@@ -1353,14 +1353,17 @@
  */
 void dev_disable_lro(struct net_device *dev)
 {
-	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
-	    dev->ethtool_ops->set_flags) {
-		u32 flags = dev->ethtool_ops->get_flags(dev);
-		if (flags & ETH_FLAG_LRO) {
-			flags &= ~ETH_FLAG_LRO;
-			dev->ethtool_ops->set_flags(dev, flags);
-		}
-	}
+	u32 flags;
+
+	if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
+		flags = dev->ethtool_ops->get_flags(dev);
+	else
+		flags = ethtool_op_get_flags(dev);
+
+	if (!(flags & ETH_FLAG_LRO))
+		return;
+
+	__ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
 	WARN_ON(dev->features & NETIF_F_LRO);
 }
 EXPORT_SYMBOL(dev_disable_lro);

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a1086fb..24bd574 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c

@@ -513,7 +513,7 @@
 	}
 }
 
-static int __ethtool_set_flags(struct net_device *dev, u32 data)
+int __ethtool_set_flags(struct net_device *dev, u32 data)
 {
 	u32 changed;
 

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6d85800d..5345b0b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c

@@ -64,6 +64,8 @@
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 
+#include "fib_lookup.h"
+
 static struct ipv4_devconf ipv4_devconf = {
 	.data = {
 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
@@ -151,6 +153,20 @@
 			break;
 		}
 	}
+	if (!result) {
+		struct flowi4 fl4 = { .daddr = addr };
+		struct fib_result res = { 0 };
+		struct fib_table *local;
+
+		/* Fallback to FIB local table so that communication
+		 * over loopback subnets work.
+		 */
+		local = fib_get_table(net, RT_TABLE_LOCAL);
+		if (local &&
+		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
+		    res.type == RTN_LOCAL)
+			result = FIB_RES_DEV(res);
+	}
 	if (result && devref)
 		dev_hold(result);
 	rcu_read_unlock();
@@ -345,6 +361,17 @@
 		}
 	}
 
+	/* On promotion all secondaries from subnet are changing
+	 * the primary IP, we must remove all their routes silently
+	 * and later to add them back with new prefsrc. Do this
+	 * while all addresses are on the device list.
+	 */
+	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
+		if (ifa1->ifa_mask == ifa->ifa_mask &&
+		    inet_ifa_match(ifa1->ifa_address, ifa))
+			fib_del_ifaddr(ifa, ifa1);
+	}
+
 	/* 2. Unlink it */
 
 	*ifap = ifa1->ifa_next;
@@ -364,6 +391,7 @@
 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 
 	if (promote) {
+		struct in_ifaddr *next_sec = promote->ifa_next;
 
 		if (prev_prom) {
 			prev_prom->ifa_next = promote->ifa_next;
@@ -375,7 +403,7 @@
 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
 		blocking_notifier_call_chain(&inetaddr_chain,
 				NETDEV_UP, promote);
-		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
+		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
 			if (ifa1->ifa_mask != ifa->ifa_mask ||
 			    !inet_ifa_match(ifa1->ifa_address, ifa))
 					continue;

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index a373a25..f116ce8 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c

@@ -228,7 +228,7 @@
 		if (res.type != RTN_LOCAL || !accept_local)
 			goto e_inval;
 	}
-	*spec_dst = FIB_RES_PREFSRC(res);
+	*spec_dst = FIB_RES_PREFSRC(net, res);
 	fib_combine_itag(itag, &res);
 	dev_match = false;
 
@@ -258,7 +258,7 @@
 	ret = 0;
 	if (fib_lookup(net, &fl4, &res) == 0) {
 		if (res.type == RTN_UNICAST) {
-			*spec_dst = FIB_RES_PREFSRC(res);
+			*spec_dst = FIB_RES_PREFSRC(net, res);
 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
 		}
 	}
@@ -722,12 +722,17 @@
 	}
 }
 
-static void fib_del_ifaddr(struct in_ifaddr *ifa)
+/* Delete primary or secondary address.
+ * Optionally, on secondary address promotion consider the addresses
+ * from subnet iprim as deleted, even if they are in device list.
+ * In this case the secondary ifa can be in device list.
+ */
+void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct net_device *dev = in_dev->dev;
 	struct in_ifaddr *ifa1;
-	struct in_ifaddr *prim = ifa;
+	struct in_ifaddr *prim = ifa, *prim1 = NULL;
 	__be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
 	__be32 any = ifa->ifa_address & ifa->ifa_mask;
 #define LOCAL_OK	1
@@ -735,17 +740,26 @@
 #define BRD0_OK		4
 #define BRD1_OK		8
 	unsigned ok = 0;
+	int subnet = 0;		/* Primary network */
+	int gone = 1;		/* Address is missing */
+	int same_prefsrc = 0;	/* Another primary with same IP */
 
-	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
-		fib_magic(RTM_DELROUTE,
-			  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
-			  any, ifa->ifa_prefixlen, prim);
-	else {
+	if (ifa->ifa_flags & IFA_F_SECONDARY) {
 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
 		if (prim == NULL) {
 			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
 			return;
 		}
+		if (iprim && iprim != prim) {
+			printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n");
+			return;
+		}
+	} else if (!ipv4_is_zeronet(any) &&
+		   (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
+		fib_magic(RTM_DELROUTE,
+			  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+			  any, ifa->ifa_prefixlen, prim);
+		subnet = 1;
 	}
 
 	/* Deletion is more complicated than add.
@@ -755,6 +769,49 @@
 	 */
 
 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
+		if (ifa1 == ifa) {
+			/* promotion, keep the IP */
+			gone = 0;
+			continue;
+		}
+		/* Ignore IFAs from our subnet */
+		if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
+		    inet_ifa_match(ifa1->ifa_address, iprim))
+			continue;
+
+		/* Ignore ifa1 if it uses different primary IP (prefsrc) */
+		if (ifa1->ifa_flags & IFA_F_SECONDARY) {
+			/* Another address from our subnet? */
+			if (ifa1->ifa_mask == prim->ifa_mask &&
+			    inet_ifa_match(ifa1->ifa_address, prim))
+				prim1 = prim;
+			else {
+				/* We reached the secondaries, so
+				 * same_prefsrc should be determined.
+				 */
+				if (!same_prefsrc)
+					continue;
+				/* Search new prim1 if ifa1 is not
+				 * using the current prim1
+				 */
+				if (!prim1 ||
+				    ifa1->ifa_mask != prim1->ifa_mask ||
+				    !inet_ifa_match(ifa1->ifa_address, prim1))
+					prim1 = inet_ifa_byprefix(in_dev,
+							ifa1->ifa_address,
+							ifa1->ifa_mask);
+				if (!prim1)
+					continue;
+				if (prim1->ifa_local != prim->ifa_local)
+					continue;
+			}
+		} else {
+			if (prim->ifa_local != ifa1->ifa_local)
+				continue;
+			prim1 = ifa1;
+			if (prim != prim1)
+				same_prefsrc = 1;
+		}
 		if (ifa->ifa_local == ifa1->ifa_local)
 			ok |= LOCAL_OK;
 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
@@ -763,19 +820,37 @@
 			ok |= BRD1_OK;
 		if (any == ifa1->ifa_broadcast)
 			ok |= BRD0_OK;
+		/* primary has network specific broadcasts */
+		if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
+			__be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
+			__be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;
+
+			if (!ipv4_is_zeronet(any1)) {
+				if (ifa->ifa_broadcast == brd1 ||
+				    ifa->ifa_broadcast == any1)
+					ok |= BRD_OK;
+				if (brd == brd1 || brd == any1)
+					ok |= BRD1_OK;
+				if (any == brd1 || any == any1)
+					ok |= BRD0_OK;
+			}
+		}
 	}
 
 	if (!(ok & BRD_OK))
 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
-	if (!(ok & BRD1_OK))
-		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
-	if (!(ok & BRD0_OK))
-		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+	if (subnet && ifa->ifa_prefixlen < 31) {
+		if (!(ok & BRD1_OK))
+			fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
+		if (!(ok & BRD0_OK))
+			fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+	}
 	if (!(ok & LOCAL_OK)) {
 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
 
 		/* Check, that this local address finally disappeared. */
-		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
+		if (gone &&
+		    inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
 			/* And the last, but not the least thing.
 			 * We must flush stray FIB entries.
 			 *
@@ -885,6 +960,7 @@
 {
 	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
 	struct net_device *dev = ifa->ifa_dev->dev;
+	struct net *net = dev_net(dev);
 
 	switch (event) {
 	case NETDEV_UP:
@@ -892,12 +968,12 @@
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		fib_sync_up(dev);
 #endif
-		fib_update_nh_saddrs(dev);
+		atomic_inc(&net->ipv4.dev_addr_genid);
 		rt_cache_flush(dev_net(dev), -1);
 		break;
 	case NETDEV_DOWN:
-		fib_del_ifaddr(ifa);
-		fib_update_nh_saddrs(dev);
+		fib_del_ifaddr(ifa, NULL);
+		atomic_inc(&net->ipv4.dev_addr_genid);
 		if (ifa->ifa_dev->ifa_list == NULL) {
 			/* Last address was deleted from this interface.
 			 * Disable IP.
@@ -915,6 +991,7 @@
 {
 	struct net_device *dev = ptr;
 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
+	struct net *net = dev_net(dev);
 
 	if (event == NETDEV_UNREGISTER) {
 		fib_disable_ip(dev, 2, -1);
@@ -932,6 +1009,7 @@
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		fib_sync_up(dev);
 #endif
+		atomic_inc(&net->ipv4.dev_addr_genid);
 		rt_cache_flush(dev_net(dev), -1);
 		break;
 	case NETDEV_DOWN:

diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 4ec3238..af0f14a 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h

@@ -10,7 +10,6 @@
 	struct fib_info		*fa_info;
 	u8			fa_tos;
 	u8			fa_type;
-	u8			fa_scope;
 	u8			fa_state;
 	struct rcu_head		rcu;
 };
@@ -29,7 +28,7 @@
 extern struct fib_info *fib_create_info(struct fib_config *cfg);
 extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-			 u32 tb_id, u8 type, u8 scope, __be32 dst,
+			 u32 tb_id, u8 type, __be32 dst,
 			 int dst_len, u8 tos, struct fib_info *fi,
 			 unsigned int);
 extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 622ac4c..641a5a2 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c

@@ -222,7 +222,7 @@
 	unsigned int mask = (fib_info_hash_size - 1);
 	unsigned int val = fi->fib_nhs;
 
-	val ^= fi->fib_protocol;
+	val ^= (fi->fib_protocol << 8) | fi->fib_scope;
 	val ^= (__force u32)fi->fib_prefsrc;
 	val ^= fi->fib_priority;
 	for_nexthops(fi) {
@@ -248,10 +248,11 @@
 		if (fi->fib_nhs != nfi->fib_nhs)
 			continue;
 		if (nfi->fib_protocol == fi->fib_protocol &&
+		    nfi->fib_scope == fi->fib_scope &&
 		    nfi->fib_prefsrc == fi->fib_prefsrc &&
 		    nfi->fib_priority == fi->fib_priority &&
 		    memcmp(nfi->fib_metrics, fi->fib_metrics,
-			   sizeof(fi->fib_metrics)) == 0 &&
+			   sizeof(u32) * RTAX_MAX) == 0 &&
 		    ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
 		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
 			return fi;
@@ -328,7 +329,7 @@
 		goto errout;
 
 	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
-			    fa->fa_type, fa->fa_scope, key, dst_len,
+			    fa->fa_type, key, dst_len,
 			    fa->fa_tos, fa->fa_info, nlm_flags);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
@@ -695,6 +696,16 @@
 	fib_info_hash_free(old_laddrhash, bytes);
 }
 
+__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
+{
+	nh->nh_saddr = inet_select_addr(nh->nh_dev,
+					nh->nh_gw,
+					nh->nh_parent->fib_scope);
+	nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
+
+	return nh->nh_saddr;
+}
+
 struct fib_info *fib_create_info(struct fib_config *cfg)
 {
 	int err;
@@ -753,6 +764,7 @@
 
 	fi->fib_net = hold_net(net);
 	fi->fib_protocol = cfg->fc_protocol;
+	fi->fib_scope = cfg->fc_scope;
 	fi->fib_flags = cfg->fc_flags;
 	fi->fib_priority = cfg->fc_priority;
 	fi->fib_prefsrc = cfg->fc_prefsrc;
@@ -854,10 +866,7 @@
 	}
 
 	change_nexthops(fi) {
-		nexthop_nh->nh_cfg_scope = cfg->fc_scope;
-		nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev,
-							nexthop_nh->nh_gw,
-							nexthop_nh->nh_cfg_scope);
+		fib_info_update_nh_saddr(net, nexthop_nh);
 	} endfor_nexthops(fi)
 
 link_it:
@@ -906,7 +915,7 @@
 }
 
 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-		  u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
+		  u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
 		  struct fib_info *fi, unsigned int flags)
 {
 	struct nlmsghdr *nlh;
@@ -928,7 +937,7 @@
 	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
 	rtm->rtm_type = type;
 	rtm->rtm_flags = fi->fib_flags;
-	rtm->rtm_scope = scope;
+	rtm->rtm_scope = fi->fib_scope;
 	rtm->rtm_protocol = fi->fib_protocol;
 
 	if (rtm->rtm_dst_len)
@@ -1084,7 +1093,7 @@
 	list_for_each_entry_rcu(fa, fa_head, fa_list) {
 		struct fib_info *next_fi = fa->fa_info;
 
-		if (fa->fa_scope != res->scope ||
+		if (next_fi->fib_scope != res->scope ||
 		    fa->fa_type != RTN_UNICAST)
 			continue;
 
@@ -1128,24 +1137,6 @@
 	return;
 }
 
-void fib_update_nh_saddrs(struct net_device *dev)
-{
-	struct hlist_head *head;
-	struct hlist_node *node;
-	struct fib_nh *nh;
-	unsigned int hash;
-
-	hash = fib_devindex_hashfn(dev->ifindex);
-	head = &fib_info_devhash[hash];
-	hlist_for_each_entry(nh, node, head, nh_hash) {
-		if (nh->nh_dev != dev)
-			continue;
-		nh->nh_saddr = inet_select_addr(nh->nh_dev,
-						nh->nh_gw,
-						nh->nh_cfg_scope);
-	}
-}
-
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
 /*

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3d28a35..90a3ff6 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c

@@ -1245,7 +1245,6 @@
 			if (fa->fa_info->fib_priority != fi->fib_priority)
 				break;
 			if (fa->fa_type == cfg->fc_type &&
-			    fa->fa_scope == cfg->fc_scope &&
 			    fa->fa_info == fi) {
 				fa_match = fa;
 				break;
@@ -1271,7 +1270,6 @@
 			new_fa->fa_tos = fa->fa_tos;
 			new_fa->fa_info = fi;
 			new_fa->fa_type = cfg->fc_type;
-			new_fa->fa_scope = cfg->fc_scope;
 			state = fa->fa_state;
 			new_fa->fa_state = state & ~FA_S_ACCESSED;
 
@@ -1308,7 +1306,6 @@
 	new_fa->fa_info = fi;
 	new_fa->fa_tos = tos;
 	new_fa->fa_type = cfg->fc_type;
-	new_fa->fa_scope = cfg->fc_scope;
 	new_fa->fa_state = 0;
 	/*
 	 * Insert new entry to the list.
@@ -1362,7 +1359,7 @@
 
 			if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
 				continue;
-			if (fa->fa_scope < flp->flowi4_scope)
+			if (fa->fa_info->fib_scope < flp->flowi4_scope)
 				continue;
 			fib_alias_accessed(fa);
 			err = fib_props[fa->fa_type].error;
@@ -1388,7 +1385,7 @@
 				res->prefixlen = plen;
 				res->nh_sel = nhsel;
 				res->type = fa->fa_type;
-				res->scope = fa->fa_scope;
+				res->scope = fa->fa_info->fib_scope;
 				res->fi = fi;
 				res->table = tb;
 				res->fa_head = &li->falh;
@@ -1664,7 +1661,9 @@
 
 		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
 		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
-		     fa->fa_scope == cfg->fc_scope) &&
+		     fa->fa_info->fib_scope == cfg->fc_scope) &&
+		    (!cfg->fc_prefsrc ||
+		     fi->fib_prefsrc == cfg->fc_prefsrc) &&
 		    (!cfg->fc_protocol ||
 		     fi->fib_protocol == cfg->fc_protocol) &&
 		    fib_nh_match(cfg, fi) == 0) {
@@ -1861,7 +1860,6 @@
 				  RTM_NEWROUTE,
 				  tb->tb_id,
 				  fa->fa_type,
-				  fa->fa_scope,
 				  xkey,
 				  plen,
 				  fa->fa_tos,
@@ -2382,7 +2380,7 @@
 				seq_indent(seq, iter->depth+1);
 				seq_printf(seq, "  /%d %s %s", li->plen,
 					   rtn_scope(buf1, sizeof(buf1),
-						     fa->fa_scope),
+						     fa->fa_info->fib_scope),
 					   rtn_type(buf2, sizeof(buf2),
 						    fa->fa_type));
 				if (fa->fa_tos)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 870b518..4b0c811 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c

@@ -1593,8 +1593,6 @@
 			rt->rt_peer_genid = rt_peer_genid();
 		}
 		check_peer_pmtu(dst, peer);
-
-		inet_putpeer(peer);
 	}
 }
 
@@ -1720,7 +1718,7 @@
 
 		rcu_read_lock();
 		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
-			src = FIB_RES_PREFSRC(res);
+			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
 		else
 			src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
 					RT_SCOPE_UNIVERSE);
@@ -2617,7 +2615,7 @@
 		fib_select_default(&res);
 
 	if (!fl4.saddr)
-		fl4.saddr = FIB_RES_PREFSRC(res);
+		fl4.saddr = FIB_RES_PREFSRC(net, res);
 
 	dev_out = FIB_RES_DEV(res);
 	fl4.flowi4_oif = dev_out->ifindex;
@@ -3221,6 +3219,8 @@
 {
 	get_random_bytes(&net->ipv4.rt_genid,
 			 sizeof(net->ipv4.rt_genid));
+	get_random_bytes(&net->ipv4.dev_addr_genid,
+			 sizeof(net->ipv4.dev_addr_genid));
 	return 0;
 }
 

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index da782e7..bef9f04 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c

@@ -2659,7 +2659,7 @@
 #define DBGUNDO(x...) do { } while (0)
 #endif
 
-static void tcp_undo_cwr(struct sock *sk, const int undo)
+static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2671,14 +2671,13 @@
 		else
 			tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
 
-		if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
+		if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
 			tp->snd_ssthresh = tp->prior_ssthresh;
 			TCP_ECN_withdraw_cwr(tp);
 		}
 	} else {
 		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
 	}
-	tcp_moderate_cwnd(tp);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -2699,7 +2698,7 @@
 		 * or our original transmission succeeded.
 		 */
 		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
-		tcp_undo_cwr(sk, 1);
+		tcp_undo_cwr(sk, true);
 		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
 			mib_idx = LINUX_MIB_TCPLOSSUNDO;
 		else
@@ -2726,7 +2725,7 @@
 
 	if (tp->undo_marker && !tp->undo_retrans) {
 		DBGUNDO(sk, "D-SACK");
-		tcp_undo_cwr(sk, 1);
+		tcp_undo_cwr(sk, true);
 		tp->undo_marker = 0;
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
 	}
@@ -2779,7 +2778,7 @@
 		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
 
 		DBGUNDO(sk, "Hoe");
-		tcp_undo_cwr(sk, 0);
+		tcp_undo_cwr(sk, false);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
 
 		/* So... Do not make Hoe's retransmit yet.
@@ -2808,7 +2807,7 @@
 
 		DBGUNDO(sk, "partial loss");
 		tp->lost_out = 0;
-		tcp_undo_cwr(sk, 1);
+		tcp_undo_cwr(sk, true);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
 		inet_csk(sk)->icsk_retransmits = 0;
 		tp->undo_marker = 0;
@@ -2822,8 +2821,11 @@
 static inline void tcp_complete_cwr(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-	tp->snd_cwnd_stamp = tcp_time_stamp;
+	/* Do not moderate cwnd if it's already undone in cwr or recovery */
+	if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
+		tp->snd_cwnd = tp->snd_ssthresh;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
+	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
@@ -3494,7 +3496,7 @@
 	if (flag & FLAG_ECE)
 		tcp_ratehalving_spur_to_response(sk);
 	else
-		tcp_undo_cwr(sk, 1);
+		tcp_undo_cwr(sk, true);
 }
 
 /* F-RTO spurious RTO detection algorithm (RFC4138)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6814c87..843406f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c

@@ -854,7 +854,7 @@
 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 }
 
-struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
+struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 				    struct flowi6 *fl6)
 {
 	int flags = 0;

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 5a11078..d0311a3 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c

@@ -243,6 +243,7 @@
 	memcpy(sta->sta.addr, addr, ETH_ALEN);
 	sta->local = local;
 	sta->sdata = sdata;
+	sta->last_rx = jiffies;
 
 	ewma_init(&sta->avg_signal, 1024, 8);
 

diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 8b40610..e3c36a2 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c

@@ -160,6 +160,28 @@
 
 EXPORT_SYMBOL_GPL(gss_mech_get_by_name);
 
+struct gss_api_mech *
+gss_mech_get_by_OID(struct xdr_netobj *obj)
+{
+	struct gss_api_mech	*pos, *gm = NULL;
+
+	spin_lock(&registered_mechs_lock);
+	list_for_each_entry(pos, &registered_mechs, gm_list) {
+		if (obj->len == pos->gm_oid.len) {
+			if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
+				if (try_module_get(pos->gm_owner))
+					gm = pos;
+				break;
+			}
+		}
+	}
+	spin_unlock(&registered_mechs_lock);
+	return gm;
+
+}
+
+EXPORT_SYMBOL_GPL(gss_mech_get_by_OID);
+
 static inline int
 mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
 {
@@ -193,6 +215,22 @@
 
 EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor);
 
+int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr)
+{
+	struct gss_api_mech *pos = NULL;
+	int i = 0;
+
+	spin_lock(&registered_mechs_lock);
+	list_for_each_entry(pos, &registered_mechs, gm_list) {
+		array_ptr[i] = pos->gm_pfs->pseudoflavor;
+		i++;
+	}
+	spin_unlock(&registered_mechs_lock);
+	return i;
+}
+
+EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors);
+
 u32
 gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
 {

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index be96d42..1e336a0 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c

@@ -710,6 +710,8 @@
 	if (sk == NULL)
 		return;
 
+	transport->srcport = 0;
+
 	write_lock_bh(&sk->sk_callback_lock);
 	transport->inet = NULL;
 	transport->sock = NULL;

diff --git a/sound/core/init.c b/sound/core/init.c
index 3e65da2..a0080aa 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c

@@ -848,6 +848,7 @@
 		return -ENOMEM;
 	mfile->file = file;
 	mfile->disconnected_f_op = NULL;
+	INIT_LIST_HEAD(&mfile->shutdown_list);
 	spin_lock(&card->files_lock);
 	if (card->shutdown) {
 		spin_unlock(&card->files_lock);
@@ -883,6 +884,9 @@
 	list_for_each_entry(mfile, &card->files_list, list) {
 		if (mfile->file == file) {
 			list_del(&mfile->list);
+			spin_lock(&shutdown_lock);
+			list_del(&mfile->shutdown_list);
+			spin_unlock(&shutdown_lock);
 			if (mfile->disconnected_f_op)
 				fops_put(mfile->disconnected_f_op);
 			found = mfile;

diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index ae42b65..fe5c803 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c

@@ -3201,15 +3201,6 @@
 EXPORT_SYMBOL(snd_pcm_lib_mmap_iomem);
 #endif /* SNDRV_PCM_INFO_MMAP */
 
-/* mmap callback with pgprot_noncached */
-int snd_pcm_lib_mmap_noncached(struct snd_pcm_substream *substream,
-			       struct vm_area_struct *area)
-{
-	area->vm_page_prot = pgprot_noncached(area->vm_page_prot);
-	return snd_pcm_default_mmap(substream, area);
-}
-EXPORT_SYMBOL(snd_pcm_lib_mmap_noncached);
-
 /*
  * mmap DMA buffer
  */

diff --git a/sound/oss/dev_table.h b/sound/oss/dev_table.h
index b7617be..0199a31 100644
--- a/sound/oss/dev_table.h
+++ b/sound/oss/dev_table.h

@@ -271,7 +271,7 @@
 	void (*reset) (int dev);
 	void (*hw_control) (int dev, unsigned char *event);
 	int (*load_patch) (int dev, int format, const char __user *addr,
-	     int offs, int count, int pmgr_flag);
+	     int count, int pmgr_flag);
 	void (*aftertouch) (int dev, int voice, int pressure);
 	void (*controller) (int dev, int voice, int ctrl_num, int value);
 	void (*panning) (int dev, int voice, int value);

diff --git a/sound/oss/midi_synth.c b/sound/oss/midi_synth.c
index 3c09374..2292c23 100644
--- a/sound/oss/midi_synth.c
+++ b/sound/oss/midi_synth.c

@@ -476,7 +476,7 @@
 
 int
 midi_synth_load_patch(int dev, int format, const char __user *addr,
-		      int offs, int count, int pmgr_flag)
+		      int count, int pmgr_flag)
 {
 	int             orig_dev = synth_devs[dev]->midi_dev;
 
@@ -491,33 +491,29 @@
 	if (!prefix_cmd(orig_dev, 0xf0))
 		return 0;
 
+	/* Invalid patch format */
 	if (format != SYSEX_PATCH)
-	{
-/*		  printk("MIDI Error: Invalid patch format (key) 0x%x\n", format);*/
 		  return -EINVAL;
-	}
+
+	/* Patch header too short */
 	if (count < hdr_size)
-	{
-/*		printk("MIDI Error: Patch header too short\n");*/
 		return -EINVAL;
-	}
+
 	count -= hdr_size;
 
 	/*
-	 * Copy the header from user space but ignore the first bytes which have
-	 * been transferred already.
+	 * Copy the header from user space
 	 */
 
-	if(copy_from_user(&((char *) &sysex)[offs], &(addr)[offs], hdr_size - offs))
+	if (copy_from_user(&sysex, addr, hdr_size))
 		return -EFAULT;
- 
- 	if (count < sysex.len)
-	{
-/*		printk(KERN_WARNING "MIDI Warning: Sysex record too short (%d<%d)\n", count, (int) sysex.len);*/
+
+	/* Sysex record too short */
+	if ((unsigned)count < (unsigned)sysex.len)
 		sysex.len = count;
-	}
-  	left = sysex.len;
-  	src_offs = 0;
+
+	left = sysex.len;
+	src_offs = 0;
 
 	for (i = 0; i < left && !signal_pending(current); i++)
 	{

diff --git a/sound/oss/midi_synth.h b/sound/oss/midi_synth.h
index 6bc9d00..b64ddd6 100644
--- a/sound/oss/midi_synth.h
+++ b/sound/oss/midi_synth.h

@@ -8,7 +8,7 @@
 void midi_synth_close (int dev);
 void midi_synth_hw_control (int dev, unsigned char *event);
 int midi_synth_load_patch (int dev, int format, const char __user * addr,
-		 int offs, int count, int pmgr_flag);
+		 int count, int pmgr_flag);
 void midi_synth_panning (int dev, int channel, int pressure);
 void midi_synth_aftertouch (int dev, int channel, int pressure);
 void midi_synth_controller (int dev, int channel, int ctrl_num, int value);

diff --git a/sound/oss/opl3.c b/sound/oss/opl3.c
index 938c48c..407cd67 100644
--- a/sound/oss/opl3.c
+++ b/sound/oss/opl3.c

@@ -820,7 +820,7 @@
 }
 
 static int opl3_load_patch(int dev, int format, const char __user *addr,
-		int offs, int count, int pmgr_flag)
+		int count, int pmgr_flag)
 {
 	struct sbi_instrument ins;
 
@@ -830,11 +830,7 @@
 		return -EINVAL;
 	}
 
-	/*
-	 * What the fuck is going on here?  We leave junk in the beginning
-	 * of ins and then check the field pretty close to that beginning?
-	 */
-	if(copy_from_user(&((char *) &ins)[offs], addr + offs, sizeof(ins) - offs))
+	if (copy_from_user(&ins, addr, sizeof(ins)))
 		return -EFAULT;
 
 	if (ins.channel < 0 || ins.channel >= SBFM_MAXINSTR)
@@ -849,6 +845,10 @@
 
 static void opl3_panning(int dev, int voice, int value)
 {
+
+	if (voice < 0 || voice >= devc->nr_voice)
+		return;
+
 	devc->voc[voice].panning = value;
 }
 
@@ -1066,8 +1066,15 @@
 
 static void opl3_setup_voice(int dev, int voice, int chn)
 {
-	struct channel_info *info =
-	&synth_devs[dev]->chn_info[chn];
+	struct channel_info *info;
+
+	if (voice < 0 || voice >= devc->nr_voice)
+		return;
+
+	if (chn < 0 || chn > 15)
+		return;
+
+	info = &synth_devs[dev]->chn_info[chn];
 
 	opl3_set_instr(dev, voice, info->pgm_num);
 

diff --git a/sound/oss/sequencer.c b/sound/oss/sequencer.c
index 5ea1098..30bcfe4 100644
--- a/sound/oss/sequencer.c
+++ b/sound/oss/sequencer.c

@@ -241,7 +241,7 @@
 				return -ENXIO;
 
 			fmt = (*(short *) &event_rec[0]) & 0xffff;
-			err = synth_devs[dev]->load_patch(dev, fmt, buf, p + 4, c, 0);
+			err = synth_devs[dev]->load_patch(dev, fmt, buf + p, c, 0);
 			if (err < 0)
 				return err;
 

diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c
index 0ac1f98..f53a31e 100644
--- a/sound/pci/asihpi/asihpi.c
+++ b/sound/pci/asihpi/asihpi.c

@@ -22,21 +22,6 @@
  *  for any purpose including commercial applications.
  */
 
-/* >0: print Hw params, timer vars. >1: print stream write/copy sizes  */
-#define REALLY_VERBOSE_LOGGING 0
-
-#if REALLY_VERBOSE_LOGGING
-#define VPRINTK1 snd_printd
-#else
-#define VPRINTK1(...)
-#endif
-
-#if REALLY_VERBOSE_LOGGING > 1
-#define VPRINTK2 snd_printd
-#else
-#define VPRINTK2(...)
-#endif
-
 #include "hpi_internal.h"
 #include "hpimsginit.h"
 #include "hpioctl.h"
@@ -57,11 +42,25 @@
 #include <sound/tlv.h>
 #include <sound/hwdep.h>
 
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("AudioScience inc. <support@audioscience.com>");
 MODULE_DESCRIPTION("AudioScience ALSA ASI5000 ASI6000 ASI87xx ASI89xx");
 
+#if defined CONFIG_SND_DEBUG_VERBOSE
+/**
+ * snd_printddd - very verbose debug printk
+ * @format: format string
+ *
+ * Works like snd_printk() for debugging purposes.
+ * Ignored when CONFIG_SND_DEBUG_VERBOSE is not set.
+ * Must set snd module debug parameter to 3 to enable at runtime.
+ */
+#define snd_printddd(format, args...) \
+	__snd_printk(3, __FILE__, __LINE__, format, ##args)
+#else
+#define snd_printddd(format, args...)	do { } while (0)
+#endif
+
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;	/* index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;	/* ID for this card */
 static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
@@ -289,7 +288,6 @@
 #define hpi_handle_error(x)  handle_error(x, __LINE__, __FILE__)
 
 /***************************** GENERAL PCM ****************/
-#if REALLY_VERBOSE_LOGGING
 static void print_hwparams(struct snd_pcm_hw_params *p)
 {
 	snd_printd("HWPARAMS \n");
@@ -304,9 +302,6 @@
 	snd_printd("periods %d \n", params_periods(p));
 	snd_printd("buffer_size %d \n", params_buffer_size(p));
 }
-#else
-#define print_hwparams(x)
-#endif
 
 static snd_pcm_format_t hpi_to_alsa_formats[] = {
 	-1,			/* INVALID */
@@ -381,13 +376,13 @@
 				"No local sampleclock, err %d\n", err);
 		}
 
-		for (idx = 0; idx < 100; idx++) {
-			if (hpi_sample_clock_query_local_rate(
-				h_control, idx, &sample_rate)) {
-				if (!idx)
-					snd_printk(KERN_ERR
-						"Local rate query failed\n");
-
+		for (idx = -1; idx < 100; idx++) {
+			if (idx == -1) {
+				if (hpi_sample_clock_get_sample_rate(h_control,
+								&sample_rate))
+					continue;
+			} else if (hpi_sample_clock_query_local_rate(h_control,
+							idx, &sample_rate)) {
 				break;
 			}
 
@@ -440,8 +435,6 @@
 		}
 	}
 
-	/* printk(KERN_INFO "Supported rates %X %d %d\n",
-	   rates, rate_min, rate_max); */
 	pcmhw->rates = rates;
 	pcmhw->rate_min = rate_min;
 	pcmhw->rate_max = rate_max;
@@ -466,7 +459,7 @@
 	if (err)
 		return err;
 
-	VPRINTK1(KERN_INFO "format %d, %d chans, %d_hz\n",
+	snd_printdd("format %d, %d chans, %d_hz\n",
 				format, params_channels(params),
 				params_rate(params));
 
@@ -489,13 +482,12 @@
 		err = hpi_stream_host_buffer_attach(dpcm->h_stream,
 			params_buffer_bytes(params),  runtime->dma_addr);
 		if (err == 0) {
-			VPRINTK1(KERN_INFO
+			snd_printdd(
 				"stream_host_buffer_attach succeeded %u %lu\n",
 				params_buffer_bytes(params),
 				(unsigned long)runtime->dma_addr);
 		} else {
-			snd_printd(KERN_INFO
-					"stream_host_buffer_attach error %d\n",
+			snd_printd("stream_host_buffer_attach error %d\n",
 					err);
 			return -ENOMEM;
 		}
@@ -504,7 +496,7 @@
 						&dpcm->hpi_buffer_attached,
 						NULL, NULL, NULL);
 
-		VPRINTK1(KERN_INFO "stream_host_buffer_attach status 0x%x\n",
+		snd_printdd("stream_host_buffer_attach status 0x%x\n",
 				dpcm->hpi_buffer_attached);
 	}
 	bytes_per_sec = params_rate(params) * params_channels(params);
@@ -517,7 +509,7 @@
 	dpcm->bytes_per_sec = bytes_per_sec;
 	dpcm->buffer_bytes = params_buffer_bytes(params);
 	dpcm->period_bytes = params_period_bytes(params);
-	VPRINTK1(KERN_INFO "buffer_bytes=%d, period_bytes=%d, bps=%d\n",
+	snd_printdd("buffer_bytes=%d, period_bytes=%d, bps=%d\n",
 			dpcm->buffer_bytes, dpcm->period_bytes, bytes_per_sec);
 
 	return 0;
@@ -573,7 +565,7 @@
 	struct snd_pcm_substream *s;
 	u16 e;
 
-	VPRINTK1(KERN_INFO "%c%d trigger\n",
+	snd_printdd("%c%d trigger\n",
 			SCHR(substream->stream), substream->number);
 	switch (cmd) {
 	case SNDRV_PCM_TRIGGER_START:
@@ -597,7 +589,7 @@
 				* data??
 				*/
 				unsigned int preload = ds->period_bytes * 1;
-				VPRINTK2(KERN_INFO "%d preload x%x\n", s->number, preload);
+				snd_printddd("%d preload x%x\n", s->number, preload);
 				hpi_handle_error(hpi_outstream_write_buf(
 						ds->h_stream,
 						&runtime->dma_area[0],
@@ -607,7 +599,7 @@
 			}
 
 			if (card->support_grouping) {
-				VPRINTK1(KERN_INFO "\t%c%d group\n",
+				snd_printdd("\t%c%d group\n",
 						SCHR(s->stream),
 						s->number);
 				e = hpi_stream_group_add(
@@ -622,7 +614,7 @@
 			} else
 				break;
 		}
-		VPRINTK1(KERN_INFO "start\n");
+		snd_printdd("start\n");
 		/* start the master stream */
 		snd_card_asihpi_pcm_timer_start(substream);
 		if ((substream->stream == SNDRV_PCM_STREAM_CAPTURE) ||
@@ -644,14 +636,14 @@
 			s->runtime->status->state = SNDRV_PCM_STATE_SETUP;
 
 			if (card->support_grouping) {
-				VPRINTK1(KERN_INFO "\t%c%d group\n",
+				snd_printdd("\t%c%d group\n",
 				SCHR(s->stream),
 					s->number);
 				snd_pcm_trigger_done(s, substream);
 			} else
 				break;
 		}
-		VPRINTK1(KERN_INFO "stop\n");
+		snd_printdd("stop\n");
 
 		/* _prepare and _hwparams reset the stream */
 		hpi_handle_error(hpi_stream_stop(dpcm->h_stream));
@@ -664,12 +656,12 @@
 		break;
 
 	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
-		VPRINTK1(KERN_INFO "pause release\n");
+		snd_printdd("pause release\n");
 		hpi_handle_error(hpi_stream_start(dpcm->h_stream));
 		snd_card_asihpi_pcm_timer_start(substream);
 		break;
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
-		VPRINTK1(KERN_INFO "pause\n");
+		snd_printdd("pause\n");
 		snd_card_asihpi_pcm_timer_stop(substream);
 		hpi_handle_error(hpi_stream_stop(dpcm->h_stream));
 		break;
@@ -741,7 +733,7 @@
 	u16 state;
 	u32 buffer_size, bytes_avail, samples_played, on_card_bytes;
 
-	VPRINTK1(KERN_INFO "%c%d snd_card_asihpi_timer_function\n",
+	snd_printdd("%c%d snd_card_asihpi_timer_function\n",
 				SCHR(substream->stream), substream->number);
 
 	/* find minimum newdata and buffer pos in group */
@@ -770,10 +762,10 @@
 				if ((bytes_avail == 0) &&
 				    (on_card_bytes < ds->pcm_buf_host_rw_ofs)) {
 					hpi_handle_error(hpi_stream_start(ds->h_stream));
-					VPRINTK1(KERN_INFO "P%d start\n", s->number);
+					snd_printdd("P%d start\n", s->number);
 				}
 			} else if (state == HPI_STATE_DRAINED) {
-				VPRINTK1(KERN_WARNING "P%d drained\n",
+				snd_printd(KERN_WARNING "P%d drained\n",
 						s->number);
 				/*snd_pcm_stop(s, SNDRV_PCM_STATE_XRUN);
 				continue; */
@@ -794,13 +786,13 @@
 				newdata);
 		}
 
-		VPRINTK1(KERN_INFO "PB timer hw_ptr x%04lX, appl_ptr x%04lX\n",
+		snd_printdd("hw_ptr x%04lX, appl_ptr x%04lX\n",
 			(unsigned long)frames_to_bytes(runtime,
 						runtime->status->hw_ptr),
 			(unsigned long)frames_to_bytes(runtime,
 						runtime->control->appl_ptr));
 
-		VPRINTK1(KERN_INFO "%d %c%d S=%d, rw=%04X, dma=x%04X, left=x%04X,"
+		snd_printdd("%d %c%d S=%d, rw=%04X, dma=x%04X, left=x%04X,"
 			" aux=x%04X space=x%04X\n",
 			loops, SCHR(s->stream),	s->number,
 			state,	ds->pcm_buf_host_rw_ofs, pcm_buf_dma_ofs, (int)bytes_avail,
@@ -822,7 +814,7 @@
 
 	next_jiffies = max(next_jiffies, 1U);
 	dpcm->timer.expires = jiffies + next_jiffies;
-	VPRINTK1(KERN_INFO "jif %d buf pos x%04X newdata x%04X xfer x%04X\n",
+	snd_printdd("jif %d buf pos x%04X newdata x%04X xfer x%04X\n",
 			next_jiffies, pcm_buf_dma_ofs, newdata, xfercount);
 
 	snd_pcm_group_for_each_entry(s, substream) {
@@ -837,7 +829,7 @@
 		if (xfercount && (on_card_bytes <= ds->period_bytes)) {
 			if (card->support_mmap) {
 				if (s->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-					VPRINTK2(KERN_INFO "P%d write x%04x\n",
+					snd_printddd("P%d write x%04x\n",
 							s->number,
 							ds->period_bytes);
 					hpi_handle_error(
@@ -848,7 +840,7 @@
 							xfercount,
 							&ds->format));
 				} else {
-					VPRINTK2(KERN_INFO "C%d read x%04x\n",
+					snd_printddd("C%d read x%04x\n",
 						s->number,
 						xfercount);
 					hpi_handle_error(
@@ -871,7 +863,7 @@
 static int snd_card_asihpi_playback_ioctl(struct snd_pcm_substream *substream,
 					  unsigned int cmd, void *arg)
 {
-	/* snd_printd(KERN_INFO "Playback ioctl %d\n", cmd); */
+	snd_printdd(KERN_INFO "Playback ioctl %d\n", cmd);
 	return snd_pcm_lib_ioctl(substream, cmd, arg);
 }
 
@@ -881,7 +873,7 @@
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_card_asihpi_pcm *dpcm = runtime->private_data;
 
-	VPRINTK1(KERN_INFO "playback prepare %d\n", substream->number);
+	snd_printdd("playback prepare %d\n", substream->number);
 
 	hpi_handle_error(hpi_outstream_reset(dpcm->h_stream));
 	dpcm->pcm_buf_host_rw_ofs = 0;
@@ -898,7 +890,7 @@
 	snd_pcm_uframes_t ptr;
 
 	ptr = bytes_to_frames(runtime, dpcm->pcm_buf_dma_ofs  % dpcm->buffer_bytes);
-	/* VPRINTK2(KERN_INFO "playback_pointer=x%04lx\n", (unsigned long)ptr); */
+	snd_printddd("playback_pointer=x%04lx\n", (unsigned long)ptr);
 	return ptr;
 }
 
@@ -1014,12 +1006,13 @@
 
 	snd_pcm_hw_constraint_step(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
 		card->update_interval_frames);
+
 	snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
 		card->update_interval_frames * 2, UINT_MAX);
 
 	snd_pcm_set_sync(substream);
 
-	VPRINTK1(KERN_INFO "playback open\n");
+	snd_printdd("playback open\n");
 
 	return 0;
 }
@@ -1030,7 +1023,7 @@
 	struct snd_card_asihpi_pcm *dpcm = runtime->private_data;
 
 	hpi_handle_error(hpi_outstream_close(dpcm->h_stream));
-	VPRINTK1(KERN_INFO "playback close\n");
+	snd_printdd("playback close\n");
 
 	return 0;
 }
@@ -1050,13 +1043,13 @@
 	if (copy_from_user(runtime->dma_area, src, len))
 		return -EFAULT;
 
-	VPRINTK2(KERN_DEBUG "playback copy%d %u bytes\n",
+	snd_printddd("playback copy%d %u bytes\n",
 			substream->number, len);
 
 	hpi_handle_error(hpi_outstream_write_buf(dpcm->h_stream,
 				runtime->dma_area, len, &dpcm->format));
 
-	dpcm->pcm_buf_host_rw_ofs = dpcm->pcm_buf_host_rw_ofs + len;
+	dpcm->pcm_buf_host_rw_ofs += len;
 
 	return 0;
 }
@@ -1066,16 +1059,11 @@
 					    snd_pcm_uframes_t pos,
 					    snd_pcm_uframes_t count)
 {
-	unsigned int len;
-	struct snd_pcm_runtime *runtime = substream->runtime;
-	struct snd_card_asihpi_pcm *dpcm = runtime->private_data;
-
-	len = frames_to_bytes(runtime, count);
-	VPRINTK1(KERN_INFO "playback silence  %u bytes\n", len);
-
-	memset(runtime->dma_area, 0, len);
-	hpi_handle_error(hpi_outstream_write_buf(dpcm->h_stream,
-				runtime->dma_area, len, &dpcm->format));
+	/* Usually writes silence to DMA buffer, which should be overwritten
+	by real audio later.  Our fifos cannot be overwritten, and are not
+	free-running DMAs. Silence is output on fifo underflow.
+	This callback is still required to allow the copy callback to be used.
+	*/
 	return 0;
 }
 
@@ -1110,7 +1098,7 @@
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_card_asihpi_pcm *dpcm = runtime->private_data;
 
-	VPRINTK2(KERN_INFO "capture pointer %d=%d\n",
+	snd_printddd("capture pointer %d=%d\n",
 			substream->number, dpcm->pcm_buf_dma_ofs);
 	/* NOTE Unlike playback can't use actual samples_played
 		for the capture position, because those samples aren't yet in
@@ -1135,7 +1123,7 @@
 	dpcm->pcm_buf_dma_ofs = 0;
 	dpcm->pcm_buf_elapsed_dma_ofs = 0;
 
-	VPRINTK1("Capture Prepare %d\n", substream->number);
+	snd_printdd("Capture Prepare %d\n", substream->number);
 	return 0;
 }
 
@@ -1198,7 +1186,7 @@
 	if (dpcm == NULL)
 		return -ENOMEM;
 
-	VPRINTK1("hpi_instream_open adapter %d stream %d\n",
+	snd_printdd("capture open adapter %d stream %d\n",
 		   card->adapter_index, substream->number);
 
 	err = hpi_handle_error(
@@ -1268,7 +1256,7 @@
 
 	len = frames_to_bytes(runtime, count);
 
-	VPRINTK2(KERN_INFO "capture copy%d %d bytes\n", substream->number, len);
+	snd_printddd("capture copy%d %d bytes\n", substream->number, len);
 	hpi_handle_error(hpi_instream_read_buf(dpcm->h_stream,
 				runtime->dma_area, len));
 
@@ -2887,6 +2875,9 @@
 	if (err)
 		asihpi->update_interval_frames = 512;
 
+	if (!asihpi->support_mmap)
+		asihpi->update_interval_frames *= 2;
+
 	hpi_handle_error(hpi_instream_open(asihpi->adapter_index,
 			     0, &h_stream));
 
@@ -2909,7 +2900,6 @@
 			asihpi->support_mrx
 	      );
 
-
 	err = snd_card_asihpi_pcm_new(asihpi, 0, pcm_substreams);
 	if (err < 0) {
 		snd_printk(KERN_ERR "pcm_new failed\n");
@@ -2944,6 +2934,7 @@
 	sprintf(card->longname, "%s %i",
 			card->shortname, asihpi->adapter_index);
 	err = snd_card_register(card);
+
 	if (!err) {
 		hpi_card->snd_card_asihpi = card;
 		dev++;

diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index 734c6ee..2942d2a 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c

@@ -4256,6 +4256,84 @@
 }
 
 /*
+ * Precision R5500
+ * 0x12 - HP/line-out
+ * 0x13 - speaker (mono)
+ * 0x15 - mic-in
+ */
+
+static struct hda_verb ad1984a_precision_verbs[] = {
+	/* Unmute main output path */
+	{0x03, AC_VERB_SET_AMP_GAIN_MUTE, 0x27}, /* 0dB */
+	{0x21, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE + 0x1f}, /* 0dB */
+	{0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(5) + 0x17}, /* 0dB */
+	/* Analog mixer; mute as default */
+	{0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+	{0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+	{0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)},
+	{0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)},
+	{0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)},
+	/* Select mic as input */
+	{0x0c, AC_VERB_SET_CONNECT_SEL, 0x1},
+	{0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE + 0x27}, /* 0dB */
+	/* Configure as mic */
+	{0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+	{0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0x7002}, /* raise mic as default */
+	/* HP unmute */
+	{0x12, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+	/* turn on EAPD */
+	{0x13, AC_VERB_SET_EAPD_BTLENABLE, 0x02},
+	/* unsolicited event for pin-sense */
+	{0x12, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | AD1884A_HP_EVENT},
+	{ } /* end */
+};
+
+static struct snd_kcontrol_new ad1984a_precision_mixers[] = {
+	HDA_CODEC_VOLUME("Master Playback Volume", 0x21, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Master Playback Switch", 0x21, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("PCM Playback Volume", 0x20, 0x5, HDA_INPUT),
+	HDA_CODEC_MUTE("PCM Playback Switch", 0x20, 0x5, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Playback Volume", 0x20, 0x01, HDA_INPUT),
+	HDA_CODEC_MUTE("Mic Playback Switch", 0x20, 0x01, HDA_INPUT),
+	HDA_CODEC_VOLUME("Mic Boost Volume", 0x15, 0x0, HDA_INPUT),
+	HDA_CODEC_MUTE("Front Playback Switch", 0x12, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Speaker Playback Volume", 0x13, 0x0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Capture Volume", 0x0c, 0x0, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Capture Switch", 0x0c, 0x0, HDA_OUTPUT),
+	{ } /* end */
+};
+
+
+/* mute internal speaker if HP is plugged */
+static void ad1984a_precision_automute(struct hda_codec *codec)
+{
+	unsigned int present;
+
+	present = snd_hda_jack_detect(codec, 0x12);
+	snd_hda_codec_amp_stereo(codec, 0x13, HDA_OUTPUT, 0,
+				 HDA_AMP_MUTE, present ? HDA_AMP_MUTE : 0);
+}
+
+
+/* unsolicited event for HP jack sensing */
+static void ad1984a_precision_unsol_event(struct hda_codec *codec,
+					 unsigned int res)
+{
+	if ((res >> 26) != AD1884A_HP_EVENT)
+		return;
+	ad1984a_precision_automute(codec);
+}
+
+/* initialize jack-sensing, too */
+static int ad1984a_precision_init(struct hda_codec *codec)
+{
+	ad198x_init(codec);
+	ad1984a_precision_automute(codec);
+	return 0;
+}
+
+
+/*
  * HP Touchsmart
  * port-A (0x11)      - front hp-out
  * port-B (0x14)      - unused
@@ -4384,6 +4462,7 @@
 	AD1884A_MOBILE,
 	AD1884A_THINKPAD,
 	AD1984A_TOUCHSMART,
+	AD1984A_PRECISION,
 	AD1884A_MODELS
 };
 
@@ -4393,9 +4472,11 @@
 	[AD1884A_MOBILE]	= "mobile",
 	[AD1884A_THINKPAD]	= "thinkpad",
 	[AD1984A_TOUCHSMART]	= "touchsmart",
+	[AD1984A_PRECISION]	= "precision",
 };
 
 static struct snd_pci_quirk ad1884a_cfg_tbl[] = {
+	SND_PCI_QUIRK(0x1028, 0x04ac, "Precision R5500", AD1984A_PRECISION),
 	SND_PCI_QUIRK(0x103c, 0x3030, "HP", AD1884A_MOBILE),
 	SND_PCI_QUIRK(0x103c, 0x3037, "HP 2230s", AD1884A_LAPTOP),
 	SND_PCI_QUIRK(0x103c, 0x3056, "HP", AD1884A_MOBILE),
@@ -4489,6 +4570,14 @@
 		codec->patch_ops.unsol_event = ad1984a_thinkpad_unsol_event;
 		codec->patch_ops.init = ad1984a_thinkpad_init;
 		break;
+	case AD1984A_PRECISION:
+		spec->mixers[0] = ad1984a_precision_mixers;
+		spec->init_verbs[spec->num_init_verbs++] =
+			ad1984a_precision_verbs;
+		spec->multiout.dig_out_nid = 0;
+		codec->patch_ops.unsol_event = ad1984a_precision_unsol_event;
+		codec->patch_ops.init = ad1984a_precision_init;
+		break;
 	case AD1984A_TOUCHSMART:
 		spec->mixers[0] = ad1984a_touchsmart_mixers;
 		spec->init_verbs[0] = ad1984a_touchsmart_verbs;

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 5d582de..0ef0035 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c

@@ -1290,7 +1290,7 @@
 		case 0x10ec0883:
 		case 0x10ec0885:
 		case 0x10ec0887:
-		case 0x10ec0889:
+		/*case 0x10ec0889:*/ /* this causes an SPDIF problem */
 			alc889_coef_init(codec);
 			break;
 		case 0x10ec0888:

diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index c0dcfca..c66d3f6 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h

@@ -1568,6 +1568,46 @@
 	}
 },
 {
+	USB_DEVICE_VENDOR_SPEC(0x0582, 0x0104),
+	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+		/* .vendor_name = "Roland", */
+		/* .product_name = "UM-1G", */
+		.ifnum = 0,
+		.type = QUIRK_MIDI_FIXED_ENDPOINT,
+		.data = & (const struct snd_usb_midi_endpoint_info) {
+			.out_cables = 0x0001,
+			.in_cables  = 0x0001
+		}
+	}
+},
+{
+	/* Boss JS-8 Jam Station  */
+	USB_DEVICE(0x0582, 0x0109),
+	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+		/* .vendor_name = "BOSS", */
+		/* .product_name = "JS-8", */
+		.ifnum = QUIRK_ANY_INTERFACE,
+		.type = QUIRK_COMPOSITE,
+		.data = (const struct snd_usb_audio_quirk[]) {
+			{
+				.ifnum = 0,
+				.type = QUIRK_AUDIO_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = 1,
+				.type = QUIRK_AUDIO_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = 2,
+				.type = QUIRK_MIDI_STANDARD_INTERFACE
+			},
+			{
+				.ifnum = -1
+			}
+		}
+	}
+},
+{
 	/* has ID 0x0110 when not in Advanced Driver mode */
 	USB_DEVICE_VENDOR_SPEC(0x0582, 0x010f),
 	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 695de4b..e18eb7e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c

@@ -42,9 +42,9 @@
 
 static int perf_evlist__add_sample(struct perf_evlist *evlist,
 				   struct perf_sample *sample,
+				   struct perf_evsel *evsel,
 				   struct addr_location *al)
 {
-	struct perf_evsel *evsel;
 	struct hist_entry *he;
 	int ret;
 
@@ -59,18 +59,6 @@
 		return 0;
 	}
 
-	evsel = perf_evlist__id2evsel(evlist, sample->id);
-	if (evsel == NULL) {
-		/*
-		 * FIXME: Propagate this back, but at least we're in a builtin,
-		 * where exit() is allowed. ;-)
-		 */
-		ui__warning("Invalid %s file, contains samples with id not in "
-			    "its header!\n", input_name);
-		exit_browser(0);
-		exit(1);
-	}
-
 	he = __hists__add_entry(&evsel->hists, al, NULL, 1);
 	if (he == NULL)
 		return -ENOMEM;
@@ -92,6 +80,7 @@
 
 static int process_sample_event(union perf_event *event,
 				struct perf_sample *sample,
+				struct perf_evsel *evsel,
 				struct perf_session *session)
 {
 	struct addr_location al;
@@ -103,7 +92,8 @@
 		return -1;
 	}
 
-	if (!al.filtered && perf_evlist__add_sample(session->evlist, sample, &al)) {
+	if (!al.filtered &&
+	    perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
 		pr_warning("problem incrementing symbol count, "
 			   "skipping event\n");
 		return -1;

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 6b7d911..e821999 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c

@@ -32,6 +32,7 @@
 
 static int diff__process_sample_event(union perf_event *event,
 				      struct perf_sample *sample,
+				      struct perf_evsel *evsel __used,
 				      struct perf_session *session)
 {
 	struct addr_location al;

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index e29f04e..8dfc12b 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c

@@ -43,6 +43,14 @@
 	return perf_event__repipe_synth(event, session);
 }
 
+static int perf_event__repipe_sample(union perf_event *event,
+			      struct perf_sample *sample __used,
+			      struct perf_evsel *evsel __used,
+			      struct perf_session *session)
+{
+	return perf_event__repipe_synth(event, session);
+}
+
 static int perf_event__repipe_mmap(union perf_event *event,
 				   struct perf_sample *sample,
 				   struct perf_session *session)
@@ -124,6 +132,7 @@
 
 static int perf_event__inject_buildid(union perf_event *event,
 				      struct perf_sample *sample,
+				      struct perf_evsel *evsel __used,
 				      struct perf_session *session)
 {
 	struct addr_location al;
@@ -164,7 +173,7 @@
 }
 
 struct perf_event_ops inject_ops = {
-	.sample		= perf_event__repipe,
+	.sample		= perf_event__repipe_sample,
 	.mmap		= perf_event__repipe,
 	.comm		= perf_event__repipe,
 	.fork		= perf_event__repipe,

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 7f618f4..225e963 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c

@@ -305,6 +305,7 @@
 
 static int process_sample_event(union perf_event *event,
 				struct perf_sample *sample,
+				struct perf_evsel *evsel __used,
 				struct perf_session *session)
 {
 	struct thread *thread = perf_session__findnew(session, event->ip.pid);

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 7a2a79d..9ac05aa 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c

@@ -845,7 +845,9 @@
 		die("Unknown type of information\n");
 }
 
-static int process_sample_event(union perf_event *event, struct perf_sample *sample,
+static int process_sample_event(union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel __used,
 				struct perf_session *s)
 {
 	struct thread *thread = perf_session__findnew(s, sample->tid);

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index b1b8200..498c6f7 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c

@@ -50,12 +50,12 @@
 
 static int perf_session__add_hist_entry(struct perf_session *session,
 					struct addr_location *al,
-					struct perf_sample *sample)
+					struct perf_sample *sample,
+					struct perf_evsel *evsel)
 {
 	struct symbol *parent = NULL;
 	int err = 0;
 	struct hist_entry *he;
-	struct perf_evsel *evsel;
 
 	if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
 		err = perf_session__resolve_callchain(session, al->thread,
@@ -64,18 +64,6 @@
 			return err;
 	}
 
-	evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-	if (evsel == NULL) {
-		/*
-		 * FIXME: Propagate this back, but at least we're in a builtin,
-		 * where exit() is allowed. ;-)
-		 */
-		ui__warning("Invalid %s file, contains samples with id %" PRIu64 " not in "
-			    "its header!\n", input_name, sample->id);
-		exit_browser(0);
-		exit(1);
-	}
-
 	he = __hists__add_entry(&evsel->hists, al, parent, sample->period);
 	if (he == NULL)
 		return -ENOMEM;
@@ -113,6 +101,7 @@
 
 static int process_sample_event(union perf_event *event,
 				struct perf_sample *sample,
+				struct perf_evsel *evsel,
 				struct perf_session *session)
 {
 	struct addr_location al;
@@ -127,7 +116,7 @@
 	if (al.filtered || (hide_unresolved && al.sym == NULL))
 		return 0;
 
-	if (perf_session__add_hist_entry(session, &al, sample)) {
+	if (perf_session__add_hist_entry(session, &al, sample, evsel)) {
 		pr_debug("problem incrementing symbol period, skipping event\n");
 		return -1;
 	}

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index a32f411..dcfe887 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c

@@ -1603,6 +1603,7 @@
 
 static int process_sample_event(union perf_event *event,
 				struct perf_sample *sample,
+				struct perf_evsel *evsel __used,
 				struct perf_session *session)
 {
 	struct thread *thread;

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 9f5fc54..ac574ea 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c

@@ -162,19 +162,11 @@
 
 static void process_event(union perf_event *event __unused,
 			  struct perf_sample *sample,
+			  struct perf_evsel *evsel,
 			  struct perf_session *session,
 			  struct thread *thread)
 {
-	struct perf_event_attr *attr;
-	struct perf_evsel *evsel;
-
-	evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-	if (evsel == NULL) {
-		pr_err("Invalid data. Contains samples with id not in "
-		       "its header!\n");
-		return;
-	}
-	attr = &evsel->attr;
+	struct perf_event_attr *attr = &evsel->attr;
 
 	if (output_fields[attr->type] == 0)
 		return;
@@ -244,6 +236,7 @@
 
 static int process_sample_event(union perf_event *event,
 				struct perf_sample *sample,
+				struct perf_evsel *evsel,
 				struct perf_session *session)
 {
 	struct thread *thread = perf_session__findnew(session, event->ip.pid);
@@ -264,7 +257,7 @@
 		last_timestamp = sample->time;
 		return 0;
 	}
-	scripting_ops->process_event(event, sample, session, thread);
+	scripting_ops->process_event(event, sample, evsel, session, thread);
 
 	session->hists.stats.total_period += sample->period;
 	return 0;

diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 67c0459..aa26f4d 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c

@@ -488,6 +488,7 @@
 
 static int process_sample_event(union perf_event *event __used,
 				struct perf_sample *sample,
+				struct perf_evsel *evsel __used,
 				struct perf_session *session)
 {
 	struct trace_entry *te;
@@ -506,6 +507,16 @@
 		struct power_entry_old *peo;
 		peo = (void *)te;
 #endif
+		/*
+		 * FIXME: use evsel, its already mapped from id to perf_evsel,
+		 * remove perf_header__find_event infrastructure bits.
+		 * Mapping all these "power:cpu_idle" strings to the tracepoint
+		 * ID and then just comparing against evsel->attr.config.
+		 *
+		 * e.g.:
+		 *
+		 * if (evsel->attr.config == power_cpu_idle_id)
+		 */
 		event_str = perf_header__find_event(te->type);
 
 		if (!event_str)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 70f1075..676b4fb 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c

@@ -515,7 +515,9 @@
 			break;
 		case 'E':
 			if (top.evlist->nr_entries > 1) {
-				int counter;
+				/* Select 0 as the default event: */
+				int counter = 0;
+
 				fprintf(stderr, "\nAvailable events:");
 
 				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 31f934a..a91cd99 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c

@@ -16,6 +16,7 @@
 
 static int build_id__mark_dso_hit(union perf_event *event,
 				  struct perf_sample *sample __used,
+				  struct perf_evsel *evsel __used,
 				  struct perf_session *session)
 {
 	struct addr_location al;

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index e5230c0..93862a8 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c

@@ -695,13 +695,50 @@
 	return err;
 }
 
+static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
+						 int input, u64 offset, u64 size)
+{
+	struct perf_session *session = container_of(header, struct perf_session, header);
+	struct {
+		struct perf_event_header   header;
+		u8			   build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
+		char			   filename[0];
+	} old_bev;
+	struct build_id_event bev;
+	char filename[PATH_MAX];
+	u64 limit = offset + size;
+
+	while (offset < limit) {
+		ssize_t len;
+
+		if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
+			return -1;
+
+		if (header->needs_swap)
+			perf_event_header__bswap(&old_bev.header);
+
+		len = old_bev.header.size - sizeof(old_bev);
+		if (read(input, filename, len) != len)
+			return -1;
+
+		bev.header = old_bev.header;
+		bev.pid	   = 0;
+		memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
+		__event_process_build_id(&bev, filename, session);
+
+		offset += bev.header.size;
+	}
+
+	return 0;
+}
+
 static int perf_header__read_build_ids(struct perf_header *header,
 				       int input, u64 offset, u64 size)
 {
 	struct perf_session *session = container_of(header, struct perf_session, header);
 	struct build_id_event bev;
 	char filename[PATH_MAX];
-	u64 limit = offset + size;
+	u64 limit = offset + size, orig_offset = offset;
 	int err = -1;
 
 	while (offset < limit) {
@@ -716,6 +753,24 @@
 		len = bev.header.size - sizeof(bev);
 		if (read(input, filename, len) != len)
 			goto out;
+		/*
+		 * The a1645ce1 changeset:
+		 *
+		 * "perf: 'perf kvm' tool for monitoring guest performance from host"
+		 *
+		 * Added a field to struct build_id_event that broke the file
+		 * format.
+		 *
+		 * Since the kernel build-id is the first entry, process the
+		 * table using the old format if the well known
+		 * '[kernel.kallsyms]' string for the kernel build-id has the
+		 * first 4 characters chopped off (where the pid_t sits).
+		 */
+		if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
+			if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
+				return -1;
+			return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
+		}
 
 		__event_process_build_id(&bev, filename, session);
 

diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index cb6858a..3beb97c 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h

@@ -29,6 +29,7 @@
 	u32 nr_events[PERF_RECORD_HEADER_MAX];
 	u32 nr_unknown_events;
 	u32 nr_invalid_chains;
+	u32 nr_unknown_id;
 };
 
 enum hist_column {

diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 6214272..74350ff 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c

@@ -247,6 +247,7 @@
 
 static void perl_process_event(union perf_event *pevent __unused,
 			       struct perf_sample *sample,
+			       struct perf_evsel *evsel,
 			       struct perf_session *session __unused,
 			       struct thread *thread)
 {

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 1b85d60..6ccf70e 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c

@@ -206,6 +206,7 @@
 
 static void python_process_event(union perf_event *pevent __unused,
 				 struct perf_sample *sample,
+				 struct perf_evsel *evsel __unused,
 				 struct perf_session *session __unused,
 				 struct thread *thread)
 {

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c68cf40..caa2245 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c

@@ -280,6 +280,15 @@
 	return 0;
 }
 
+static int process_event_sample_stub(union perf_event *event __used,
+				     struct perf_sample *sample __used,
+				     struct perf_evsel *evsel __used,
+				     struct perf_session *session __used)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
 static int process_event_stub(union perf_event *event __used,
 			      struct perf_sample *sample __used,
 			      struct perf_session *session __used)
@@ -303,7 +312,7 @@
 static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
 {
 	if (handler->sample == NULL)
-		handler->sample = process_event_stub;
+		handler->sample = process_event_sample_stub;
 	if (handler->mmap == NULL)
 		handler->mmap = process_event_stub;
 	if (handler->comm == NULL)
@@ -698,12 +707,19 @@
 				      struct perf_event_ops *ops,
 				      u64 file_offset)
 {
+	struct perf_evsel *evsel;
+
 	dump_event(session, event, file_offset, sample);
 
 	switch (event->header.type) {
 	case PERF_RECORD_SAMPLE:
 		dump_sample(session, event, sample);
-		return ops->sample(event, sample, session);
+		evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+		if (evsel == NULL) {
+			++session->hists.stats.nr_unknown_id;
+			return -1;
+		}
+		return ops->sample(event, sample, evsel, session);
 	case PERF_RECORD_MMAP:
 		return ops->mmap(event, sample, session);
 	case PERF_RECORD_COMM:
@@ -845,6 +861,11 @@
 			    session->hists.stats.nr_unknown_events);
 	}
 
+	if (session->hists.stats.nr_unknown_id != 0) {
+		ui__warning("%u samples with id not present in the header\n",
+			    session->hists.stats.nr_unknown_id);
+	}
+
  	if (session->hists.stats.nr_invalid_chains != 0) {
  		ui__warning("Found invalid callchains!\n\n"
  			    "%u out of %u events were discarded for this reason.\n\n"

diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 0b3c9af..1ac481f 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h

@@ -55,8 +55,11 @@
 	char			filename[0];
 };
 
+struct perf_evsel;
 struct perf_event_ops;
 
+typedef int (*event_sample)(union perf_event *event, struct perf_sample *sample,
+			    struct perf_evsel *evsel, struct perf_session *session);
 typedef int (*event_op)(union perf_event *self, struct perf_sample *sample,
 			struct perf_session *session);
 typedef int (*event_synth_op)(union perf_event *self,
@@ -65,8 +68,8 @@
 			 struct perf_event_ops *ops);
 
 struct perf_event_ops {
-	event_op	sample,
-			mmap,
+	event_sample	sample;
+	event_op	mmap,
 			comm,
 			fork,
 			exit,

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 651dbfe..17df793 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c

@@ -1486,7 +1486,9 @@
 	 * On the first pass, only load images if they have a full symtab.
 	 * Failing that, do a second pass where we accept .dynsym also
 	 */
-	for (self->symtab_type = SYMTAB__BUILD_ID_CACHE, want_symtab = 1;
+	want_symtab = 1;
+restart:
+	for (self->symtab_type = SYMTAB__BUILD_ID_CACHE;
 	     self->symtab_type != SYMTAB__NOT_FOUND;
 	     self->symtab_type++) {
 		switch (self->symtab_type) {
@@ -1536,17 +1538,7 @@
 			snprintf(name, size, "%s%s", symbol_conf.symfs,
 				 self->long_name);
 			break;
-
-		default:
-			/*
-			 * If we wanted a full symtab but no image had one,
-			 * relax our requirements and repeat the search.
-			 */
-			if (want_symtab) {
-				want_symtab = 0;
-				self->symtab_type = SYMTAB__BUILD_ID_CACHE;
-			} else
-				continue;
+		default:;
 		}
 
 		/* Name is now the name of the next image to try */
@@ -1573,6 +1565,15 @@
 		}
 	}
 
+	/*
+	 * If we wanted a full symtab but no image had one,
+	 * relax our requirements and repeat the search.
+	 */
+	if (ret <= 0 && want_symtab) {
+		want_symtab = 0;
+		goto restart;
+	}
+
 	free(name);
 	if (ret < 0 && strstr(self->name, " (deleted)") != NULL)
 		return 0;

diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 66f4b78..c9dcbec 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c

@@ -38,6 +38,7 @@
 
 static void process_event_unsupported(union perf_event *event __unused,
 				      struct perf_sample *sample __unused,
+				      struct perf_evsel *evsel __unused,
 				      struct perf_session *session __unused,
 				      struct thread *thread __unused)
 {

diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index b04da57..f674dda 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h

@@ -280,6 +280,7 @@
 	int (*stop_script) (void);
 	void (*process_event) (union perf_event *event,
 			       struct perf_sample *sample,
+			       struct perf_evsel *evsel,
 			       struct perf_session *session,
 			       struct thread *thread);
 	int (*generate_script) (const char *outfile);

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7bee6dc..556e3ef 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c

@@ -30,7 +30,7 @@
 #include <linux/debugfs.h>
 #include <linux/highmem.h>
 #include <linux/file.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/cpu.h>
 #include <linux/sched.h>
 #include <linux/cpumask.h>
@@ -2446,33 +2446,26 @@
 	debugfs_remove(kvm_debugfs_dir);
 }
 
-static int kvm_suspend(struct sys_device *dev, pm_message_t state)
+static int kvm_suspend(void)
 {
 	if (kvm_usage_count)
 		hardware_disable_nolock(NULL);
 	return 0;
 }
 
-static int kvm_resume(struct sys_device *dev)
+static void kvm_resume(void)
 {
 	if (kvm_usage_count) {
 		WARN_ON(raw_spin_is_locked(&kvm_lock));
 		hardware_enable_nolock(NULL);
 	}
-	return 0;
 }
 
-static struct sysdev_class kvm_sysdev_class = {
-	.name = "kvm",
+static struct syscore_ops kvm_syscore_ops = {
 	.suspend = kvm_suspend,
 	.resume = kvm_resume,
 };
 
-static struct sys_device kvm_sysdev = {
-	.id = 0,
-	.cls = &kvm_sysdev_class,
-};
-
 struct page *bad_page;
 pfn_t bad_pfn;
 
@@ -2556,14 +2549,6 @@
 		goto out_free_2;
 	register_reboot_notifier(&kvm_reboot_notifier);
 
-	r = sysdev_class_register(&kvm_sysdev_class);
-	if (r)
-		goto out_free_3;
-
-	r = sysdev_register(&kvm_sysdev);
-	if (r)
-		goto out_free_4;
-
 	/* A kmem cache lets us meet the alignment requirements of fx_save. */
 	if (!vcpu_align)
 		vcpu_align = __alignof__(struct kvm_vcpu);
@@ -2571,7 +2556,7 @@
 					   0, NULL);
 	if (!kvm_vcpu_cache) {
 		r = -ENOMEM;
-		goto out_free_5;
+		goto out_free_3;
 	}
 
 	r = kvm_async_pf_init();
@@ -2588,6 +2573,8 @@
 		goto out_unreg;
 	}
 
+	register_syscore_ops(&kvm_syscore_ops);
+
 	kvm_preempt_ops.sched_in = kvm_sched_in;
 	kvm_preempt_ops.sched_out = kvm_sched_out;
 
@@ -2599,10 +2586,6 @@
 	kvm_async_pf_deinit();
 out_free:
 	kmem_cache_destroy(kvm_vcpu_cache);
-out_free_5:
-	sysdev_unregister(&kvm_sysdev);
-out_free_4:
-	sysdev_class_unregister(&kvm_sysdev_class);
 out_free_3:
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
@@ -2630,8 +2613,7 @@
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
 	kvm_async_pf_deinit();
-	sysdev_unregister(&kvm_sysdev);
-	sysdev_class_unregister(&kvm_sysdev_class);
+	unregister_syscore_ops(&kvm_syscore_ops);
 	unregister_reboot_notifier(&kvm_reboot_notifier);
 	unregister_cpu_notifier(&kvm_cpu_notifier);
 	on_each_cpu(hardware_disable_nolock, NULL, 1);
commit	6988f20fe04e9ef3aea488cb8ab57fbeb78e12f0	[log] [tgz]
author	Tejun Heo <tj@kernel.org>	Tue May 24 09:59:36 2011 +0200
committer	Tejun Heo <tj@kernel.org>	Tue May 24 09:59:36 2011 +0200
tree	c9d7fc50a2e2147a5ca07e3096e7eeb916ad2da9
parent	0415b00d175e0d8945e6785aad21b5f157976ce0 [diff]
parent	6ea0c34dac89611126455537552cffe6c7e832ad [diff]