Merge branch 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6

* 'upstream-linus' of master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6: (21 commits)
  Fix build break in tsi108.c
  qeth: remove header_ops bug
  ir-functions.c:(.text+0xbce18): undefined reference to `input_event'
  NAPI: kconfig prompt and deleted doc file
  phy/bitbang: missing MODULE_LICENSE
  DM9000 initialization fix
  [PATCH] rt2x00: Add new rt73usb USB ID
  [PATCH] rt2x00: Fix residual check in PLCP calculations.
  [PATCH] iwlwifi: Fix rate setting in probe request for HW sacn
  [PATCH] b43: Make b43_stop() static
  [PATCH] drivers/net/wireless/b43/main.c: fix an uninitialized variable
  [PATCH] iwlwifi: set correct base rate for A band in rs_dbgfs_set_mcs
  [PATCH] zd1211rw, fix oops when ejecting install media
  [PATCH] b43legacy: Fix potential return of uninitialized variable
  [PATCH] iwl4965-base.c: fix off-by-one errors
  [PATCH] p54: Make filter configuration atomic
  [PATCH] rtl8187: remove NICMAC setting in configure_filters callback
  [PATCH] janitorial: fix all double includes in drivers/net/wireless
  [PATCH] rtl8187: Fix more frag bit checking, rts duration calc
  [PATCH] ipw2100: send WEXT scan events
  ...
diff --git a/.gitignore b/.gitignore
index 22fb8fa..8d14531 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@
 *.s
 *.ko
 *.so
+*.so.dbg
 *.mod.c
 *.i
 *.lst
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index d3290c4..aa38cc5 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -46,7 +46,7 @@
 
      <sect1><title>Atomic and pointer manipulation</title>
 !Iinclude/asm-x86/atomic_32.h
-!Iinclude/asm-x86/unaligned_32.h
+!Iinclude/asm-x86/unaligned.h
      </sect1>
 
      <sect1><title>Delaying, scheduling, and timer routines</title>
diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt
new file mode 100644
index 0000000..eda40fd
--- /dev/null
+++ b/Documentation/accounting/cgroupstats.txt
@@ -0,0 +1,27 @@
+Control Groupstats is inspired by the discussion at
+http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as
+suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263.
+
+Per cgroup statistics infrastructure re-uses code from the taskstats
+interface. A new set of cgroup operations are registered with commands
+and attributes specific to cgroups. It should be very easy to
+extend per cgroup statistics, by adding members to the cgroupstats
+structure.
+
+The current model for cgroupstats is a pull, a push model (to post
+statistics on interesting events), should be very easy to add. Currently
+user space requests for statistics by passing the cgroup path.
+Statistics about the state of all the tasks in the cgroup is returned to
+user space.
+
+NOTE: We currently rely on delay accounting for extracting information
+about tasks blocked on I/O. If CONFIG_TASK_DELAY_ACCT is disabled, this
+information will not be available.
+
+To extract cgroup statistics a utility very similar to getdelays.c
+has been developed, the sample output of the utility is shown below
+
+~/balbir/cgroupstats # ./getdelays  -C "/cgroup/a"
+sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
+~/balbir/cgroupstats # ./getdelays  -C "/cgroup"
+sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 552caba..da42ab4 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -87,30 +87,7 @@
 
 	This is used primarily during fault processing.
 
-5) void flush_tlb_pgtables(struct mm_struct *mm,
-			   unsigned long start, unsigned long end)
-
-   The software page tables for address space 'mm' for virtual
-   addresses in the range 'start' to 'end-1' are being torn down.
-
-   Some platforms cache the lowest level of the software page tables
-   in a linear virtually mapped array, to make TLB miss processing
-   more efficient.  On such platforms, since the TLB is caching the
-   software page table structure, it needs to be flushed when parts
-   of the software page table tree are unlinked/freed.
-
-   Sparc64 is one example of a platform which does this.
-
-   Usually, when munmap()'ing an area of user virtual address
-   space, the kernel leaves the page table parts around and just
-   marks the individual pte's as invalid.  However, if very large
-   portions of the address space are unmapped, the kernel frees up
-   those portions of the software page tables to prevent potential
-   excessive kernel memory usage caused by erratic mmap/mmunmap
-   sequences.  It is at these times that flush_tlb_pgtables will
-   be invoked.
-
-6) void update_mmu_cache(struct vm_area_struct *vma,
+5) void update_mmu_cache(struct vm_area_struct *vma,
 			 unsigned long address, pte_t pte)
 
 	At the end of every page fault, this routine is invoked to
@@ -123,7 +100,7 @@
 	translations for software managed TLB configurations.
 	The sparc64 port currently does this.
 
-7) void tlb_migrate_finish(struct mm_struct *mm)
+6) void tlb_migrate_finish(struct mm_struct *mm)
 
 	This interface is called at the end of an explicit
 	process migration. This interface provides a hook
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt
new file mode 100644
index 0000000..98a26f8
--- /dev/null
+++ b/Documentation/cgroups.txt
@@ -0,0 +1,545 @@
+				CGROUPS
+				-------
+
+Written by Paul Menage <menage@google.com> based on Documentation/cpusets.txt
+
+Original copyright statements from cpusets.txt:
+Portions Copyright (C) 2004 BULL SA.
+Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+Modified by Paul Jackson <pj@sgi.com>
+Modified by Christoph Lameter <clameter@sgi.com>
+
+CONTENTS:
+=========
+
+1. Control Groups
+  1.1 What are cgroups ?
+  1.2 Why are cgroups needed ?
+  1.3 How are cgroups implemented ?
+  1.4 What does notify_on_release do ?
+  1.5 How do I use cgroups ?
+2. Usage Examples and Syntax
+  2.1 Basic Usage
+  2.2 Attaching processes
+3. Kernel API
+  3.1 Overview
+  3.2 Synchronization
+  3.3 Subsystem API
+4. Questions
+
+1. Control Groups
+==========
+
+1.1 What are cgroups ?
+----------------------
+
+Control Groups provide a mechanism for aggregating/partitioning sets of
+tasks, and all their future children, into hierarchical groups with
+specialized behaviour.
+
+Definitions:
+
+A *cgroup* associates a set of tasks with a set of parameters for one
+or more subsystems.
+
+A *subsystem* is a module that makes use of the task grouping
+facilities provided by cgroups to treat groups of tasks in
+particular ways. A subsystem is typically a "resource controller" that
+schedules a resource or applies per-cgroup limits, but it may be
+anything that wants to act on a group of processes, e.g. a
+virtualization subsystem.
+
+A *hierarchy* is a set of cgroups arranged in a tree, such that
+every task in the system is in exactly one of the cgroups in the
+hierarchy, and a set of subsystems; each subsystem has system-specific
+state attached to each cgroup in the hierarchy.  Each hierarchy has
+an instance of the cgroup virtual filesystem associated with it.
+
+At any one time there may be multiple active hierachies of task
+cgroups. Each hierarchy is a partition of all tasks in the system.
+
+User level code may create and destroy cgroups by name in an
+instance of the cgroup virtual file system, specify and query to
+which cgroup a task is assigned, and list the task pids assigned to
+a cgroup. Those creations and assignments only affect the hierarchy
+associated with that instance of the cgroup file system.
+
+On their own, the only use for cgroups is for simple job
+tracking. The intention is that other subsystems hook into the generic
+cgroup support to provide new attributes for cgroups, such as
+accounting/limiting the resources which processes in a cgroup can
+access. For example, cpusets (see Documentation/cpusets.txt) allows
+you to associate a set of CPUs and a set of memory nodes with the
+tasks in each cgroup.
+
+1.2 Why are cgroups needed ?
+----------------------------
+
+There are multiple efforts to provide process aggregations in the
+Linux kernel, mainly for resource tracking purposes. Such efforts
+include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
+namespaces. These all require the basic notion of a
+grouping/partitioning of processes, with newly forked processes ending
+in the same group (cgroup) as their parent process.
+
+The kernel cgroup patch provides the minimum essential kernel
+mechanisms required to efficiently implement such groups. It has
+minimal impact on the system fast paths, and provides hooks for
+specific subsystems such as cpusets to provide additional behaviour as
+desired.
+
+Multiple hierarchy support is provided to allow for situations where
+the division of tasks into cgroups is distinctly different for
+different subsystems - having parallel hierarchies allows each
+hierarchy to be a natural division of tasks, without having to handle
+complex combinations of tasks that would be present if several
+unrelated subsystems needed to be forced into the same tree of
+cgroups.
+
+At one extreme, each resource controller or subsystem could be in a
+separate hierarchy; at the other extreme, all subsystems
+would be attached to the same hierarchy.
+
+As an example of a scenario (originally proposed by vatsa@in.ibm.com)
+that can benefit from multiple hierarchies, consider a large
+university server with various users - students, professors, system
+tasks etc. The resource planning for this server could be along the
+following lines:
+
+       CPU :           Top cpuset
+                       /       \
+               CPUSet1         CPUSet2
+                  |              |
+               (Profs)         (Students)
+
+               In addition (system tasks) are attached to topcpuset (so
+               that they can run anywhere) with a limit of 20%
+
+       Memory : Professors (50%), students (30%), system (20%)
+
+       Disk : Prof (50%), students (30%), system (20%)
+
+       Network : WWW browsing (20%), Network File System (60%), others (20%)
+                               / \
+                       Prof (15%) students (5%)
+
+Browsers like firefox/lynx go into the WWW network class, while (k)nfsd go
+into NFS network class.
+
+At the same time firefox/lynx will share an appropriate CPU/Memory class
+depending on who launched it (prof/student).
+
+With the ability to classify tasks differently for different resources
+(by putting those resource subsystems in different hierarchies) then
+the admin can easily set up a script which receives exec notifications
+and depending on who is launching the browser he can
+
+       # echo browser_pid > /mnt/<restype>/<userclass>/tasks
+
+With only a single hierarchy, he now would potentially have to create
+a separate cgroup for every browser launched and associate it with
+approp network and other resource class.  This may lead to
+proliferation of such cgroups.
+
+Also lets say that the administrator would like to give enhanced network
+access temporarily to a student's browser (since it is night and the user
+wants to do online gaming :)  OR give one of the students simulation
+apps enhanced CPU power,
+
+With ability to write pids directly to resource classes, its just a
+matter of :
+
+       # echo pid > /mnt/network/<new_class>/tasks
+       (after some time)
+       # echo pid > /mnt/network/<orig_class>/tasks
+
+Without this ability, he would have to split the cgroup into
+multiple separate ones and then associate the new cgroups with the
+new resource classes.
+
+
+
+1.3 How are cgroups implemented ?
+---------------------------------
+
+Control Groups extends the kernel as follows:
+
+ - Each task in the system has a reference-counted pointer to a
+   css_set.
+
+ - A css_set contains a set of reference-counted pointers to
+   cgroup_subsys_state objects, one for each cgroup subsystem
+   registered in the system. There is no direct link from a task to
+   the cgroup of which it's a member in each hierarchy, but this
+   can be determined by following pointers through the
+   cgroup_subsys_state objects. This is because accessing the
+   subsystem state is something that's expected to happen frequently
+   and in performance-critical code, whereas operations that require a
+   task's actual cgroup assignments (in particular, moving between
+   cgroups) are less common. A linked list runs through the cg_list
+   field of each task_struct using the css_set, anchored at
+   css_set->tasks.
+
+ - A cgroup hierarchy filesystem can be mounted  for browsing and
+   manipulation from user space.
+
+ - You can list all the tasks (by pid) attached to any cgroup.
+
+The implementation of cgroups requires a few, simple hooks
+into the rest of the kernel, none in performance critical paths:
+
+ - in init/main.c, to initialize the root cgroups and initial
+   css_set at system boot.
+
+ - in fork and exit, to attach and detach a task from its css_set.
+
+In addition a new file system, of type "cgroup" may be mounted, to
+enable browsing and modifying the cgroups presently known to the
+kernel.  When mounting a cgroup hierarchy, you may specify a
+comma-separated list of subsystems to mount as the filesystem mount
+options.  By default, mounting the cgroup filesystem attempts to
+mount a hierarchy containing all registered subsystems.
+
+If an active hierarchy with exactly the same set of subsystems already
+exists, it will be reused for the new mount. If no existing hierarchy
+matches, and any of the requested subsystems are in use in an existing
+hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
+is activated, associated with the requested subsystems.
+
+It's not currently possible to bind a new subsystem to an active
+cgroup hierarchy, or to unbind a subsystem from an active cgroup
+hierarchy. This may be possible in future, but is fraught with nasty
+error-recovery issues.
+
+When a cgroup filesystem is unmounted, if there are any
+child cgroups created below the top-level cgroup, that hierarchy
+will remain active even though unmounted; if there are no
+child cgroups then the hierarchy will be deactivated.
+
+No new system calls are added for cgroups - all support for
+querying and modifying cgroups is via this cgroup file system.
+
+Each task under /proc has an added file named 'cgroup' displaying,
+for each active hierarchy, the subsystem names and the cgroup name
+as the path relative to the root of the cgroup file system.
+
+Each cgroup is represented by a directory in the cgroup file system
+containing the following files describing that cgroup:
+
+ - tasks: list of tasks (by pid) attached to that cgroup
+ - notify_on_release flag: run /sbin/cgroup_release_agent on exit?
+
+Other subsystems such as cpusets may add additional files in each
+cgroup dir
+
+New cgroups are created using the mkdir system call or shell
+command.  The properties of a cgroup, such as its flags, are
+modified by writing to the appropriate file in that cgroups
+directory, as listed above.
+
+The named hierarchical structure of nested cgroups allows partitioning
+a large system into nested, dynamically changeable, "soft-partitions".
+
+The attachment of each task, automatically inherited at fork by any
+children of that task, to a cgroup allows organizing the work load
+on a system into related sets of tasks.  A task may be re-attached to
+any other cgroup, if allowed by the permissions on the necessary
+cgroup file system directories.
+
+When a task is moved from one cgroup to another, it gets a new
+css_set pointer - if there's an already existing css_set with the
+desired collection of cgroups then that group is reused, else a new
+css_set is allocated. Note that the current implementation uses a
+linear search to locate an appropriate existing css_set, so isn't
+very efficient. A future version will use a hash table for better
+performance.
+
+To allow access from a cgroup to the css_sets (and hence tasks)
+that comprise it, a set of cg_cgroup_link objects form a lattice;
+each cg_cgroup_link is linked into a list of cg_cgroup_links for
+a single cgroup on its cont_link_list field, and a list of
+cg_cgroup_links for a single css_set on its cg_link_list.
+
+Thus the set of tasks in a cgroup can be listed by iterating over
+each css_set that references the cgroup, and sub-iterating over
+each css_set's task set.
+
+The use of a Linux virtual file system (vfs) to represent the
+cgroup hierarchy provides for a familiar permission and name space
+for cgroups, with a minimum of additional kernel code.
+
+1.4 What does notify_on_release do ?
+------------------------------------
+
+*** notify_on_release is disabled in the current patch set. It will be
+*** reactivated in a future patch in a less-intrusive manner
+
+If the notify_on_release flag is enabled (1) in a cgroup, then
+whenever the last task in the cgroup leaves (exits or attaches to
+some other cgroup) and the last child cgroup of that cgroup
+is removed, then the kernel runs the command specified by the contents
+of the "release_agent" file in that hierarchy's root directory,
+supplying the pathname (relative to the mount point of the cgroup
+file system) of the abandoned cgroup.  This enables automatic
+removal of abandoned cgroups.  The default value of
+notify_on_release in the root cgroup at system boot is disabled
+(0).  The default value of other cgroups at creation is the current
+value of their parents notify_on_release setting. The default value of
+a cgroup hierarchy's release_agent path is empty.
+
+1.5 How do I use cgroups ?
+--------------------------
+
+To start a new job that is to be contained within a cgroup, using
+the "cpuset" cgroup subsystem, the steps are something like:
+
+ 1) mkdir /dev/cgroup
+ 2) mount -t cgroup -ocpuset cpuset /dev/cgroup
+ 3) Create the new cgroup by doing mkdir's and write's (or echo's) in
+    the /dev/cgroup virtual file system.
+ 4) Start a task that will be the "founding father" of the new job.
+ 5) Attach that task to the new cgroup by writing its pid to the
+    /dev/cgroup tasks file for that cgroup.
+ 6) fork, exec or clone the job tasks from this founding father task.
+
+For example, the following sequence of commands will setup a cgroup
+named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+and then start a subshell 'sh' in that cgroup:
+
+  mount -t cgroup cpuset -ocpuset /dev/cgroup
+  cd /dev/cgroup
+  mkdir Charlie
+  cd Charlie
+  /bin/echo 2-3 > cpus
+  /bin/echo 1 > mems
+  /bin/echo $$ > tasks
+  sh
+  # The subshell 'sh' is now running in cgroup Charlie
+  # The next line should display '/Charlie'
+  cat /proc/self/cgroup
+
+2. Usage Examples and Syntax
+============================
+
+2.1 Basic Usage
+---------------
+
+Creating, modifying, using the cgroups can be done through the cgroup
+virtual filesystem.
+
+To mount a cgroup hierarchy will all available subsystems, type:
+# mount -t cgroup xxx /dev/cgroup
+
+The "xxx" is not interpreted by the cgroup code, but will appear in
+/proc/mounts so may be any useful identifying string that you like.
+
+To mount a cgroup hierarchy with just the cpuset and numtasks
+subsystems, type:
+# mount -t cgroup -o cpuset,numtasks hier1 /dev/cgroup
+
+To change the set of subsystems bound to a mounted hierarchy, just
+remount with different options:
+
+# mount -o remount,cpuset,ns  /dev/cgroup
+
+Note that changing the set of subsystems is currently only supported
+when the hierarchy consists of a single (root) cgroup. Supporting
+the ability to arbitrarily bind/unbind subsystems from an existing
+cgroup hierarchy is intended to be implemented in the future.
+
+Then under /dev/cgroup you can find a tree that corresponds to the
+tree of the cgroups in the system. For instance, /dev/cgroup
+is the cgroup that holds the whole system.
+
+If you want to create a new cgroup under /dev/cgroup:
+# cd /dev/cgroup
+# mkdir my_cgroup
+
+Now you want to do something with this cgroup.
+# cd my_cgroup
+
+In this directory you can find several files:
+# ls
+notify_on_release release_agent tasks
+(plus whatever files are added by the attached subsystems)
+
+Now attach your shell to this cgroup:
+# /bin/echo $$ > tasks
+
+You can also create cgroups inside your cgroup by using mkdir in this
+directory.
+# mkdir my_sub_cs
+
+To remove a cgroup, just use rmdir:
+# rmdir my_sub_cs
+
+This will fail if the cgroup is in use (has cgroups inside, or
+has processes attached, or is held alive by other subsystem-specific
+reference).
+
+2.2 Attaching processes
+-----------------------
+
+# /bin/echo PID > tasks
+
+Note that it is PID, not PIDs. You can only attach ONE task at a time.
+If you have several tasks to attach, you have to do it one after another:
+
+# /bin/echo PID1 > tasks
+# /bin/echo PID2 > tasks
+	...
+# /bin/echo PIDn > tasks
+
+3. Kernel API
+=============
+
+3.1 Overview
+------------
+
+Each kernel subsystem that wants to hook into the generic cgroup
+system needs to create a cgroup_subsys object. This contains
+various methods, which are callbacks from the cgroup system, along
+with a subsystem id which will be assigned by the cgroup system.
+
+Other fields in the cgroup_subsys object include:
+
+- subsys_id: a unique array index for the subsystem, indicating which
+  entry in cgroup->subsys[] this subsystem should be
+  managing. Initialized by cgroup_register_subsys(); prior to this
+  it should be initialized to -1
+
+- hierarchy: an index indicating which hierarchy, if any, this
+  subsystem is currently attached to. If this is -1, then the
+  subsystem is not attached to any hierarchy, and all tasks should be
+  considered to be members of the subsystem's top_cgroup. It should
+  be initialized to -1.
+
+- name: should be initialized to a unique subsystem name prior to
+  calling cgroup_register_subsystem. Should be no longer than
+  MAX_CGROUP_TYPE_NAMELEN
+
+Each cgroup object created by the system has an array of pointers,
+indexed by subsystem id; this pointer is entirely managed by the
+subsystem; the generic cgroup code will never touch this pointer.
+
+3.2 Synchronization
+-------------------
+
+There is a global mutex, cgroup_mutex, used by the cgroup
+system. This should be taken by anything that wants to modify a
+cgroup. It may also be taken to prevent cgroups from being
+modified, but more specific locks may be more appropriate in that
+situation.
+
+See kernel/cgroup.c for more details.
+
+Subsystems can take/release the cgroup_mutex via the functions
+cgroup_lock()/cgroup_unlock(), and can
+take/release the callback_mutex via the functions
+cgroup_lock()/cgroup_unlock().
+
+Accessing a task's cgroup pointer may be done in the following ways:
+- while holding cgroup_mutex
+- while holding the task's alloc_lock (via task_lock())
+- inside an rcu_read_lock() section via rcu_dereference()
+
+3.3 Subsystem API
+--------------------------
+
+Each subsystem should:
+
+- add an entry in linux/cgroup_subsys.h
+- define a cgroup_subsys object called <name>_subsys
+
+Each subsystem may export the following methods. The only mandatory
+methods are create/destroy. Any others that are null are presumed to
+be successful no-ops.
+
+struct cgroup_subsys_state *create(struct cgroup *cont)
+LL=cgroup_mutex
+
+Called to create a subsystem state object for a cgroup. The
+subsystem should allocate its subsystem state object for the passed
+cgroup, returning a pointer to the new object on success or a
+negative error code. On success, the subsystem pointer should point to
+a structure of type cgroup_subsys_state (typically embedded in a
+larger subsystem-specific object), which will be initialized by the
+cgroup system. Note that this will be called at initialization to
+create the root subsystem state for this subsystem; this case can be
+identified by the passed cgroup object having a NULL parent (since
+it's the root of the hierarchy) and may be an appropriate place for
+initialization code.
+
+void destroy(struct cgroup *cont)
+LL=cgroup_mutex
+
+The cgroup system is about to destroy the passed cgroup; the
+subsystem should do any necessary cleanup
+
+int can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+	       struct task_struct *task)
+LL=cgroup_mutex
+
+Called prior to moving a task into a cgroup; if the subsystem
+returns an error, this will abort the attach operation.  If a NULL
+task is passed, then a successful result indicates that *any*
+unspecified task can be moved into the cgroup. Note that this isn't
+called on a fork. If this method returns 0 (success) then this should
+remain valid while the caller holds cgroup_mutex.
+
+void attach(struct cgroup_subsys *ss, struct cgroup *cont,
+	    struct cgroup *old_cont, struct task_struct *task)
+LL=cgroup_mutex
+
+
+Called after the task has been attached to the cgroup, to allow any
+post-attachment activity that requires memory allocations or blocking.
+
+void fork(struct cgroup_subsy *ss, struct task_struct *task)
+LL=callback_mutex, maybe read_lock(tasklist_lock)
+
+Called when a task is forked into a cgroup. Also called during
+registration for all existing tasks.
+
+void exit(struct cgroup_subsys *ss, struct task_struct *task)
+LL=callback_mutex
+
+Called during task exit
+
+int populate(struct cgroup_subsys *ss, struct cgroup *cont)
+LL=none
+
+Called after creation of a cgroup to allow a subsystem to populate
+the cgroup directory with file entries.  The subsystem should make
+calls to cgroup_add_file() with objects of type cftype (see
+include/linux/cgroup.h for details).  Note that although this
+method can return an error code, the error code is currently not
+always handled well.
+
+void post_clone(struct cgroup_subsys *ss, struct cgroup *cont)
+
+Called at the end of cgroup_clone() to do any paramater
+initialization which might be required before a task could attach.  For
+example in cpusets, no task may attach before 'cpus' and 'mems' are set
+up.
+
+void bind(struct cgroup_subsys *ss, struct cgroup *root)
+LL=callback_mutex
+
+Called when a cgroup subsystem is rebound to a different hierarchy
+and root cgroup. Currently this will only involve movement between
+the default hierarchy (which never has sub-cgroups) and a hierarchy
+that is being created/destroyed (and hence has no sub-cgroups).
+
+4. Questions
+============
+
+Q: what's up with this '/bin/echo' ?
+A: bash's builtin 'echo' command does not check calls to write() against
+   errors. If you use it in the cgroup file system, you won't be
+   able to tell whether a command succeeded or failed.
+
+Q: When I attach processes, only the first of the line gets really attached !
+A: We can only return one error code per call to write(). So you should also
+   put only ONE pid.
+
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index b6d24c2..a741f65 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -220,7 +220,9 @@
   CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the
   CPU is being offlined while tasks are frozen due to a suspend operation in
   progress
-- All process is migrated away from this outgoing CPU to a new CPU
+- All processes are migrated away from this outgoing CPU to new CPUs.
+  The new CPU is chosen from each process' current cpuset, which may be
+  a subset of all online CPUs.
 - All interrupts targeted to this CPU is migrated to a new CPU
 - timers/bottom half/task lets are also migrated to a new CPU
 - Once all services are migrated, kernel calls an arch specific routine
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index ec9de69..141bef1 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -7,6 +7,7 @@
 Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
 Modified by Paul Jackson <pj@sgi.com>
 Modified by Christoph Lameter <clameter@sgi.com>
+Modified by Paul Menage <menage@google.com>
 
 CONTENTS:
 =========
@@ -16,9 +17,9 @@
   1.2 Why are cpusets needed ?
   1.3 How are cpusets implemented ?
   1.4 What are exclusive cpusets ?
-  1.5 What does notify_on_release do ?
-  1.6 What is memory_pressure ?
-  1.7 What is memory spread ?
+  1.5 What is memory_pressure ?
+  1.6 What is memory spread ?
+  1.7 What is sched_load_balance ?
   1.8 How do I use cpusets ?
 2. Usage Examples and Syntax
   2.1 Basic Usage
@@ -44,18 +45,19 @@
 hooks, beyond what is already present, required to manage dynamic
 job placement on large systems.
 
-Each task has a pointer to a cpuset.  Multiple tasks may reference
-the same cpuset.  Requests by a task, using the sched_setaffinity(2)
-system call to include CPUs in its CPU affinity mask, and using the
-mbind(2) and set_mempolicy(2) system calls to include Memory Nodes
-in its memory policy, are both filtered through that tasks cpuset,
-filtering out any CPUs or Memory Nodes not in that cpuset.  The
-scheduler will not schedule a task on a CPU that is not allowed in
-its cpus_allowed vector, and the kernel page allocator will not
-allocate a page on a node that is not allowed in the requesting tasks
-mems_allowed vector.
+Cpusets use the generic cgroup subsystem described in
+Documentation/cgroup.txt.
 
-User level code may create and destroy cpusets by name in the cpuset
+Requests by a task, using the sched_setaffinity(2) system call to
+include CPUs in its CPU affinity mask, and using the mbind(2) and
+set_mempolicy(2) system calls to include Memory Nodes in its memory
+policy, are both filtered through that tasks cpuset, filtering out any
+CPUs or Memory Nodes not in that cpuset.  The scheduler will not
+schedule a task on a CPU that is not allowed in its cpus_allowed
+vector, and the kernel page allocator will not allocate a page on a
+node that is not allowed in the requesting tasks mems_allowed vector.
+
+User level code may create and destroy cpusets by name in the cgroup
 virtual file system, manage the attributes and permissions of these
 cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
 specify and query to which cpuset a task is assigned, and list the
@@ -115,7 +117,7 @@
  - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
    kernel.
  - Each task in the system is attached to a cpuset, via a pointer
-   in the task structure to a reference counted cpuset structure.
+   in the task structure to a reference counted cgroup structure.
  - Calls to sched_setaffinity are filtered to just those CPUs
    allowed in that tasks cpuset.
  - Calls to mbind and set_mempolicy are filtered to just
@@ -145,15 +147,10 @@
  - in page_alloc.c, to restrict memory to allowed nodes.
  - in vmscan.c, to restrict page recovery to the current cpuset.
 
-In addition a new file system, of type "cpuset" may be mounted,
-typically at /dev/cpuset, to enable browsing and modifying the cpusets
-presently known to the kernel.  No new system calls are added for
-cpusets - all support for querying and modifying cpusets is via
-this cpuset file system.
-
-Each task under /proc has an added file named 'cpuset', displaying
-the cpuset name, as the path relative to the root of the cpuset file
-system.
+You should mount the "cgroup" filesystem type in order to enable
+browsing and modifying the cpusets presently known to the kernel.  No
+new system calls are added for cpusets - all support for querying and
+modifying cpusets is via this cpuset file system.
 
 The /proc/<pid>/status file for each task has two added lines,
 displaying the tasks cpus_allowed (on which CPUs it may be scheduled)
@@ -163,16 +160,15 @@
   Cpus_allowed:   ffffffff,ffffffff,ffffffff,ffffffff
   Mems_allowed:   ffffffff,ffffffff
 
-Each cpuset is represented by a directory in the cpuset file system
-containing the following files describing that cpuset:
+Each cpuset is represented by a directory in the cgroup file system
+containing (on top of the standard cgroup files) the following
+files describing that cpuset:
 
  - cpus: list of CPUs in that cpuset
  - mems: list of Memory Nodes in that cpuset
  - memory_migrate flag: if set, move pages to cpusets nodes
  - cpu_exclusive flag: is cpu placement exclusive?
  - mem_exclusive flag: is memory placement exclusive?
- - tasks: list of tasks (by pid) attached to that cpuset
- - notify_on_release flag: run /sbin/cpuset_release_agent on exit?
  - memory_pressure: measure of how much paging pressure in cpuset
 
 In addition, the root cpuset only has the following file:
@@ -237,21 +233,7 @@
 outside even a mem_exclusive cpuset.
 
 
-1.5 What does notify_on_release do ?
-------------------------------------
-
-If the notify_on_release flag is enabled (1) in a cpuset, then whenever
-the last task in the cpuset leaves (exits or attaches to some other
-cpuset) and the last child cpuset of that cpuset is removed, then
-the kernel runs the command /sbin/cpuset_release_agent, supplying the
-pathname (relative to the mount point of the cpuset file system) of the
-abandoned cpuset.  This enables automatic removal of abandoned cpusets.
-The default value of notify_on_release in the root cpuset at system
-boot is disabled (0).  The default value of other cpusets at creation
-is the current value of their parents notify_on_release setting.
-
-
-1.6 What is memory_pressure ?
+1.5 What is memory_pressure ?
 -----------------------------
 The memory_pressure of a cpuset provides a simple per-cpuset metric
 of the rate that the tasks in a cpuset are attempting to free up in
@@ -308,7 +290,7 @@
 times 1000.
 
 
-1.7 What is memory spread ?
+1.6 What is memory spread ?
 ---------------------------
 There are two boolean flag files per cpuset that control where the
 kernel allocates pages for the file system buffers and related in
@@ -378,6 +360,142 @@
 data set, the memory allocation across the nodes in the jobs cpuset
 can become very uneven.
 
+1.7 What is sched_load_balance ?
+--------------------------------
+
+The kernel scheduler (kernel/sched.c) automatically load balances
+tasks.  If one CPU is underutilized, kernel code running on that
+CPU will look for tasks on other more overloaded CPUs and move those
+tasks to itself, within the constraints of such placement mechanisms
+as cpusets and sched_setaffinity.
+
+The algorithmic cost of load balancing and its impact on key shared
+kernel data structures such as the task list increases more than
+linearly with the number of CPUs being balanced.  So the scheduler
+has support to  partition the systems CPUs into a number of sched
+domains such that it only load balances within each sched domain.
+Each sched domain covers some subset of the CPUs in the system;
+no two sched domains overlap; some CPUs might not be in any sched
+domain and hence won't be load balanced.
+
+Put simply, it costs less to balance between two smaller sched domains
+than one big one, but doing so means that overloads in one of the
+two domains won't be load balanced to the other one.
+
+By default, there is one sched domain covering all CPUs, except those
+marked isolated using the kernel boot time "isolcpus=" argument.
+
+This default load balancing across all CPUs is not well suited for
+the following two situations:
+ 1) On large systems, load balancing across many CPUs is expensive.
+    If the system is managed using cpusets to place independent jobs
+    on separate sets of CPUs, full load balancing is unnecessary.
+ 2) Systems supporting realtime on some CPUs need to minimize
+    system overhead on those CPUs, including avoiding task load
+    balancing if that is not needed.
+
+When the per-cpuset flag "sched_load_balance" is enabled (the default
+setting), it requests that all the CPUs in that cpusets allowed 'cpus'
+be contained in a single sched domain, ensuring that load balancing
+can move a task (not otherwised pinned, as by sched_setaffinity)
+from any CPU in that cpuset to any other.
+
+When the per-cpuset flag "sched_load_balance" is disabled, then the
+scheduler will avoid load balancing across the CPUs in that cpuset,
+--except-- in so far as is necessary because some overlapping cpuset
+has "sched_load_balance" enabled.
+
+So, for example, if the top cpuset has the flag "sched_load_balance"
+enabled, then the scheduler will have one sched domain covering all
+CPUs, and the setting of the "sched_load_balance" flag in any other
+cpusets won't matter, as we're already fully load balancing.
+
+Therefore in the above two situations, the top cpuset flag
+"sched_load_balance" should be disabled, and only some of the smaller,
+child cpusets have this flag enabled.
+
+When doing this, you don't usually want to leave any unpinned tasks in
+the top cpuset that might use non-trivial amounts of CPU, as such tasks
+may be artificially constrained to some subset of CPUs, depending on
+the particulars of this flag setting in descendent cpusets.  Even if
+such a task could use spare CPU cycles in some other CPUs, the kernel
+scheduler might not consider the possibility of load balancing that
+task to that underused CPU.
+
+Of course, tasks pinned to a particular CPU can be left in a cpuset
+that disables "sched_load_balance" as those tasks aren't going anywhere
+else anyway.
+
+There is an impedance mismatch here, between cpusets and sched domains.
+Cpusets are hierarchical and nest.  Sched domains are flat; they don't
+overlap and each CPU is in at most one sched domain.
+
+It is necessary for sched domains to be flat because load balancing
+across partially overlapping sets of CPUs would risk unstable dynamics
+that would be beyond our understanding.  So if each of two partially
+overlapping cpusets enables the flag 'sched_load_balance', then we
+form a single sched domain that is a superset of both.  We won't move
+a task to a CPU outside it cpuset, but the scheduler load balancing
+code might waste some compute cycles considering that possibility.
+
+This mismatch is why there is not a simple one-to-one relation
+between which cpusets have the flag "sched_load_balance" enabled,
+and the sched domain configuration.  If a cpuset enables the flag, it
+will get balancing across all its CPUs, but if it disables the flag,
+it will only be assured of no load balancing if no other overlapping
+cpuset enables the flag.
+
+If two cpusets have partially overlapping 'cpus' allowed, and only
+one of them has this flag enabled, then the other may find its
+tasks only partially load balanced, just on the overlapping CPUs.
+This is just the general case of the top_cpuset example given a few
+paragraphs above.  In the general case, as in the top cpuset case,
+don't leave tasks that might use non-trivial amounts of CPU in
+such partially load balanced cpusets, as they may be artificially
+constrained to some subset of the CPUs allowed to them, for lack of
+load balancing to the other CPUs.
+
+1.7.1 sched_load_balance implementation details.
+------------------------------------------------
+
+The per-cpuset flag 'sched_load_balance' defaults to enabled (contrary
+to most cpuset flags.)  When enabled for a cpuset, the kernel will
+ensure that it can load balance across all the CPUs in that cpuset
+(makes sure that all the CPUs in the cpus_allowed of that cpuset are
+in the same sched domain.)
+
+If two overlapping cpusets both have 'sched_load_balance' enabled,
+then they will be (must be) both in the same sched domain.
+
+If, as is the default, the top cpuset has 'sched_load_balance' enabled,
+then by the above that means there is a single sched domain covering
+the whole system, regardless of any other cpuset settings.
+
+The kernel commits to user space that it will avoid load balancing
+where it can.  It will pick as fine a granularity partition of sched
+domains as it can while still providing load balancing for any set
+of CPUs allowed to a cpuset having 'sched_load_balance' enabled.
+
+The internal kernel cpuset to scheduler interface passes from the
+cpuset code to the scheduler code a partition of the load balanced
+CPUs in the system. This partition is a set of subsets (represented
+as an array of cpumask_t) of CPUs, pairwise disjoint, that cover all
+the CPUs that must be load balanced.
+
+Whenever the 'sched_load_balance' flag changes, or CPUs come or go
+from a cpuset with this flag enabled, or a cpuset with this flag
+enabled is removed, the cpuset code builds a new such partition and
+passes it to the scheduler sched domain setup code, to have the sched
+domains rebuilt as necessary.
+
+This partition exactly defines what sched domains the scheduler should
+setup - one sched domain for each element (cpumask_t) in the partition.
+
+The scheduler remembers the currently active sched domain partitions.
+When the scheduler routine partition_sched_domains() is invoked from
+the cpuset code to update these sched domains, it compares the new
+partition requested with the current, and updates its sched domains,
+removing the old and adding the new, for each change.
 
 1.8 How do I use cpusets ?
 --------------------------
@@ -469,7 +587,7 @@
 To start a new job that is to be contained within a cpuset, the steps are:
 
  1) mkdir /dev/cpuset
- 2) mount -t cpuset none /dev/cpuset
+ 2) mount -t cgroup -ocpuset cpuset /dev/cpuset
  3) Create the new cpuset by doing mkdir's and write's (or echo's) in
     the /dev/cpuset virtual file system.
  4) Start a task that will be the "founding father" of the new job.
@@ -481,7 +599,7 @@
 named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
 and then start a subshell 'sh' in that cpuset:
 
-  mount -t cpuset none /dev/cpuset
+  mount -t cgroup -ocpuset cpuset /dev/cpuset
   cd /dev/cpuset
   mkdir Charlie
   cd Charlie
@@ -513,7 +631,7 @@
 virtual filesystem.
 
 To mount it, type:
-# mount -t cpuset none /dev/cpuset
+# mount -t cgroup -o cpuset cpuset /dev/cpuset
 
 Then under /dev/cpuset you can find a tree that corresponds to the
 tree of the cpusets in the system. For instance, /dev/cpuset
@@ -556,6 +674,18 @@
 This will fail if the cpuset is in use (has cpusets inside, or has
 processes attached).
 
+Note that for legacy reasons, the "cpuset" filesystem exists as a
+wrapper around the cgroup filesystem.
+
+The command
+
+mount -t cpuset X /dev/cpuset
+
+is equivalent to
+
+mount -t cgroup -ocpuset X /dev/cpuset
+echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent
+
 2.2 Adding/removing cpus
 ------------------------
 
diff --git a/Documentation/device-mapper/dm-uevent.txt b/Documentation/device-mapper/dm-uevent.txt
new file mode 100644
index 0000000..07edbd8
--- /dev/null
+++ b/Documentation/device-mapper/dm-uevent.txt
@@ -0,0 +1,97 @@
+The device-mapper uevent code adds the capability to device-mapper to create
+and send kobject uevents (uevents).  Previously device-mapper events were only
+available through the ioctl interface.  The advantage of the uevents interface
+is the event contains environment attributes providing increased context for
+the event avoiding the need to query the state of the device-mapper device after
+the event is received.
+
+There are two functions currently for device-mapper events.  The first function
+listed creates the event and the second function sends the event(s).
+
+void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
+                    const char *path, unsigned nr_valid_paths)
+
+void dm_send_uevents(struct list_head *events, struct kobject *kobj)
+
+
+The variables added to the uevent environment are:
+
+Variable Name: DM_TARGET
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description:
+Value: Name of device-mapper target that generated the event.
+
+Variable Name: DM_ACTION
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description:
+Value: Device-mapper specific action that caused the uevent action.
+	PATH_FAILED - A path has failed.
+	PATH_REINSTATED - A path has been reinstated.
+
+Variable Name: DM_SEQNUM
+Uevent Action(s): KOBJ_CHANGE
+Type: unsigned integer
+Description: A sequence number for this specific device-mapper device.
+Value: Valid unsigned integer range.
+
+Variable Name: DM_PATH
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description: Major and minor number of the path device pertaining to this
+event.
+Value: Path name in the form of "Major:Minor"
+
+Variable Name: DM_NR_VALID_PATHS
+Uevent Action(s): KOBJ_CHANGE
+Type: unsigned integer
+Description:
+Value: Valid unsigned integer range.
+
+Variable Name: DM_NAME
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description: Name of the device-mapper device.
+Value: Name
+
+Variable Name: DM_UUID
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description: UUID of the device-mapper device.
+Value: UUID. (Empty string if there isn't one.)
+
+An example of the uevents generated as captured by udevmonitor is shown
+below.
+
+1.) Path failure.
+UEVENT[1192521009.711215] change@/block/dm-3
+ACTION=change
+DEVPATH=/block/dm-3
+SUBSYSTEM=block
+DM_TARGET=multipath
+DM_ACTION=PATH_FAILED
+DM_SEQNUM=1
+DM_PATH=8:32
+DM_NR_VALID_PATHS=0
+DM_NAME=mpath2
+DM_UUID=mpath-35333333000002328
+MINOR=3
+MAJOR=253
+SEQNUM=1130
+
+2.) Path reinstate.
+UEVENT[1192521132.989927] change@/block/dm-3
+ACTION=change
+DEVPATH=/block/dm-3
+SUBSYSTEM=block
+DM_TARGET=multipath
+DM_ACTION=PATH_REINSTATED
+DM_SEQNUM=2
+DM_PATH=8:32
+DM_NR_VALID_PATHS=1
+DM_NAME=mpath2
+DM_UUID=mpath-35333333000002328
+MINOR=3
+MAJOR=253
+SEQNUM=1131
diff --git a/Documentation/input/input-programming.txt b/Documentation/input/input-programming.txt
index d9d5230..4d932dc 100644
--- a/Documentation/input/input-programming.txt
+++ b/Documentation/input/input-programming.txt
@@ -42,8 +42,8 @@
 		goto err_free_irq;
 	}
 
-	button_dev->evbit[0] = BIT(EV_KEY);
-	button_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
+	button_dev->evbit[0] = BIT_MASK(EV_KEY);
+	button_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
 
 	error = input_register_device(button_dev);
 	if (error) {
@@ -217,14 +217,15 @@
 that the thing is precise and always returns to exactly the center position
 (if it has any).
 
-1.4 NBITS(), LONG(), BIT()
+1.4 BITS_TO_LONGS(), BIT_WORD(), BIT_MASK()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-These three macros from input.h help some bitfield computations:
+These three macros from bitops.h help some bitfield computations:
 
-	NBITS(x) - returns the length of a bitfield array in longs for x bits
-	LONG(x)  - returns the index in the array in longs for bit x
-	BIT(x)   - returns the index in a long for bit x
+	BITS_TO_LONGS(x) - returns the length of a bitfield array in longs for
+			   x bits
+	BIT_WORD(x)	 - returns the index in the array in longs for bit x
+	BIT_MASK(x)	 - returns the index in a long for bit x
 
 1.5 The id* and name fields
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index fe8b0c4..616043a 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -77,7 +77,12 @@
   Optionally, dependencies only for this default value can be added with
   "if".
 
-- dependencies: "depends on"/"requires" <expr>
+- type definition + default value:
+	"def_bool"/"def_tristate" <expr> ["if" <expr>]
+  This is a shorthand notation for a type definition plus a value.
+  Optionally dependencies for this default value can be added with "if".
+
+- dependencies: "depends on" <expr>
   This defines a dependency for this menu entry. If multiple
   dependencies are defined, they are connected with '&&'. Dependencies
   are applied to all other options within this menu entry (which also
@@ -289,3 +294,10 @@
 	"source" <prompt>
 
 This reads the specified configuration file. This file is always parsed.
+
+mainmenu:
+
+	"mainmenu" <prompt>
+
+This sets the config program's title bar if the config program chooses
+to use it.
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index f099b81..6166e2d 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -518,6 +518,28 @@
 	In this example for a specific GCC version the build will error out explaining
 	to the user why it stops.
 
+    cc-cross-prefix
+	cc-cross-prefix is used to check if there exist a $(CC) in path with
+	one of the listed prefixes. The first prefix where there exist a
+	prefix$(CC) in the PATH is returned - and if no prefix$(CC) is found
+	then nothing is returned.
+	Additional prefixes are separated by a single space in the
+	call of cc-cross-prefix.
+	This functionality is usefull for architecture Makefile that try
+	to set CROSS_COMPILE to well know values but may have several
+	values to select between.
+	It is recommended only to try to set CROSS_COMPILE is it is a cross
+	build (host arch is different from target arch). And is CROSS_COMPILE
+	is already set then leave it with the old value.
+
+	Example:
+		#arch/m68k/Makefile
+		ifneq ($(SUBARCH),$(ARCH))
+		        ifeq ($(CROSS_COMPILE),)
+		               CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu-)
+			endif
+		endif
+
 === 4 Host Program support
 
 Kbuild supports building executables on the host for use during the
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 1b37b28..d0ac72c 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -231,6 +231,32 @@
   any space below the alignment point will be wasted.
 
 
+Extended crashkernel syntax
+===========================
+
+While the "crashkernel=size[@offset]" syntax is sufficient for most
+configurations, sometimes it's handy to have the reserved memory dependent
+on the value of System RAM -- that's mostly for distributors that pre-setup
+the kernel command line to avoid a unbootable system after some memory has
+been removed from the machine.
+
+The syntax is:
+
+    crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
+    range=start-[end]
+
+For example:
+
+    crashkernel=512M-2G:64M,2G-:128M
+
+This would mean:
+
+    1) if the RAM is smaller than 512M, then don't reserve anything
+       (this is the "rescue" case)
+    2) if the RAM size is between 512M and 2G, then reserve 64M
+    3) if the RAM size is larger than 2G, then reserve 128M
+
+
 Boot into System Kernel
 =======================
 
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 189df0b..7bf6bd2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -431,8 +431,10 @@
 			over the 8254 in addition to over the IO-APIC. The
 			kernel tries to set a sensible default.
 
-	hpet=		[X86-32,HPET] option to disable HPET and use PIT.
-			Format: disable
+	hpet=		[X86-32,HPET] option to control HPET usage
+			Format: { enable (default) | disable | force }
+			disable: disable HPET and use PIT instead
+			force: allow force enabled of undocumented chips (ICH4, VIA)
 
 	com20020=	[HW,NET] ARCnet - COM20020 chipset
 			Format:
@@ -497,6 +499,13 @@
 			[KNL] Reserve a chunk of physical memory to
 			hold a kernel to switch to with kexec on panic.
 
+	crashkernel=range1:size1[,range2:size2,...][@offset]
+			[KNL] Same as above, but depends on the memory
+			in the running system. The syntax of range is
+			start-[end] where start and end are both
+			a memory unit (amount[KMG]). See also
+			Documentation/kdump/kdump.txt for a example.
+
 	cs4232=		[HW,OSS]
 			Format: <io>,<irq>,<dma>,<dma2>,<mpuio>,<mpuirq>
 
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
new file mode 100644
index 0000000..295a71b
--- /dev/null
+++ b/Documentation/markers.txt
@@ -0,0 +1,81 @@
+ 	             Using the Linux Kernel Markers
+
+			    Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Markers and their use. It provides
+examples of how to insert markers in the kernel and connect probe functions to
+them and provides some examples of probe functions.
+
+
+* Purpose of markers
+
+A marker placed in code provides a hook to call a function (probe) that you can
+provide at runtime. A marker can be "on" (a probe is connected to it) or "off"
+(no probe is attached). When a marker is "off" it has no effect, except for
+adding a tiny time penalty (checking a condition for a branch) and space
+penalty (adding a few bytes for the function call at the end of the
+instrumented function and adds a data structure in a separate section).  When a
+marker is "on", the function you provide is called each time the marker is
+executed, in the execution context of the caller. When the function provided
+ends its execution, it returns to the caller (continuing from the marker site).
+
+You can put markers at important locations in the code. Markers are
+lightweight hooks that can pass an arbitrary number of parameters,
+described in a printk-like format string, to the attached probe function.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+In order to use the macro trace_mark, you should include linux/marker.h.
+
+#include <linux/marker.h>
+
+And,
+
+trace_mark(subsystem_event, "%d %s", someint, somestring);
+Where :
+- subsystem_event is an identifier unique to your event
+    - subsystem is the name of your subsystem.
+    - event is the name of the event to mark.
+- "%d %s" is the formatted string for the serializer.
+- someint is an integer.
+- somestring is a char pointer.
+
+Connecting a function (probe) to a marker is done by providing a probe (function
+to call) for the specific marker through marker_probe_register() and can be
+activated by calling marker_arm(). Marker deactivation can be done by calling
+marker_disarm() as many times as marker_arm() has been called. Removing a probe
+is done through marker_probe_unregister(); it will disarm the probe and make
+sure there is no caller left using the probe when it returns. Probe removal is
+preempt-safe because preemption is disabled around the probe call. See the
+"Probe example" section below for a sample probe module.
+
+The marker mechanism supports inserting multiple instances of the same marker.
+Markers can be put in inline functions, inlined static functions, and
+unrolled loops as well as regular functions.
+
+The naming scheme "subsystem_event" is suggested here as a convention intended
+to limit collisions. Marker names are global to the kernel: they are considered
+as being the same whether they are in the core kernel image or in modules.
+Conflicting format strings for markers with the same name will cause the markers
+to be detected to have a different format string not to be armed and will output
+a printk warning which identifies the inconsistency:
+
+"Format mismatch for probe probe_name (format), marker (format)"
+
+
+* Probe / marker example
+
+See the example provided in samples/markers/src
+
+Compile them with your kernel.
+
+Run, as root :
+modprobe marker-example (insmod order is not important)
+modprobe probe-example
+cat /proc/marker-example (returns an expected error)
+rmmod marker-example probe-example
+dmesg
diff --git a/Documentation/mips/00-INDEX b/Documentation/mips/00-INDEX
index 9df8a2e..3f13bf8 100644
--- a/Documentation/mips/00-INDEX
+++ b/Documentation/mips/00-INDEX
@@ -4,5 +4,3 @@
 	- README for MIPS AU1XXX IDE driver.
 GT64120.README
 	- README for dir with info on MIPS boards using GT-64120 or GT-64120A.
-time.README
-	- README for MIPS time services.
diff --git a/Documentation/mips/time.README b/Documentation/mips/time.README
deleted file mode 100644
index a4ce603..0000000
--- a/Documentation/mips/time.README
+++ /dev/null
@@ -1,173 +0,0 @@
-README for MIPS time services
-
-Jun Sun
-jsun@mvista.com or jsun@junsun.net
-
-
-ABOUT
------
-This file describes the new arch/mips/kernel/time.c, related files and the 
-services they provide. 
-
-If you are short in patience and just want to know how to use time.c for a 
-new board or convert an existing board, go to the last section.
-
-
-FILES, COMPATABILITY AND CONFIGS
----------------------------------
-
-The old arch/mips/kernel/time.c is renamed to old-time.c.
-
-A new time.c is put there, together with include/asm-mips/time.h.
-
-Two configs variables are introduced, CONFIG_OLD_TIME_C and CONFIG_NEW_TIME_C.
-So we allow boards using 
-
-	1) old time.c (CONFIG_OLD_TIME_C)
-	2) new time.c (CONFIG_NEW_TIME_C)
-	3) neither (their own private time.c)
-
-However, it is expected every board will move to the new time.c in the near
-future.
-
-
-WHAT THE NEW CODE PROVIDES?
---------------------------- 
-
-The new time code provide the following services:
-
-  a) Implements functions required by Linux common code:
-	time_init
-
-  b) provides an abstraction of RTC and null RTC implementation as default.
-	extern unsigned long (*rtc_get_time)(void);
-	extern int (*rtc_set_time)(unsigned long);
-
-  c) high-level and low-level timer interrupt routines where the timer
-     interrupt source  may or may not be the CPU timer.  The high-level
-     routine is dispatched through do_IRQ() while the low-level is
-     dispatched in assemably code (usually int-handler.S)
-
-
-WHAT THE NEW CODE REQUIRES?
----------------------------
-
-For the new code to work properly, each board implementation needs to supply
-the following functions or values:
-
-  a) board_time_init - a function pointer.  Invoked at the beginnig of
-     time_init().  It is optional.
-	1. (optional) set up RTC routines
-	2. (optional) calibrate and set the mips_hpt_frequency
-
-  b) plat_timer_setup - a function pointer.  Invoked at the end of time_init()
-	1. (optional) over-ride any decisions made in time_init()
-	2. set up the irqaction for timer interrupt.
-	3. enable the timer interrupt
-
-  c) (optional) board-specific RTC routines.
-
-  d) (optional) mips_hpt_frequency - It must be definied if the board
-     is using CPU counter for timer interrupt.
-
-
-PORTING GUIDE
--------------
-
-Step 1: decide how you like to implement the time services.
-
-  a) does this board have a RTC?  If yes, implement the two RTC funcs.
-
-  b) does the CPU have counter/compare registers? 
-
-     If the answer is no, you need a timer to provide the timer interrupt
-     at 100 HZ speed.
-
-  c) The following sub steps assume your CPU has counter register.
-     Do you plan to use the CPU counter register as the timer interrupt
-     or use an exnternal timer?
-
-     In order to use CPU counter register as the timer interrupt source, you
-     must know the counter speed (mips_hpt_frequency).  It is usually the
-     same as the CPU speed or an integral divisor of it.
-
-  d) decide on whether you want to use high-level or low-level timer
-     interrupt routines.  The low-level one is presumably faster, but should
-     not make too mcuh difference.
-
-
-Step 2:  the machine setup() function
-
-  If you supply board_time_init(), set the function poointer.
-
-
-Step 3: implement rtc routines, board_time_init() and plat_timer_setup()
-  if needed.
-
-  board_time_init() -
-  	a) (optional) set up RTC routines,
-        b) (optional) calibrate and set the mips_hpt_frequency
- 	    (only needed if you intended to use cpu counter as timer interrupt
- 	     source)
-
-  plat_timer_setup() -
- 	a) (optional) over-write any choices made above by time_init().
- 	b) machine specific code should setup the timer irqaction.
- 	c) enable the timer interrupt
-
-
-  If the RTC chip is a common chip, I suggest the routines are put under
-  arch/mips/libs.  For example, for DS1386 chip, one would create
-  rtc-ds1386.c under arch/mips/lib directory.  Add the following line to
-  the arch/mips/lib/Makefile:
-
-	obj-$(CONFIG_DDB5476) += rtc-ds1386.o
-
-Step 4: if you are using low-level timer interrupt, change your interrupt
-  dispathcing code to check for timer interrupt and jump to 
-  ll_timer_interrupt() directly  if one is detected.
-
-Step 5: Modify arch/mips/config.in and add CONFIG_NEW_TIME_C to your machine.
-  Modify the appropriate defconfig if applicable.
-
-Final notes: 
-
-For some tricky cases, you may need to add your own wrapper functions 
-for some of the functions in time.c.  
-
-For example, you may define your own timer interrupt routine, which does
-some of its own processing and then calls timer_interrupt().
-
-You can also over-ride any of the built-in functions (RTC routines
-and/or timer interrupt routine).
-
-
-PORTING NOTES FOR SMP
-----------------------
-
-If you have a SMP box, things are slightly more complicated.
-
-The time service running every jiffy is logically divided into two parts:
-
-  1) the one for the whole system  (defined in timer_interrupt())
-  2) the one that should run for each CPU (defined in local_timer_interrupt())
-
-You need to decide on your timer interrupt sources.
-
-  case 1) - whole system has only one timer interrupt delivered to one CPU
-
-	In this case, you set up timer interrupt as in UP systems.  In addtion,
-	you need to set emulate_local_timer_interrupt to 1 so that other
-	CPUs get to call local_timer_interrupt().
-
-	THIS IS CURRENTLY NOT IMPLEMNETED.  However, it is rather easy to write
-	one should such a need arise.  You simply make a IPI call.
-
-  case 2) - each CPU has a separate timer interrupt
-
-	In this case, you need to set up IRQ such that each of them will
-	call local_timer_interrupt().  In addition, you need to arrange
-	one and only one of them to call timer_interrupt().
-
-	You can also do the low-level version of those interrupt routines,
-	following similar dispatching routes described above.
diff --git a/Documentation/thinkpad-acpi.txt b/Documentation/thinkpad-acpi.txt
index 60953d6..3b95bba 100644
--- a/Documentation/thinkpad-acpi.txt
+++ b/Documentation/thinkpad-acpi.txt
@@ -105,10 +105,15 @@
 as a driver attribute (see below).
 
 Sysfs driver attributes are on the driver's sysfs attribute space,
-for 2.6.20 this is /sys/bus/platform/drivers/thinkpad_acpi/.
+for 2.6.23 this is /sys/bus/platform/drivers/thinkpad_acpi/ and
+/sys/bus/platform/drivers/thinkpad_hwmon/
 
-Sysfs device attributes are on the driver's sysfs attribute space,
-for 2.6.20 this is /sys/devices/platform/thinkpad_acpi/.
+Sysfs device attributes are on the thinkpad_acpi device sysfs attribute
+space, for 2.6.23 this is /sys/devices/platform/thinkpad_acpi/.
+
+Sysfs device attributes for the sensors and fan are on the
+thinkpad_hwmon device's sysfs attribute space, but you should locate it
+looking for a hwmon device with the name attribute of "thinkpad".
 
 Driver version
 --------------
@@ -766,7 +771,7 @@
 -------------------
 
 procfs: /proc/acpi/ibm/thermal
-sysfs device attributes: (hwmon) temp*_input
+sysfs device attributes: (hwmon "thinkpad") temp*_input
 
 Most ThinkPads include six or more separate temperature sensors but only
 expose the CPU temperature through the standard ACPI methods.  This
@@ -989,7 +994,9 @@
 ---------------------------------------------------------
 
 procfs: /proc/acpi/ibm/fan
-sysfs device attributes: (hwmon) fan_input, pwm1, pwm1_enable
+sysfs device attributes: (hwmon "thinkpad") fan1_input, pwm1,
+			  pwm1_enable
+sysfs hwmon driver attributes: fan_watchdog
 
 NOTE NOTE NOTE: fan control operations are disabled by default for
 safety reasons.  To enable them, the module parameter "fan_control=1"
@@ -1131,7 +1138,7 @@
 	which can take up to two minutes.  May return rubbish on older
 	ThinkPads.
 
-driver attribute fan_watchdog:
+hwmon driver attribute fan_watchdog:
 	Fan safety watchdog timer interval, in seconds.  Minimum is
 	1 second, maximum is 120 seconds.  0 disables the watchdog.
 
@@ -1233,3 +1240,9 @@
 		layer, the radio switch generates input event EV_RADIO,
 		and the driver enables hot key handling by default in
 		the firmware.
+
+0x020000:	ABI fix: added a separate hwmon platform device and
+		driver, which must be located by name (thinkpad)
+		and the hwmon class for libsensors4 (lm-sensors 3)
+		compatibility.  Moved all hwmon attributes to this
+		new platform device.
diff --git a/MAINTAINERS b/MAINTAINERS
index 2534dc4..4ed4139 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2178,7 +2178,7 @@
 KCONFIG
 P:	Roman Zippel
 M:	zippel@linux-m68k.org
-L:	kbuild-devel@lists.sourceforge.net
+L:	linux-kbuild@vger.kernel.org
 S:	Maintained
 
 KDUMP
@@ -2207,6 +2207,7 @@
 P:	Sam Ravnborg
 M:	sam@ravnborg.org
 T:	git kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
+L:	linux-kbuild@vger.kernel.org
 S:	Maintained
 
 KERNEL JANITORS
diff --git a/Makefile b/Makefile
index 529b904..f9c264e 100644
--- a/Makefile
+++ b/Makefile
@@ -774,6 +774,9 @@
 ifdef CONFIG_HEADERS_CHECK
 	$(Q)$(MAKE) -f $(srctree)/Makefile headers_check
 endif
+ifdef CONFIG_SAMPLES
+	$(Q)$(MAKE) $(build)=samples
+endif
 	$(call vmlinux-modpost)
 	$(call if_changed_rule,vmlinux__)
 	$(Q)rm -f .old_version
@@ -884,10 +887,7 @@
 
 prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \
                    include/asm include/config/auto.conf
-ifneq ($(KBUILD_MODULES),)
-	$(Q)mkdir -p $(MODVERDIR)
-	$(Q)rm -f $(MODVERDIR)/*
-endif
+	$(cmd_crmodverdir)
 
 archprepare: prepare1 scripts_basic
 
@@ -903,14 +903,24 @@
 
 export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH)
 
-# FIXME: The asm symlink changes when $(ARCH) changes. That's
-# hard to detect, but I suppose "make mrproper" is a good idea
-# before switching between archs anyway.
+# The asm symlink changes when $(ARCH) changes.
+# Detect this and ask user to run make mrproper
 
-include/asm:
-	@echo '  SYMLINK $@ -> include/asm-$(SRCARCH)'
-	$(Q)if [ ! -d include ]; then mkdir -p include; fi;
-	@ln -fsn asm-$(SRCARCH) $@
+include/asm: FORCE
+	$(Q)set -e; asmlink=`readlink include/asm | cut -d '-' -f 2`;   \
+	if [ -L include/asm ]; then                                     \
+		if [ "$$asmlink" != "$(SRCARCH)" ]; then                \
+			echo "ERROR: the symlink $@ points to asm-$$asmlink but asm-$(SRCARCH) was expected"; \
+			echo "       set ARCH or save .config and run 'make mrproper' to fix it";             \
+			exit 1;                                         \
+		fi;                                                     \
+	else                                                            \
+		echo '  SYMLINK $@ -> include/asm-$(SRCARCH)';          \
+		if [ ! -d include ]; then                               \
+			mkdir -p include;                               \
+		fi;                                                     \
+		ln -fsn asm-$(SRCARCH) $@;                              \
+	fi
 
 # Generate some files
 # ---------------------------------------------------------------------------
@@ -1020,19 +1030,12 @@
 	fi
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
 
-# If System.map exists, run depmod.  This deliberately does not have a
-# dependency on System.map since that would run the dependency tree on
-# vmlinux.  This depmod is only for convenience to give the initial
+# This depmod is only for convenience to give the initial
 # boot a modules.dep even before / is mounted read-write.  However the
 # boot script depmod is the master version.
-ifeq "$(strip $(INSTALL_MOD_PATH))" ""
-depmod_opts	:=
-else
-depmod_opts	:= -b $(INSTALL_MOD_PATH) -r
-endif
 PHONY += _modinst_post
 _modinst_post: _modinst_
-	if [ -r System.map -a -x $(DEPMOD) ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi
+	$(call cmd,depmod)
 
 else # CONFIG_MODULES
 
@@ -1220,8 +1223,7 @@
 KBUILD_MODULES := 1
 PHONY += crmodverdir
 crmodverdir:
-	$(Q)mkdir -p $(MODVERDIR)
-	$(Q)rm -f $(MODVERDIR)/*
+	$(cmd_crmodverdir)
 
 PHONY += $(objtree)/Module.symvers
 $(objtree)/Module.symvers:
@@ -1249,15 +1251,6 @@
 	$(Q)mkdir -p $(MODLIB)/$(install-dir)
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
 
-# Run depmod only is we have System.map and depmod is executable
-quiet_cmd_depmod = DEPMOD  $(KERNELRELEASE)
-      cmd_depmod = if [ -r System.map -a -x $(DEPMOD) ]; then \
-                      $(DEPMOD) -ae -F System.map             \
-                      $(if $(strip $(INSTALL_MOD_PATH)),      \
-		      -b $(INSTALL_MOD_PATH) -r)              \
-		      $(KERNELRELEASE);                       \
-                   fi
-
 PHONY += _emodinst_post
 _emodinst_post: _emodinst_
 	$(call cmd,depmod)
@@ -1341,7 +1334,7 @@
 	  find $(__srctree)include/asm-generic $(RCS_FIND_IGNORE) \
 	       -name $1 -print; \
 	  find $(__srctree) $(RCS_FIND_IGNORE) \
-	       \( -name include -o -name arch \) -prune -o \
+	       \( -name include -o -name arch -o -name '.tmp_*' \) -prune -o \
 	       -name $1 -print; \
 	  )
 endef
@@ -1490,9 +1483,11 @@
 
 # Modules
 / %/: prepare scripts FORCE
+	$(cmd_crmodverdir)
 	$(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
 	$(build)=$(build-dir)
 %.ko: prepare scripts FORCE
+	$(cmd_crmodverdir)
 	$(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1)   \
 	$(build)=$(build-dir) $(@:.ko=.o)
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
@@ -1506,6 +1501,19 @@
 quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN   $(wildcard $(rm-files)))
       cmd_rmfiles = rm -f $(rm-files)
 
+# Run depmod only is we have System.map and depmod is executable
+# and we build for the host arch
+quiet_cmd_depmod = DEPMOD  $(KERNELRELEASE)
+      cmd_depmod = \
+	if [ -r System.map -a -x $(DEPMOD) -a "$(SUBARCH)" == "$(ARCH)" ]; then \
+		$(DEPMOD) -ae -F System.map                                     \
+		$(if $(strip $(INSTALL_MOD_PATH)), -b $(INSTALL_MOD_PATH) -r)   \
+		$(KERNELRELEASE);                                               \
+	fi
+
+# Create temporary dir for module support files
+cmd_crmodverdir = $(Q)mkdir -p $(MODVERDIR); rm -f $(MODVERDIR)/*
+
 
 a_flags = -Wp,-MD,$(depfile) $(KBUILD_AFLAGS) $(AFLAGS_KERNEL) \
 	  $(NOSTDINC_FLAGS) $(KBUILD_CPPFLAGS) \
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 2a85dc3..4c002ba 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -654,7 +654,7 @@
 
 source "fs/Kconfig"
 
-source "arch/alpha/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/alpha/Kconfig.debug"
 
diff --git a/arch/alpha/kernel/semaphore.c b/arch/alpha/kernel/semaphore.c
index 8c8aaa2..8d2982a 100644
--- a/arch/alpha/kernel/semaphore.c
+++ b/arch/alpha/kernel/semaphore.c
@@ -69,7 +69,7 @@
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down failed(%p)\n",
-	       tsk->comm, tsk->pid, sem);
+	       tsk->comm, task_pid_nr(tsk), sem);
 #endif
 
 	tsk->state = TASK_UNINTERRUPTIBLE;
@@ -98,7 +98,7 @@
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down acquired(%p)\n",
-	       tsk->comm, tsk->pid, sem);
+	       tsk->comm, task_pid_nr(tsk), sem);
 #endif
 }
 
@@ -111,7 +111,7 @@
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down failed(%p)\n",
-	       tsk->comm, tsk->pid, sem);
+	       tsk->comm, task_pid_nr(tsk), sem);
 #endif
 
 	tsk->state = TASK_INTERRUPTIBLE;
@@ -139,7 +139,7 @@
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down %s(%p)\n",
-	       current->comm, current->pid,
+	       current->comm, task_pid_nr(current),
 	       (ret < 0 ? "interrupted" : "acquired"), sem);
 #endif
 	return ret;
@@ -168,7 +168,7 @@
 #endif
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down(%p) <count=%d> from %p\n",
-	       current->comm, current->pid, sem,
+	       current->comm, task_pid_nr(current), sem,
 	       atomic_read(&sem->count), __builtin_return_address(0));
 #endif
 	__down(sem);
@@ -182,7 +182,7 @@
 #endif
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down(%p) <count=%d> from %p\n",
-	       current->comm, current->pid, sem,
+	       current->comm, task_pid_nr(current), sem,
 	       atomic_read(&sem->count), __builtin_return_address(0));
 #endif
 	return __down_interruptible(sem);
@@ -201,7 +201,7 @@
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down_trylock %s from %p\n",
-	       current->comm, current->pid,
+	       current->comm, task_pid_nr(current),
 	       ret ? "failed" : "acquired",
 	       __builtin_return_address(0));
 #endif
@@ -217,7 +217,7 @@
 #endif
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): up(%p) <count=%d> from %p\n",
-	       current->comm, current->pid, sem,
+	       current->comm, task_pid_nr(current), sem,
 	       atomic_read(&sem->count), __builtin_return_address(0));
 #endif
 	__up(sem);
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index ec0f05e..2dc7f9f 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -182,7 +182,7 @@
 #ifdef CONFIG_SMP
 	printk("CPU %d ", hard_smp_processor_id());
 #endif
-	printk("%s(%d): %s %ld\n", current->comm, current->pid, str, err);
+	printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
 	dik_show_regs(regs, r9_15);
 	add_taint(TAINT_DIE);
 	dik_show_trace((unsigned long *)(regs+1));
@@ -646,7 +646,7 @@
 	lock_kernel();
 
 	printk("%s(%d): unhandled unaligned exception\n",
-	       current->comm, current->pid);
+	       current->comm, task_pid_nr(current));
 
 	printk("pc = [<%016lx>]  ra = [<%016lx>]  ps = %04lx\n",
 	       pc, una_reg(26), regs->ps);
@@ -786,7 +786,7 @@
 		}
 		if (++cnt < 5) {
 			printk("%s(%d): unaligned trap at %016lx: %p %lx %ld\n",
-			       current->comm, current->pid,
+			       current->comm, task_pid_nr(current),
 			       regs->pc - 4, va, opcode, reg);
 		}
 		last_time = jiffies;
diff --git a/arch/alpha/lib/fls.c b/arch/alpha/lib/fls.c
index 7ad84ea..32afaa3 100644
--- a/arch/alpha/lib/fls.c
+++ b/arch/alpha/lib/fls.c
@@ -3,7 +3,7 @@
  */
 
 #include <linux/module.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 /* This is fls(x)-1, except zero is held to zero.  This allows most
    efficent input into extbl, plus it allows easy handling of fls(0)=0.  */
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 25154df..4829f96 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -188,13 +188,13 @@
 	/* We ran out of memory, or some other thing happened to us that
 	   made us unable to handle the page fault gracefully.  */
  out_of_memory:
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
 	printk(KERN_ALERT "VM: killing process %s(%d)\n",
-	       current->comm, current->pid);
+	       current->comm, task_pid_nr(current));
 	if (!user_mode(regs))
 		goto no_context;
 	do_group_exit(SIGKILL);
diff --git a/arch/alpha/oprofile/Kconfig b/arch/alpha/oprofile/Kconfig
deleted file mode 100644
index 5ade198..0000000
--- a/arch/alpha/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-	  
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0a0c88d..4cee938 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1068,7 +1068,7 @@
 
 source "fs/Kconfig"
 
-source "arch/arm/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/arm/Kconfig.debug"
 
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 93b7f8e..4f1a031 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -265,7 +265,7 @@
 void show_regs(struct pt_regs * regs)
 {
 	printk("\n");
-	printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
+	printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
 	__show_regs(regs);
 	__backtrace();
 }
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 5feee72..4b05dc5 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -382,16 +382,16 @@
 
 		if (ret != 2 || old_insn.thumb != BREAKINST_THUMB)
 			printk(KERN_ERR "%s:%d: corrupted Thumb breakpoint at "
-				"0x%08lx (0x%04x)\n", task->comm, task->pid,
-				addr, old_insn.thumb);
+				"0x%08lx (0x%04x)\n", task->comm,
+				task_pid_nr(task), addr, old_insn.thumb);
 	} else {
 		ret = swap_insn(task, addr & ~3, &old_insn.arm,
 				&bp->insn.arm, 4);
 
 		if (ret != 4 || old_insn.arm != BREAKINST_ARM)
 			printk(KERN_ERR "%s:%d: corrupted ARM breakpoint at "
-				"0x%08lx (0x%08x)\n", task->comm, task->pid,
-				addr, old_insn.arm);
+				"0x%08lx (0x%08x)\n", task->comm,
+				task_pid_nr(task), addr, old_insn.arm);
 	}
 }
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 8ad4761..4764bd9 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -223,7 +223,7 @@
 	print_modules();
 	__show_regs(regs);
 	printk("Process %s (pid: %d, stack limit = 0x%p)\n",
-		tsk->comm, tsk->pid, thread + 1);
+		tsk->comm, task_pid_nr(tsk), thread + 1);
 
 	if (!user_mode(regs) || in_interrupt()) {
 		dump_mem("Stack: ", regs->ARM_sp,
@@ -337,7 +337,7 @@
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_UNDEFINED) {
 		printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n",
-			current->comm, current->pid, pc);
+			current->comm, task_pid_nr(current), pc);
 		dump_instr(regs);
 	}
 #endif
@@ -388,7 +388,7 @@
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_SYSCALL) {
 		printk(KERN_ERR "[%d] %s: obsolete system call %08x.\n",
-			current->pid, current->comm, n);
+			task_pid_nr(current), current->comm, n);
 		dump_instr(regs);
 	}
 #endif
@@ -565,7 +565,7 @@
 	 */
 	if (user_debug & UDBG_SYSCALL) {
 		printk("[%d] %s: arm syscall %d\n",
-		       current->pid, current->comm, no);
+		       task_pid_nr(current), current->comm, no);
 		dump_instr(regs);
 		if (user_mode(regs)) {
 			__show_regs(regs);
@@ -642,7 +642,7 @@
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_BADABORT) {
 		printk(KERN_ERR "[%d] %s: bad data abort: code %d instr 0x%08lx\n",
-			current->pid, current->comm, code, instr);
+			task_pid_nr(current), current->comm, code, instr);
 		dump_instr(regs);
 		show_pte(current->mm, addr);
 	}
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 074b7cb..e162cca 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -757,7 +757,7 @@
 	if (ai_usermode & 1)
 		printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx "
 		       "Address=0x%08lx FSR 0x%03x\n", current->comm,
-			current->pid, instrptr,
+			task_pid_nr(current), instrptr,
 		        thumb_mode(regs) ? 4 : 8,
 		        thumb_mode(regs) ? tinstr : instr,
 		        addr, fsr);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 59ed1d0..a8a7dab 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -197,7 +197,7 @@
 	return fault;
 
 out_of_memory:
-	if (!is_init(tsk))
+	if (!is_global_init(tsk))
 		goto out;
 
 	/*
diff --git a/arch/arm/oprofile/Kconfig b/arch/arm/oprofile/Kconfig
deleted file mode 100644
index afd93ad..0000000
--- a/arch/arm/oprofile/Kconfig
+++ /dev/null
@@ -1,42 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-if OPROFILE
-
-config OPROFILE_ARMV6
-	bool
-	depends on CPU_V6 && !SMP
-	default y
-	select OPROFILE_ARM11_CORE
-
-config OPROFILE_MPCORE
-	bool
-	depends on CPU_V6 && SMP
-	default y
-	select OPROFILE_ARM11_CORE
-
-config OPROFILE_ARM11_CORE
-	bool
-
-endif
-
-endmenu
-
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index 9a73ce7..8a7caf8e 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -89,7 +89,7 @@
 	 * generate the same exception over and over again and we get
 	 * nowhere.  Better to kill it and let the kernel panic.
 	 */
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		__sighandler_t handler;
 
 		spin_lock_irq(&current->sighand->siglock);
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index 11472f8..6560cb1 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -160,7 +160,7 @@
 		if (exception_trace && printk_ratelimit())
 			printk("%s%s[%d]: segfault at %08lx pc %08lx "
 			       "sp %08lx ecr %lu\n",
-			       is_init(tsk) ? KERN_EMERG : KERN_INFO,
+			       is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
 			       tsk->comm, tsk->pid, address, regs->pc,
 			       regs->sp, ecr);
 		_exception(SIGSEGV, regs, code, address);
@@ -209,7 +209,7 @@
 	 */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
@@ -231,7 +231,7 @@
 	if (exception_trace)
 		printk("%s%s[%d]: bus error at %08lx pc %08lx "
 		       "sp %08lx ecr %lu\n",
-		       is_init(tsk) ? KERN_EMERG : KERN_INFO,
+		       is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
 		       tsk->comm, tsk->pid, address, regs->pc,
 		       regs->sp, ecr);
 
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index aa9db30..4c5ca9d 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -1012,7 +1012,7 @@
 
 source "fs/Kconfig"
 
-source "arch/blackfin/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 menu "Kernel hacking"
 
diff --git a/arch/blackfin/oprofile/Kconfig b/arch/blackfin/oprofile/Kconfig
deleted file mode 100644
index 0a2fd99..0000000
--- a/arch/blackfin/oprofile/Kconfig
+++ /dev/null
@@ -1,29 +0,0 @@
-menu "Profiling support"
-depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-config HARDWARE_PM
-	tristate "Hardware Performance Monitor Profiling"
-	depends on PROFILING
-	help
-	  take use of hardware performance monitor to profiling the kernel
-	  and application.
-
-	  If unsure, say N.
-
-endmenu
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 6b4d026..21900a9 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -196,6 +196,8 @@
 
 source "drivers/usb/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/cris/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 74eef71..43153e7 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -375,6 +375,8 @@
 
 source "fs/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/frv/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/frv/kernel/irq-mb93091.c b/arch/frv/kernel/irq-mb93091.c
index ad753c1..9e38f99 100644
--- a/arch/frv/kernel/irq-mb93091.c
+++ b/arch/frv/kernel/irq-mb93091.c
@@ -17,10 +17,10 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/delay.h>
 #include <asm/irq.h>
 #include <asm/irc-regs.h>
diff --git a/arch/frv/kernel/irq-mb93093.c b/arch/frv/kernel/irq-mb93093.c
index e0983f6..3c2752c 100644
--- a/arch/frv/kernel/irq-mb93093.c
+++ b/arch/frv/kernel/irq-mb93093.c
@@ -17,10 +17,10 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/delay.h>
 #include <asm/irq.h>
 #include <asm/irc-regs.h>
diff --git a/arch/frv/kernel/irq-mb93493.c b/arch/frv/kernel/irq-mb93493.c
index c157eef..7754c73 100644
--- a/arch/frv/kernel/irq-mb93493.c
+++ b/arch/frv/kernel/irq-mb93493.c
@@ -17,10 +17,10 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/delay.h>
 #include <asm/irq.h>
 #include <asm/irc-regs.h>
diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c
index c7e59dc..7ddb690 100644
--- a/arch/frv/kernel/irq.c
+++ b/arch/frv/kernel/irq.c
@@ -24,12 +24,12 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/module.h>
+#include <linux/bitops.h>
 
 #include <asm/atomic.h>
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/delay.h>
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index e35f74e..e2e9f57 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -223,6 +223,8 @@
 
 source "fs/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/h8300/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index b84d505..d1bedbf 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1080,7 +1080,9 @@
 
 endif # APM
 
-source "arch/x86/kernel/cpu/cpufreq/Kconfig"
+source "arch/x86/kernel/cpu/cpufreq/Kconfig_32"
+
+source "drivers/cpuidle/Kconfig"
 
 endmenu
 
@@ -1256,31 +1258,6 @@
 
 source "fs/Kconfig"
 
-menuconfig INSTRUMENTATION
-	bool "Instrumentation Support"
-	default y
-	---help---
-	  Say Y here to get to see options related to performance measurement,
-	  debugging, and testing. This option alone does not add any kernel code.
-
-	  If you say N, all options in this submenu will be skipped and disabled.
-
-if INSTRUMENTATION
-
-source "arch/x86/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes"
-	depends on KALLSYMS && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-
-endif # INSTRUMENTATION
-
 source "arch/i386/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index f036d2d..b88e47c 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -102,7 +102,7 @@
 # default subarch .h files
 mflags-y += -Iinclude/asm-x86/mach-default
 
-head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task_32.o
+head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task.o
 
 libs-y 					+= arch/x86/lib/
 core-y					+= arch/x86/kernel/ \
@@ -131,9 +131,9 @@
 zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage
 
 zImage bzImage: vmlinux
-	$(Q)mkdir -p $(objtree)/arch/i386/boot
-	$(Q)ln -fsn $(objtree)/arch/x86/boot/bzImage $(objtree)/arch/i386/boot/bzImage
 	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
+	$(Q)mkdir -p $(objtree)/arch/i386/boot
+	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/i386/boot/bzImage
 
 compressed: zImage
 
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index c60532d..c89108e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -592,20 +592,7 @@
 
 source "arch/ia64/hp/sim/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/ia64/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes"
-	depends on KALLSYMS && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/ia64/Kconfig.debug"
 
diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
index 449d3e7..75fd90d 100644
--- a/arch/ia64/configs/sn2_defconfig
+++ b/arch/ia64/configs/sn2_defconfig
@@ -26,6 +26,7 @@
 # CONFIG_AUDIT is not set
 # CONFIG_IKCONFIG is not set
 CONFIG_LOG_BUF_SHIFT=20
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index a3405b3..d025a22 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -773,7 +773,7 @@
 			if (flags & MAP_SHARED)
 				printk(KERN_INFO
 				       "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n",
-				       current->comm, current->pid, start);
+				       current->comm, task_pid_nr(current), start);
 			ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags,
 					   off);
 			if (IS_ERR((void *) ret))
@@ -786,7 +786,7 @@
 			if (flags & MAP_SHARED)
 				printk(KERN_INFO
 				       "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n",
-				       current->comm, current->pid, end);
+				       current->comm, task_pid_nr(current), end);
 			ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags,
 					   (off + len) - offset_in_page(end));
 			if (IS_ERR((void *) ret))
@@ -816,7 +816,7 @@
 
 	if ((flags & MAP_SHARED) && !is_congruent)
 		printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
-		       "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off);
+		       "(addr=0x%lx,off=0x%llx)\n", current->comm, task_pid_nr(current), start, off);
 
 	DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend,
 	    is_congruent ? "congruent" : "not congruent", poff);
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 73ca86d..8e4894b 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -967,7 +967,7 @@
  * to use.  We can allocate partial granules only if the unavailable
  * parts exist, and are WB.
  */
-void
+unsigned long
 efi_memmap_init(unsigned long *s, unsigned long *e)
 {
 	struct kern_memdesc *k, *prev = NULL;
@@ -1084,6 +1084,8 @@
 	/* reserve the memory we are using for kern_memmap */
 	*s = (u64)kern_memmap;
 	*e = (u64)++k;
+
+	return total_mem;
 }
 
 void
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f55fa078..59169bf7 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -158,14 +158,14 @@
  */
 #define PROTECT_CTX(c, f) \
 	do {  \
-		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
+		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \
 		spin_lock_irqsave(&(c)->ctx_lock, f); \
-		DPRINT(("spinlocked ctx %p  by [%d]\n", c, current->pid)); \
+		DPRINT(("spinlocked ctx %p  by [%d]\n", c, task_pid_nr(current))); \
 	} while(0)
 
 #define UNPROTECT_CTX(c, f) \
 	do { \
-		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
+		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \
 		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
 	} while(0)
 
@@ -227,12 +227,12 @@
 #ifdef PFM_DEBUGGING
 #define DPRINT(a) \
 	do { \
-		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
 	} while (0)
 
 #define DPRINT_ovfl(a) \
 	do { \
-		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
 	} while (0)
 #endif
 
@@ -913,7 +913,7 @@
 	unsigned long mask, val, ovfl_mask;
 	int i;
 
-	DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
+	DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task)));
 
 	ovfl_mask = pmu_conf->ovfl_val;
 	/*
@@ -992,12 +992,12 @@
 	ovfl_mask = pmu_conf->ovfl_val;
 
 	if (task != current) {
-		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
+		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current));
 		return;
 	}
 	if (ctx->ctx_state != PFM_CTX_MASKED) {
 		printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
-			task->pid, current->pid, ctx->ctx_state);
+			task_pid_nr(task), task_pid_nr(current), ctx->ctx_state);
 		return;
 	}
 	psr = pfm_get_psr();
@@ -1051,7 +1051,8 @@
 		if ((mask & 0x1) == 0UL) continue;
 		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
 		ia64_set_pmc(i, ctx->th_pmcs[i]);
-		DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i]));
+		DPRINT(("[%d] pmc[%d]=0x%lx\n",
+					task_pid_nr(task), i, ctx->th_pmcs[i]));
 	}
 	ia64_srlz_d();
 
@@ -1370,7 +1371,7 @@
 
 error_conflict:
 	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
-  		pfm_sessions.pfs_sys_session[cpu]->pid,
+  		task_pid_nr(pfm_sessions.pfs_sys_session[cpu]),
 		cpu));
 abort:
 	UNLOCK_PFS(flags);
@@ -1442,7 +1443,7 @@
 
 	/* sanity checks */
 	if (task->mm == NULL || size == 0UL || vaddr == NULL) {
-		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
+		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm);
 		return -EINVAL;
 	}
 
@@ -1459,7 +1460,7 @@
 
 	up_write(&task->mm->mmap_sem);
 	if (r !=0) {
-		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
+		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size);
 	}
 
 	DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
@@ -1501,7 +1502,7 @@
 	return 0;
 
 invalid_free:
-	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
+	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current));
 	return -EINVAL;
 }
 #endif
@@ -1547,13 +1548,13 @@
 	unsigned long flags;
   	DECLARE_WAITQUEUE(wait, current);
 	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
 		return -EINVAL;
 	}
 
 	ctx = (pfm_context_t *)filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current));
 		return -EINVAL;
 	}
 
@@ -1607,7 +1608,7 @@
 
 		PROTECT_CTX(ctx, flags);
 	}
-	DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
+	DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret));
   	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
 
@@ -1616,7 +1617,7 @@
 	ret = -EINVAL;
 	msg = pfm_get_next_msg(ctx);
 	if (msg == NULL) {
-		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
+		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current));
 		goto abort_locked;
 	}
 
@@ -1647,13 +1648,13 @@
 	unsigned int mask = 0;
 
 	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
 		return 0;
 	}
 
 	ctx = (pfm_context_t *)filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current));
 		return 0;
 	}
 
@@ -1692,7 +1693,7 @@
 	ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
 
 	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
-		current->pid,
+		task_pid_nr(current),
 		fd,
 		on,
 		ctx->ctx_async_queue, ret));
@@ -1707,13 +1708,13 @@
 	int ret;
 
 	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current));
 		return -EBADF;
 	}
 
 	ctx = (pfm_context_t *)filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current));
 		return -EBADF;
 	}
 	/*
@@ -1759,7 +1760,7 @@
 	if (owner != ctx->ctx_task) {
 		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
 			smp_processor_id(),
-			owner->pid, ctx->ctx_task->pid);
+			task_pid_nr(owner), task_pid_nr(ctx->ctx_task));
 		return;
 	}
 	if (GET_PMU_CTX() != ctx) {
@@ -1769,7 +1770,7 @@
 		return;
 	}
 
-	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));	
+	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task)));
 	/*
 	 * the context is already protected in pfm_close(), we simply
 	 * need to mask interrupts to avoid a PMU interrupt race on
@@ -1821,7 +1822,7 @@
 
 	ctx = (pfm_context_t *)filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current));
 		return -EBADF;
 	}
 
@@ -1969,7 +1970,7 @@
 	
 	ctx = (pfm_context_t *)filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current));
 		return -EBADF;
 	}
 
@@ -2066,7 +2067,7 @@
 	 	 */
 		ctx->ctx_state = PFM_CTX_ZOMBIE;
 
-		DPRINT(("zombie ctx for [%d]\n", task->pid));
+		DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task)));
 		/*
 		 * cannot free the context on the spot. deferred until
 		 * the task notices the ZOMBIE state
@@ -2472,7 +2473,7 @@
 	/* invoke and lock buffer format, if found */
 	fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
 	if (fmt == NULL) {
-		DPRINT(("[%d] cannot find buffer format\n", task->pid));
+		DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task)));
 		return -EINVAL;
 	}
 
@@ -2483,7 +2484,7 @@
 
 	ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);
 
-	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret));
+	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret));
 
 	if (ret) goto error;
 
@@ -2605,23 +2606,23 @@
 	 * no kernel task or task not owner by caller
 	 */
 	if (task->mm == NULL) {
-		DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid));
+		DPRINT(("task [%d] has not memory context (kernel thread)\n", task_pid_nr(task)));
 		return -EPERM;
 	}
 	if (pfm_bad_permissions(task)) {
-		DPRINT(("no permission to attach to  [%d]\n", task->pid));
+		DPRINT(("no permission to attach to  [%d]\n", task_pid_nr(task)));
 		return -EPERM;
 	}
 	/*
 	 * cannot block in self-monitoring mode
 	 */
 	if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
-		DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid));
+		DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task)));
 		return -EINVAL;
 	}
 
 	if (task->exit_state == EXIT_ZOMBIE) {
-		DPRINT(("cannot attach to  zombie task [%d]\n", task->pid));
+		DPRINT(("cannot attach to  zombie task [%d]\n", task_pid_nr(task)));
 		return -EBUSY;
 	}
 
@@ -2631,7 +2632,7 @@
 	if (task == current) return 0;
 
 	if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
-		DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
+		DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state));
 		return -EBUSY;
 	}
 	/*
@@ -3512,7 +3513,7 @@
 
 	if (pmu_conf->use_rr_dbregs == 0) return 0;
 
-	DPRINT(("called for [%d]\n", task->pid));
+	DPRINT(("called for [%d]\n", task_pid_nr(task)));
 
 	/*
 	 * do it only once
@@ -3543,7 +3544,7 @@
 	DPRINT(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n",
 		  pfm_sessions.pfs_ptrace_use_dbregs,
 		  pfm_sessions.pfs_sys_use_dbregs,
-		  task->pid, ret));
+		  task_pid_nr(task), ret));
 
 	UNLOCK_PFS(flags);
 
@@ -3568,7 +3569,7 @@
 
 	LOCK_PFS(flags);
 	if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
-		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
+		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task));
 		ret = -1;
 	}  else {
 		pfm_sessions.pfs_ptrace_use_dbregs--;
@@ -3620,7 +3621,7 @@
 
 	/* sanity check */
 	if (unlikely(task == NULL)) {
-		printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid);
+		printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current));
 		return -EINVAL;
 	}
 
@@ -3629,7 +3630,7 @@
 		fmt = ctx->ctx_buf_fmt;
 
 		DPRINT(("restarting self %d ovfl=0x%lx\n",
-			task->pid,
+			task_pid_nr(task),
 			ctx->ctx_ovfl_regs[0]));
 
 		if (CTX_HAS_SMPL(ctx)) {
@@ -3653,11 +3654,11 @@
 				pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
 
 			if (rst_ctrl.bits.mask_monitoring == 0) {
-				DPRINT(("resuming monitoring for [%d]\n", task->pid));
+				DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task)));
 
 				if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
 			} else {
-				DPRINT(("keeping monitoring stopped for [%d]\n", task->pid));
+				DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task)));
 
 				// cannot use pfm_stop_monitoring(task, regs);
 			}
@@ -3714,10 +3715,10 @@
 	 * "self-monitoring".
 	 */
 	if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
-		DPRINT(("unblocking [%d] \n", task->pid));
+		DPRINT(("unblocking [%d] \n", task_pid_nr(task)));
 		complete(&ctx->ctx_restart_done);
 	} else {
-		DPRINT(("[%d] armed exit trap\n", task->pid));
+		DPRINT(("[%d] armed exit trap\n", task_pid_nr(task)));
 
 		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
 
@@ -3805,7 +3806,7 @@
 	 * don't bother if we are loaded and task is being debugged
 	 */
 	if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
-		DPRINT(("debug registers already in use for [%d]\n", task->pid));
+		DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task)));
 		return -EBUSY;
 	}
 
@@ -3846,7 +3847,7 @@
 	 * is shared by all processes running on it
  	 */
 	if (first_time && can_access_pmu) {
-		DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid));
+		DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task)));
 		for (i=0; i < pmu_conf->num_ibrs; i++) {
 			ia64_set_ibr(i, 0UL);
 			ia64_dv_serialize_instruction();
@@ -4035,7 +4036,7 @@
 		return -EBUSY;
 	}
 	DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
-		PFM_CTX_TASK(ctx)->pid,
+		task_pid_nr(PFM_CTX_TASK(ctx)),
 		state,
 		is_system));
 	/*
@@ -4093,7 +4094,7 @@
 		 * monitoring disabled in kernel at next reschedule
 		 */
 		ctx->ctx_saved_psr_up = 0;
-		DPRINT(("task=[%d]\n", task->pid));
+		DPRINT(("task=[%d]\n", task_pid_nr(task)));
 	}
 	return 0;
 }
@@ -4298,11 +4299,12 @@
 
 		if (is_system) {
 			if (pfm_sessions.pfs_ptrace_use_dbregs) {
-				DPRINT(("cannot load [%d] dbregs in use\n", task->pid));
+				DPRINT(("cannot load [%d] dbregs in use\n",
+							task_pid_nr(task)));
 				ret = -EBUSY;
 			} else {
 				pfm_sessions.pfs_sys_use_dbregs++;
-				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs));
+				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs));
 				set_dbregs = 1;
 			}
 		}
@@ -4394,7 +4396,7 @@
 
 			/* allow user level control */
 			ia64_psr(regs)->sp = 0;
-			DPRINT(("clearing psr.sp for [%d]\n", task->pid));
+			DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task)));
 
 			SET_LAST_CPU(ctx, smp_processor_id());
 			INC_ACTIVATION();
@@ -4429,7 +4431,7 @@
 		 */
 		SET_PMU_OWNER(task, ctx);
 
-		DPRINT(("context loaded on PMU for [%d]\n", task->pid));
+		DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task)));
 	} else {
 		/*
 		 * when not current, task MUST be stopped, so this is safe
@@ -4493,7 +4495,7 @@
 	int prev_state, is_system;
 	int ret;
 
-	DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));
+	DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1));
 
 	prev_state = ctx->ctx_state;
 	is_system  = ctx->ctx_fl_system;
@@ -4568,7 +4570,7 @@
 		 */
 		ia64_psr(regs)->sp = 1;
 
-		DPRINT(("setting psr.sp for [%d]\n", task->pid));
+		DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task)));
 	}
 	/*
 	 * save PMDs to context
@@ -4608,7 +4610,7 @@
 	ctx->ctx_fl_can_restart  = 0;
 	ctx->ctx_fl_going_zombie = 0;
 
-	DPRINT(("disconnected [%d] from context\n", task->pid));
+	DPRINT(("disconnected [%d] from context\n", task_pid_nr(task)));
 
 	return 0;
 }
@@ -4631,7 +4633,7 @@
 
 	PROTECT_CTX(ctx, flags);
 
-	DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid));
+	DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task)));
 
 	state = ctx->ctx_state;
 	switch(state) {
@@ -4640,13 +4642,13 @@
 	 		 * only comes to this function if pfm_context is not NULL, i.e., cannot
 			 * be in unloaded state
 	 		 */
-			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
+			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task));
 			break;
 		case PFM_CTX_LOADED:
 		case PFM_CTX_MASKED:
 			ret = pfm_context_unload(ctx, NULL, 0, regs);
 			if (ret) {
-				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
+				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
 			}
 			DPRINT(("ctx unloaded for current state was %d\n", state));
 
@@ -4655,12 +4657,12 @@
 		case PFM_CTX_ZOMBIE:
 			ret = pfm_context_unload(ctx, NULL, 0, regs);
 			if (ret) {
-				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
+				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
 			}
 			free_ok = 1;
 			break;
 		default:
-			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state);
+			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state);
 			break;
 	}
 	UNPROTECT_CTX(ctx, flags);
@@ -4744,7 +4746,7 @@
 	DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
 		ctx->ctx_fd,
 		state,
-		task->pid,
+		task_pid_nr(task),
 		task->state, PFM_CMD_STOPPED(cmd)));
 
 	/*
@@ -4791,7 +4793,7 @@
 	 */
 	if (PFM_CMD_STOPPED(cmd)) {
 		if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
-			DPRINT(("[%d] task not in stopped state\n", task->pid));
+			DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task)));
 			return -EBUSY;
 		}
 		/*
@@ -4884,7 +4886,7 @@
 	 * limit abuse to min page size
 	 */
 	if (unlikely(sz > PFM_MAX_ARGSIZE)) {
-		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz);
+		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz);
 		return -E2BIG;
 	}
 
@@ -5031,11 +5033,11 @@
 {
 	int ret;
 
-	DPRINT(("entering for [%d]\n", current->pid));
+	DPRINT(("entering for [%d]\n", task_pid_nr(current)));
 
 	ret = pfm_context_unload(ctx, NULL, 0, regs);
 	if (ret) {
-		printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, ret);
+		printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret);
 	}
 
 	/*
@@ -5072,7 +5074,7 @@
 
 	ctx = PFM_GET_CTX(current);
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
+		printk(KERN_ERR "perfmon: [%d] has no PFM context\n", task_pid_nr(current));
 		return;
 	}
 
@@ -5269,7 +5271,7 @@
 	DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
 		     "used_pmds=0x%lx\n",
 			pmc0,
-			task ? task->pid: -1,
+			task ? task_pid_nr(task): -1,
 			(regs ? regs->cr_iip : 0),
 			CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
 			ctx->ctx_used_pmds[0]));
@@ -5458,7 +5460,7 @@
 	}
 
 	DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
-			GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1,
+			GET_PMU_OWNER() ? task_pid_nr(GET_PMU_OWNER()) : -1,
 			PFM_GET_WORK_PENDING(task),
 			ctx->ctx_fl_trap_reason,
 			ovfl_pmds,
@@ -5483,7 +5485,7 @@
 sanity_check:
 	printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
 			smp_processor_id(),
-			task ? task->pid : -1,
+			task ? task_pid_nr(task) : -1,
 			pmc0);
 	return;
 
@@ -5516,7 +5518,7 @@
 	 *
 	 * Overall pretty hairy stuff....
 	 */
-	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1));
+	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task_pid_nr(task): -1));
 	pfm_clear_psr_up();
 	ia64_psr(regs)->up = 0;
 	ia64_psr(regs)->sp = 1;
@@ -5577,13 +5579,13 @@
 
 report_spurious1:
 	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
-		this_cpu, task->pid);
+		this_cpu, task_pid_nr(task));
 	pfm_unfreeze_pmu();
 	return -1;
 report_spurious2:
 	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 
 		this_cpu, 
-		task->pid);
+		task_pid_nr(task));
 	pfm_unfreeze_pmu();
 	return -1;
 }
@@ -5870,7 +5872,8 @@
 	ia64_psr(regs)->sp = 1;
 
 	if (GET_PMU_OWNER() == task) {
-		DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
+		DPRINT(("cleared ownership for [%d]\n",
+					task_pid_nr(ctx->ctx_task)));
 		SET_PMU_OWNER(NULL, NULL);
 	}
 
@@ -5882,7 +5885,7 @@
 	task->thread.pfm_context  = NULL;
 	task->thread.flags       &= ~IA64_THREAD_PM_VALID;
 
-	DPRINT(("force cleanup for [%d]\n",  task->pid));
+	DPRINT(("force cleanup for [%d]\n",  task_pid_nr(task)));
 }
 
 
@@ -6426,7 +6429,7 @@
 
 		if (PMD_IS_COUNTING(i)) {
 			DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
-				task->pid,
+				task_pid_nr(task),
 				i,
 				ctx->ctx_pmds[i].val,
 				val & ovfl_val));
@@ -6448,11 +6451,11 @@
 			 */
 			if (pmc0 & (1UL << i)) {
 				val += 1 + ovfl_val;
-				DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
+				DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i));
 			}
 		}
 
-		DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
+		DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val));
 
 		if (is_self) ctx->th_pmds[i] = pmd_val;
 
@@ -6793,14 +6796,14 @@
 	printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", 
 		this_cpu, 
 		from, 
-		current->pid, 
+		task_pid_nr(current),
 		regs->cr_iip,
 		current->comm);
 
 	task = GET_PMU_OWNER();
 	ctx  = GET_PMU_CTX();
 
-	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx);
+	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task_pid_nr(task) : -1, ctx);
 
 	psr = pfm_get_psr();
 
@@ -6848,7 +6851,7 @@
 {
 	struct thread_struct *thread;
 
-	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid));
+	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task_pid_nr(task)));
 
 	thread = &task->thread;
 
diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
index ff80eab..a7af1cb 100644
--- a/arch/ia64/kernel/perfmon_default_smpl.c
+++ b/arch/ia64/kernel/perfmon_default_smpl.c
@@ -44,11 +44,11 @@
 	int ret = 0;
 
 	if (data == NULL) {
-		DPRINT(("[%d] no argument passed\n", task->pid));
+		DPRINT(("[%d] no argument passed\n", task_pid_nr(task)));
 		return -EINVAL;
 	}
 
-	DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu));
+	DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu));
 
 	/*
 	 * must hold at least the buffer header + one minimally sized entry
@@ -88,7 +88,7 @@
 	hdr->hdr_count        = 0UL;
 
 	DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
-		task->pid,
+		task_pid_nr(task),
 		buf,
 		hdr->hdr_buf_size,
 		sizeof(*hdr),
@@ -245,7 +245,7 @@
 static int
 default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
 {
-	DPRINT(("[%d] exit(%p)\n", task->pid, buf));
+	DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf));
 	return 0;
 }
 
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index c613fc0..2418289 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -105,7 +105,8 @@
 	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
 
 	print_modules();
-	printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
+	printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current),
+			smp_processor_id(), current->comm);
 	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s\n",
 	       regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
 	print_symbol("ip is at %s\n", ip);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c5cfcfa..cbf67f1 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -208,6 +208,48 @@
 
 __initcall(register_memory);
 
+
+#ifdef CONFIG_KEXEC
+static void __init setup_crashkernel(unsigned long total, int *n)
+{
+	unsigned long long base = 0, size = 0;
+	int ret;
+
+	ret = parse_crashkernel(boot_command_line, total,
+			&size, &base);
+	if (ret == 0 && size > 0) {
+		if (!base) {
+			sort_regions(rsvd_region, *n);
+			base = kdump_find_rsvd_region(size,
+					rsvd_region, *n);
+		}
+		if (base != ~0UL) {
+			printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+					"for crashkernel (System RAM: %ldMB)\n",
+					(unsigned long)(size >> 20),
+					(unsigned long)(base >> 20),
+					(unsigned long)(total >> 20));
+			rsvd_region[*n].start =
+				(unsigned long)__va(base);
+			rsvd_region[*n].end =
+				(unsigned long)__va(base + size);
+			(*n)++;
+			crashk_res.start = base;
+			crashk_res.end = base + size - 1;
+		}
+	}
+	efi_memmap_res.start = ia64_boot_param->efi_memmap;
+	efi_memmap_res.end = efi_memmap_res.start +
+		ia64_boot_param->efi_memmap_size;
+	boot_param_res.start = __pa(ia64_boot_param);
+	boot_param_res.end = boot_param_res.start +
+		sizeof(*ia64_boot_param);
+}
+#else
+static inline void __init setup_crashkernel(unsigned long total, int *n)
+{}
+#endif
+
 /**
  * reserve_memory - setup reserved memory areas
  *
@@ -219,6 +261,7 @@
 reserve_memory (void)
 {
 	int n = 0;
+	unsigned long total_memory;
 
 	/*
 	 * none of the entries in this table overlap
@@ -254,50 +297,11 @@
 		n++;
 #endif
 
-	efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
+	total_memory = efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
 	n++;
 
-#ifdef CONFIG_KEXEC
-	/* crashkernel=size@offset specifies the size to reserve for a crash
-	 * kernel. If offset is 0, then it is determined automatically.
-	 * By reserving this memory we guarantee that linux never set's it
-	 * up as a DMA target.Useful for holding code to do something
-	 * appropriate after a kernel panic.
-	 */
-	{
-		char *from = strstr(boot_command_line, "crashkernel=");
-		unsigned long base, size;
-		if (from) {
-			size = memparse(from + 12, &from);
-			if (*from == '@')
-				base = memparse(from+1, &from);
-			else
-				base = 0;
-			if (size) {
-				if (!base) {
-					sort_regions(rsvd_region, n);
-					base = kdump_find_rsvd_region(size,
-							      	rsvd_region, n);
-					}
-				if (base != ~0UL) {
-					rsvd_region[n].start =
-						(unsigned long)__va(base);
-					rsvd_region[n].end =
-						(unsigned long)__va(base + size);
-					n++;
-					crashk_res.start = base;
-					crashk_res.end = base + size - 1;
-				}
-			}
-		}
-		efi_memmap_res.start = ia64_boot_param->efi_memmap;
-                efi_memmap_res.end = efi_memmap_res.start +
-                        ia64_boot_param->efi_memmap_size;
-                boot_param_res.start = __pa(ia64_boot_param);
-                boot_param_res.end = boot_param_res.start +
-                        sizeof(*ia64_boot_param);
-	}
-#endif
+	setup_crashkernel(total_memory, &n);
+
 	/* end of memory marker */
 	rsvd_region[n].start = ~0UL;
 	rsvd_region[n].end   = ~0UL;
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index aeec818..cdb64cc 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -227,7 +227,7 @@
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = task_pid_vnr(current);
 	si.si_uid = current->uid;
 	si.si_addr = sc;
 	force_sig_info(SIGSEGV, &si, current);
@@ -332,7 +332,7 @@
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = task_pid_vnr(current);
 	si.si_uid = current->uid;
 	si.si_addr = addr;
 	force_sig_info(SIGSEGV, &si, current);
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 3aeaf15..78d65cb 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -61,7 +61,7 @@
 
 	if (++die.lock_owner_depth < 3) {
 		printk("%s[%d]: %s %ld [%d]\n",
-			current->comm, current->pid, str, err, ++die_counter);
+		current->comm, task_pid_nr(current), str, err, ++die_counter);
 		(void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
 		show_regs(regs);
   	} else
@@ -315,7 +315,7 @@
 				last.time = current_jiffies + 5 * HZ;
 				printk(KERN_WARNING
 		       			"%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
-		       			current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr);
+		       			current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
 			}
 		}
 	}
@@ -453,7 +453,7 @@
 		if (code == 8) {
 # ifdef CONFIG_IA64_PRINT_HAZARDS
 			printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
-			       current->comm, current->pid,
+			       current->comm, task_pid_nr(current),
 			       regs.cr_iip + ia64_psr(&regs)->ri, regs.pr);
 # endif
 			return;
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index fe6aa5a..2173de9 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -1340,7 +1340,8 @@
 			size_t len;
 
 			len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
-				      "ip=0x%016lx\n\r", current->comm, current->pid,
+				      "ip=0x%016lx\n\r", current->comm,
+				      task_pid_nr(current),
 				      ifa, regs->cr_iip + ipsr->ri);
 			/*
 			 * Don't call tty_write_message() if we're in the kernel; we might
@@ -1363,7 +1364,7 @@
 				       "administrator\n"
 				       "echo 0 > /proc/sys/kernel/ignore-"
 				       "unaligned-usertrap to re-enable\n",
-				       current->comm, current->pid);
+				       current->comm, task_pid_nr(current));
 			}
 		}
 	} else {
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 32f2625..7571076 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -274,7 +274,7 @@
 
   out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 3e10152..c6c19bf 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -127,8 +127,8 @@
 		vma->vm_mm = current->mm;
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
-		vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
 		vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
+		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 		down_write(&current->mm->mmap_sem);
 		if (insert_vm_struct(current->mm, vma)) {
 			up_write(&current->mm->mmap_sem);
diff --git a/arch/ia64/oprofile/Kconfig b/arch/ia64/oprofile/Kconfig
deleted file mode 100644
index 97271ab..0000000
--- a/arch/ia64/oprofile/Kconfig
+++ /dev/null
@@ -1,20 +0,0 @@
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  Due to firmware bugs, you may need to use the "nohalt" boot
-	  option if you're using OProfile with the hardware performance
-	  counters.
-
-	  If unsure, say N.
-
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index bd5fe76..ab9a264 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -426,7 +426,7 @@
 
 source "fs/Kconfig"
 
-source "arch/m32r/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/m32r/Kconfig.debug"
 
diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c
index 97e0b1c..89ba4a0 100644
--- a/arch/m32r/kernel/traps.c
+++ b/arch/m32r/kernel/traps.c
@@ -196,7 +196,7 @@
 		printk("SPI: %08lx\n", sp);
 	}
 	printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
-		current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current);
+		current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current);
 
 	/*
 	 * When in-kernel, we also print out the stack and code at the
diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c
index 70a766a..4a71df4 100644
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -271,7 +271,7 @@
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(tsk)) {
+	if (is_global_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/m32r/oprofile/Kconfig b/arch/m32r/oprofile/Kconfig
deleted file mode 100644
index 19d3773..0000000
--- a/arch/m32r/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 20a9c08..01dee84 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -683,6 +683,8 @@
 
 source "fs/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/m68k/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 4e2752a..97f556f 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -900,7 +900,7 @@
 	       regs->d4, regs->d5, regs->a0, regs->a1);
 
 	printk("Process %s (pid: %d, task=%p)\n",
-		current->comm, current->pid, current);
+		current->comm, task_pid_nr(current), current);
 	addr = (unsigned long)&fp->un;
 	printk("Frame format=%X ", regs->format);
 	switch (regs->format) {
@@ -1038,7 +1038,7 @@
 				fp->un.fmtb.daddr, space_names[ssw & DFC],
 				fp->ptregs.pc);
 	}
-	printk ("Current process id is %d\n", current->pid);
+	printk ("Current process id is %d\n", task_pid_nr(current));
 	die_if_kernel("BAD KERNEL TRAP", &fp->ptregs, 0);
 }
 
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index eaa6186..f493f03 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -180,7 +180,7 @@
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index 185906b..f52c627 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -696,6 +696,8 @@
 
 source "fs/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/m68knommu/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index cb02758..4dc142d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2005,7 +2005,7 @@
 
 source "fs/Kconfig"
 
-source "arch/mips/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/mips/Kconfig.debug"
 
diff --git a/arch/mips/au1000/pb1200/irqmap.c b/arch/mips/au1000/pb1200/irqmap.c
index 5f48b06..bdf00e2 100644
--- a/arch/mips/au1000/pb1200/irqmap.c
+++ b/arch/mips/au1000/pb1200/irqmap.c
@@ -36,8 +36,8 @@
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/delay.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/bootinfo.h>
 #include <asm/io.h>
 #include <asm/mipsregs.h>
diff --git a/arch/mips/basler/excite/excite_irq.c b/arch/mips/basler/excite/excite_irq.c
index 1ecab63..4903e06 100644
--- a/arch/mips/basler/excite/excite_irq.c
+++ b/arch/mips/basler/excite/excite_irq.c
@@ -29,7 +29,7 @@
 #include <linux/timex.h>
 #include <linux/slab.h>
 #include <linux/random.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/bootinfo.h>
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/arch/mips/bcm47xx/time.c b/arch/mips/bcm47xx/time.c
index 0ab4676..0c6f47b 100644
--- a/arch/mips/bcm47xx/time.c
+++ b/arch/mips/bcm47xx/time.c
@@ -46,10 +46,3 @@
 	/* Set MIPS counter frequency for fixed_rate_gettimeoffset() */
 	mips_hpt_frequency = hz;
 }
-
-void __init
-plat_timer_setup(struct irqaction *irq)
-{
-	/* Enable the timer interrupt */
-	setup_irq(7, irq);
-}
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index 49bcc58..892d4c3 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -175,6 +175,7 @@
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
diff --git a/arch/mips/configs/mipssim_defconfig b/arch/mips/configs/mipssim_defconfig
index 86dcb74..61b72f5 100644
--- a/arch/mips/configs/mipssim_defconfig
+++ b/arch/mips/configs/mipssim_defconfig
@@ -1,71 +1,68 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.20
-# Tue Feb 20 21:47:35 2007
+# Linux kernel version: 2.6.23
+# Thu Oct 18 22:45:52 2007
 #
 CONFIG_MIPS=y
 
 #
 # Machine selection
 #
-CONFIG_ZONE_DMA=y
-# CONFIG_MIPS_MTX1 is not set
-# CONFIG_MIPS_BOSPORUS is not set
-# CONFIG_MIPS_PB1000 is not set
-# CONFIG_MIPS_PB1100 is not set
-# CONFIG_MIPS_PB1500 is not set
-# CONFIG_MIPS_PB1550 is not set
-# CONFIG_MIPS_PB1200 is not set
-# CONFIG_MIPS_DB1000 is not set
-# CONFIG_MIPS_DB1100 is not set
-# CONFIG_MIPS_DB1500 is not set
-# CONFIG_MIPS_DB1550 is not set
-# CONFIG_MIPS_DB1200 is not set
-# CONFIG_MIPS_MIRAGE is not set
+# CONFIG_MACH_ALCHEMY is not set
 # CONFIG_BASLER_EXCITE is not set
+# CONFIG_BCM47XX is not set
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MACH_JAZZ is not set
+# CONFIG_LASAT is not set
+# CONFIG_LEMOTE_FULONG is not set
 # CONFIG_MIPS_ATLAS is not set
 # CONFIG_MIPS_MALTA is not set
 # CONFIG_MIPS_SEAD is not set
-# CONFIG_WR_PPMC is not set
 CONFIG_MIPS_SIM=y
-# CONFIG_MOMENCO_JAGUAR_ATX is not set
-# CONFIG_MIPS_XXS1500 is not set
+# CONFIG_MARKEINS is not set
+# CONFIG_MACH_VR41XX is not set
 # CONFIG_PNX8550_JBS is not set
 # CONFIG_PNX8550_STB810 is not set
-# CONFIG_MACH_VR41XX is not set
+# CONFIG_PMC_MSP is not set
 # CONFIG_PMC_YOSEMITE is not set
 # CONFIG_QEMU is not set
-# CONFIG_MARKEINS is not set
 # CONFIG_SGI_IP22 is not set
 # CONFIG_SGI_IP27 is not set
 # CONFIG_SGI_IP32 is not set
-# CONFIG_SIBYTE_BIGSUR is not set
-# CONFIG_SIBYTE_SWARM is not set
-# CONFIG_SIBYTE_SENTOSA is not set
-# CONFIG_SIBYTE_RHONE is not set
-# CONFIG_SIBYTE_CARMEL is not set
-# CONFIG_SIBYTE_PTSWARM is not set
-# CONFIG_SIBYTE_LITTLESUR is not set
 # CONFIG_SIBYTE_CRHINE is not set
+# CONFIG_SIBYTE_CARMEL is not set
 # CONFIG_SIBYTE_CRHONE is not set
+# CONFIG_SIBYTE_RHONE is not set
+# CONFIG_SIBYTE_SWARM is not set
+# CONFIG_SIBYTE_LITTLESUR is not set
+# CONFIG_SIBYTE_SENTOSA is not set
+# CONFIG_SIBYTE_PTSWARM is not set
+# CONFIG_SIBYTE_BIGSUR is not set
 # CONFIG_SNI_RM is not set
 # CONFIG_TOSHIBA_JMR3927 is not set
 # CONFIG_TOSHIBA_RBTX4927 is not set
 # CONFIG_TOSHIBA_RBTX4938 is not set
+# CONFIG_WR_PPMC is not set
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 # CONFIG_ARCH_HAS_ILOG2_U32 is not set
 # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 CONFIG_GENERIC_FIND_NEXT_BIT=y
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_CLOCKEVENTS=y
 CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CMOS_UPDATE=y
 CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
 # CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ is not set
+CONFIG_BOOT_RAW=y
+CONFIG_CEVT_R4K=y
 CONFIG_DMA_NONCOHERENT=y
 CONFIG_DMA_NEED_PCI_MAP_STATE=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_SYS_HAS_EARLY_PRINTK=y
+# CONFIG_HOTPLUG_CPU is not set
+# CONFIG_NO_IOPORT is not set
 # CONFIG_CPU_BIG_ENDIAN is not set
 CONFIG_CPU_LITTLE_ENDIAN=y
 CONFIG_SYS_SUPPORTS_BIG_ENDIAN=y
@@ -76,6 +73,11 @@
 #
 # CPU selection
 #
+# CONFIG_TICK_ONESHOT is not set
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+# CONFIG_CPU_LOONGSON2 is not set
 CONFIG_CPU_MIPS32_R1=y
 # CONFIG_CPU_MIPS32_R2 is not set
 # CONFIG_CPU_MIPS64_R1 is not set
@@ -115,8 +117,8 @@
 CONFIG_MIPS_MT_DISABLED=y
 # CONFIG_MIPS_MT_SMP is not set
 # CONFIG_MIPS_MT_SMTC is not set
+CONFIG_SYS_SUPPORTS_MULTITHREADING=y
 # CONFIG_MIPS_VPE_LOADER is not set
-# CONFIG_64BIT_PHYS_ADDR is not set
 CONFIG_CPU_HAS_LLSC=y
 CONFIG_CPU_HAS_SYNC=y
 CONFIG_GENERIC_HARDIRQS=y
@@ -130,50 +132,52 @@
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 # CONFIG_SPARSEMEM_STATIC is not set
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
 CONFIG_SPLIT_PTLOCK_CPUS=4
 # CONFIG_RESOURCES_64BIT is not set
-CONFIG_ZONE_DMA_FLAG=1
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
 # CONFIG_HZ_48 is not set
-# CONFIG_HZ_100 is not set
+CONFIG_HZ_100=y
 # CONFIG_HZ_128 is not set
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_256 is not set
-CONFIG_HZ_1000=y
+# CONFIG_HZ_1000 is not set
 # CONFIG_HZ_1024 is not set
 CONFIG_SYS_SUPPORTS_ARBIT_HZ=y
-CONFIG_HZ=1000
+CONFIG_HZ=100
 CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_KEXEC is not set
+# CONFIG_SECCOMP is not set
 CONFIG_LOCKDEP_SUPPORT=y
 CONFIG_STACKTRACE_SUPPORT=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
-# Code maturity level options
+# General setup
 #
 CONFIG_EXPERIMENTAL=y
 CONFIG_BROKEN_ON_SMP=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
+# CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
-# CONFIG_IPC_NS is not set
 CONFIG_SYSVIPC_SYSCTL=y
 # CONFIG_POSIX_MQUEUE is not set
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
-# CONFIG_UTS_NS is not set
+# CONFIG_USER_NS is not set
 # CONFIG_AUDIT is not set
 # CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_FAIR_USER_SCHED=y
 CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_RELAY is not set
+# CONFIG_BLK_DEV_INITRD is not set
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_EMBEDDED=y
@@ -187,31 +191,29 @@
 CONFIG_ELF_CORE=y
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
+CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
-CONFIG_SLAB=y
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
 CONFIG_RT_MUTEXES=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
-# CONFIG_SLOB is not set
-
-#
-# Loadable module support
-#
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_FORCE_UNLOAD is not set
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_KMOD=y
-
-#
-# Block layer
-#
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
 
 #
 # IO Schedulers
@@ -229,18 +231,11 @@
 #
 # Bus options (PCI, PCMCIA, EISA, ISA, TC)
 #
+# CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_MMU=y
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
 # CONFIG_PCCARD is not set
 
 #
-# PCI Hotplug Support
-#
-
-#
 # Executable file formats
 #
 CONFIG_BINFMT_ELF=y
@@ -250,9 +245,8 @@
 #
 # Power management options
 #
-CONFIG_PM=y
-# CONFIG_PM_LEGACY is not set
-# CONFIG_PM_DEBUG is not set
+# CONFIG_PM is not set
+CONFIG_SUSPEND_UP_POSSIBLE=y
 
 #
 # Networking
@@ -262,75 +256,50 @@
 #
 # Networking options
 #
-# CONFIG_NETDEBUG is not set
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
-CONFIG_XFRM=y
-# CONFIG_XFRM_USER is not set
-# CONFIG_XFRM_SUB_POLICY is not set
-CONFIG_XFRM_MIGRATE=y
-CONFIG_NET_KEY=y
-CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NET_KEY is not set
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
 CONFIG_ASK_IP_FIB_HASH=y
 # CONFIG_IP_FIB_TRIE is not set
 CONFIG_IP_FIB_HASH=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-# CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set
-CONFIG_IP_ROUTE_VERBOSE=y
+# CONFIG_IP_MULTIPLE_TABLES is not set
+# CONFIG_IP_ROUTE_MULTIPATH is not set
+# CONFIG_IP_ROUTE_VERBOSE is not set
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 # CONFIG_IP_PNP_RARP is not set
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
+# CONFIG_IP_MROUTE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
 # CONFIG_INET_XFRM_TUNNEL is not set
 # CONFIG_INET_TUNNEL is not set
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=y
 CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
 CONFIG_TCP_CONG_CUBIC=y
 CONFIG_DEFAULT_TCP_CONG="cubic"
-CONFIG_TCP_MD5SIG=y
+# CONFIG_TCP_MD5SIG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_INET6_XFRM_TUNNEL is not set
 # CONFIG_INET6_TUNNEL is not set
-CONFIG_NETWORK_SECMARK=y
+# CONFIG_NETWORK_SECMARK is not set
 # CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
 # CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-CONFIG_IP_SCTP=m
-# CONFIG_SCTP_DBG_MSG is not set
-# CONFIG_SCTP_DBG_OBJCNT is not set
-# CONFIG_SCTP_HMAC_NONE is not set
-# CONFIG_SCTP_HMAC_SHA1 is not set
-CONFIG_SCTP_HMAC_MD5=y
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
+# CONFIG_IP_SCTP is not set
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
@@ -347,44 +316,7 @@
 #
 # QoS and/or fair queueing
 #
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_FIFO=y
-CONFIG_NET_SCH_CLK_JIFFIES=y
-# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
-# CONFIG_NET_SCH_CLK_CPU is not set
-
-#
-# Queueing/Scheduling
-#
-CONFIG_NET_SCH_CBQ=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_HFSC=m
-CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_RED=m
-CONFIG_NET_SCH_SFQ=m
-CONFIG_NET_SCH_TEQL=m
-CONFIG_NET_SCH_TBF=m
-CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_SCH_INGRESS=m
-
-#
-# Classification
-#
-CONFIG_NET_CLS=y
-CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
-CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_ROUTE=y
-# CONFIG_NET_CLS_FW is not set
-# CONFIG_NET_CLS_U32 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_EMATCH is not set
-# CONFIG_NET_CLS_ACT is not set
-# CONFIG_NET_CLS_POLICE is not set
-CONFIG_NET_ESTIMATOR=y
+# CONFIG_NET_SCHED is not set
 
 #
 # Network testing
@@ -393,8 +325,17 @@
 # CONFIG_HAMRADIO is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_MAC80211 is not set
 # CONFIG_IEEE80211 is not set
-CONFIG_FIB_RULES=y
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
 
 #
 # Device Drivers
@@ -403,52 +344,25 @@
 #
 # Generic Driver Options
 #
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FW_LOADER is not set
 # CONFIG_DEBUG_DRIVER is not set
 # CONFIG_DEBUG_DEVRES is not set
 # CONFIG_SYS_HYPERVISOR is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
 # CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
 # CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
 # CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-# CONFIG_PNPACPI is not set
-
-#
-# Block devices
-#
+CONFIG_BLK_DEV=y
 # CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=y
 # CONFIG_BLK_DEV_CRYPTOLOOP is not set
 CONFIG_BLK_DEV_NBD=y
 # CONFIG_BLK_DEV_RAM is not set
-# CONFIG_BLK_DEV_INITRD is not set
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_ATA_OVER_ETH is not set
-
-#
-# Misc devices
-#
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
+# CONFIG_MISC_DEVICES is not set
 # CONFIG_IDE is not set
 
 #
@@ -456,48 +370,29 @@
 #
 # CONFIG_RAID_ATTRS is not set
 # CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
 # CONFIG_SCSI_NETLINK is not set
-
-#
-# Serial ATA (prod) and Parallel ATA (experimental) drivers
-#
 # CONFIG_ATA is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
 # CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# I2O device support
-#
-
-#
-# Network device support
-#
 CONFIG_NETDEVICES=y
+# CONFIG_NETDEVICES_MULTIQUEUE is not set
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
 # CONFIG_EQUALIZER is not set
 # CONFIG_TUN is not set
+# CONFIG_VETH is not set
 # CONFIG_PHYLIB is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
 CONFIG_NET_ETHERNET=y
 # CONFIG_MII is not set
+# CONFIG_AX88796 is not set
 CONFIG_MIPS_SIM_NET=y
 # CONFIG_DM9000 is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_B44 is not set
 # CONFIG_NETDEV_1000 is not set
 # CONFIG_NETDEV_10000 is not set
 
@@ -513,49 +408,18 @@
 # CONFIG_NETCONSOLE is not set
 # CONFIG_NETPOLL is not set
 # CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
 # CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
 # CONFIG_PHONE is not set
 
 #
 # Input device support
 #
-CONFIG_INPUT=y
-# CONFIG_INPUT_FF_MEMLESS is not set
-
-#
-# Userland interfaces
-#
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
+# CONFIG_INPUT is not set
 
 #
 # Hardware I/O ports
 #
-CONFIG_SERIO=y
-# CONFIG_SERIO_I8042 is not set
-CONFIG_SERIO_SERPORT=y
-# CONFIG_SERIO_LIBPS2 is not set
-# CONFIG_SERIO_RAW is not set
+# CONFIG_SERIO is not set
 # CONFIG_GAMEPORT is not set
 
 #
@@ -581,31 +445,13 @@
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
 # CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
 # CONFIG_WATCHDOG is not set
 # CONFIG_HW_RANDOM is not set
 # CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
-# CONFIG_DTLK is not set
 # CONFIG_R3964 is not set
 # CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
 # CONFIG_TCG_TPM is not set
-
-#
-# I2C support
-#
 # CONFIG_I2C is not set
 
 #
@@ -613,118 +459,60 @@
 #
 # CONFIG_SPI is not set
 # CONFIG_SPI_MASTER is not set
-
-#
-# Dallas's 1-wire bus
-#
 # CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
 
 #
-# Hardware Monitoring support
+# Sonics Silicon Backplane
 #
-# CONFIG_HWMON is not set
-# CONFIG_HWMON_VID is not set
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_SM501 is not set
 
 #
 # Multimedia devices
 #
 # CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_DAB is not set
 
 #
 # Graphics support
 #
-# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
 # CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
 
 #
 # Sound
 #
 # CONFIG_SOUND is not set
-
-#
-# HID Devices
-#
-# CONFIG_HID is not set
-
-#
-# USB support
-#
-# CONFIG_USB_ARCH_HAS_HCD is not set
-# CONFIG_USB_ARCH_HAS_OHCI is not set
-# CONFIG_USB_ARCH_HAS_EHCI is not set
-
-#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
+# CONFIG_USB_SUPPORT is not set
 # CONFIG_MMC is not set
-
-#
-# LED devices
-#
 # CONFIG_NEW_LEDS is not set
-
-#
-# LED drivers
-#
-
-#
-# LED Triggers
-#
-
-#
-# InfiniBand support
-#
-
-#
-# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
-#
-
-#
-# Real Time Clock
-#
+CONFIG_RTC_LIB=y
 # CONFIG_RTC_CLASS is not set
 
 #
-# DMA Engine support
+# Userspace I/O
 #
-# CONFIG_DMA_ENGINE is not set
-
-#
-# DMA Clients
-#
-
-#
-# DMA Devices
-#
-
-#
-# Auxiliary Display support
-#
-
-#
-# Virtualization
-#
+# CONFIG_UIO is not set
 
 #
 # File systems
 #
-CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
-# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT2_FS is not set
 # CONFIG_EXT3_FS is not set
 # CONFIG_EXT4DEV_FS is not set
 # CONFIG_REISERFS_FS is not set
@@ -732,6 +520,7 @@
 # CONFIG_FS_POSIX_ACL is not set
 # CONFIG_XFS_FS is not set
 # CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
 # CONFIG_MINIX_FS is not set
 CONFIG_ROMFS_FS=y
 # CONFIG_INOTIFY is not set
@@ -760,10 +549,11 @@
 CONFIG_PROC_FS=y
 # CONFIG_PROC_KCORE is not set
 CONFIG_PROC_SYSCTL=y
-# CONFIG_SYSFS is not set
-# CONFIG_TMPFS is not set
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
 # CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
+# CONFIG_CONFIGFS_FS is not set
 
 #
 # Miscellaneous filesystems
@@ -781,10 +571,7 @@
 # CONFIG_QNX4FS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
+CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 # CONFIG_NFS_V3_ACL is not set
@@ -796,6 +583,7 @@
 CONFIG_LOCKD_V4=y
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
+# CONFIG_SUNRPC_BIND34 is not set
 # CONFIG_RPCSEC_GSS_KRB5 is not set
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -803,22 +591,14 @@
 # CONFIG_NCP_FS is not set
 # CONFIG_CODA_FS is not set
 # CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
 
 #
 # Partition Types
 #
 # CONFIG_PARTITION_ADVANCED is not set
 CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
 # CONFIG_NLS is not set
-
-#
-# Distributed Lock Manager
-#
+# CONFIG_DLM is not set
 
 #
 # Profiling support
@@ -833,20 +613,22 @@
 CONFIG_ENABLE_MUST_CHECK=y
 # CONFIG_MAGIC_SYSRQ is not set
 # CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
-CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_SCHED_DEBUG is not set
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_SLAB is not set
 # CONFIG_DEBUG_RT_MUTEXES is not set
 # CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
-CONFIG_DEBUG_MUTEXES=y
+# CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_LOCK_ALLOC is not set
 # CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
 # CONFIG_DEBUG_KOBJECT is not set
@@ -854,7 +636,9 @@
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_LIST is not set
 CONFIG_FORCED_INLINING=y
+# CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_FAULT_INJECTION is not set
 CONFIG_CROSSCOMPILE=y
 CONFIG_CMDLINE="nfsroot=192.168.192.169:/u1/mipsel,timeo=20 ip=dhcp"
 # CONFIG_DEBUG_STACK_USAGE is not set
@@ -865,60 +649,20 @@
 # Security options
 #
 # CONFIG_KEYS is not set
-
-#
-# Cryptographic options
-#
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_ALGAPI=y
-CONFIG_CRYPTO_BLKCIPHER=m
-CONFIG_CRYPTO_HASH=y
-CONFIG_CRYPTO_MANAGER=y
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_XCBC=m
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_MD4 is not set
-CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-CONFIG_CRYPTO_GF128MUL=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_CBC=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_LRW=m
-# CONFIG_CRYPTO_DES is not set
-CONFIG_CRYPTO_FCRYPT=m
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_CRC32C is not set
-CONFIG_CRYPTO_CAMELLIA=m
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Hardware crypto devices
-#
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
 
 #
 # Library routines
 #
-CONFIG_BITREVERSE=y
 # CONFIG_CRC_CCITT is not set
-CONFIG_CRC16=y
-CONFIG_CRC32=y
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_ITU_T is not set
+# CONFIG_CRC32 is not set
+# CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
diff --git a/arch/mips/configs/sb1250-swarm_defconfig b/arch/mips/configs/sb1250-swarm_defconfig
index 3ed991a..49dfcef 100644
--- a/arch/mips/configs/sb1250-swarm_defconfig
+++ b/arch/mips/configs/sb1250-swarm_defconfig
@@ -196,6 +196,7 @@
 # CONFIG_UTS_NS is not set
 # CONFIG_AUDIT is not set
 # CONFIG_IKCONFIG is not set
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
diff --git a/arch/mips/emma2rh/markeins/setup.c b/arch/mips/emma2rh/markeins/setup.c
index 5e1da53..82f9e901 100644
--- a/arch/mips/emma2rh/markeins/setup.c
+++ b/arch/mips/emma2rh/markeins/setup.c
@@ -104,12 +104,6 @@
 	mips_hpt_frequency = (bus_frequency * (4 + reg)) / 4 / 2;
 }
 
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	/* we are using the cpu counter for timer interrupts */
-	setup_irq(CPU_IRQ_BASE + 7, irq);
-}
-
 static void markeins_board_init(void);
 extern void markeins_irq_setup(void);
 
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 08b84d4..a915e56 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -10,6 +10,7 @@
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
 
+#include <asm/smtc_ipi.h>
 #include <asm/time.h>
 
 static int mips_next_event(unsigned long delta,
diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c
index b997af7..7852c7c 100644
--- a/arch/mips/kernel/irixelf.c
+++ b/arch/mips/kernel/irixelf.c
@@ -1172,8 +1172,8 @@
 	prstatus.pr_sighold = current->blocked.sig[0];
 	psinfo.pr_pid = prstatus.pr_pid = current->pid;
 	psinfo.pr_ppid = prstatus.pr_ppid = current->parent->pid;
-	psinfo.pr_pgrp = prstatus.pr_pgrp = process_group(current);
-	psinfo.pr_sid = prstatus.pr_sid = process_session(current);
+	psinfo.pr_pgrp = prstatus.pr_pgrp = task_pgrp_nr(current);
+	psinfo.pr_sid = prstatus.pr_sid = task_session_nr(current);
 	if (current->pid == current->tgid) {
 		/*
 		 * This is the record for the group leader.  Add in the
diff --git a/arch/mips/kernel/irixsig.c b/arch/mips/kernel/irixsig.c
index 85c2e38..a0a9105 100644
--- a/arch/mips/kernel/irixsig.c
+++ b/arch/mips/kernel/irixsig.c
@@ -609,7 +609,7 @@
 		p = list_entry(_p, struct task_struct, sibling);
 		if ((type == IRIX_P_PID) && p->pid != pid)
 			continue;
-		if ((type == IRIX_P_PGID) && process_group(p) != pid)
+		if ((type == IRIX_P_PGID) && task_pgrp_nr(p) != pid)
 			continue;
 		if ((p->exit_signal != SIGCHLD))
 			continue;
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
index ee7790d..4c477c7 100644
--- a/arch/mips/kernel/sysirix.c
+++ b/arch/mips/kernel/sysirix.c
@@ -763,11 +763,11 @@
 	printk("[%s:%d] setpgrp(%d) ", current->comm, current->pid, flags);
 #endif
 	if(!flags)
-		error = process_group(current);
+		error = task_pgrp_nr(current);
 	else
 		error = sys_setsid();
 #ifdef DEBUG_PROCGRPS
-	printk("returning %d\n", process_group(current));
+	printk("returning %d\n", task_pgrp_nr(current));
 #endif
 
 	return error;
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index ea7cfe7..c4e6866 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -40,17 +40,6 @@
 #include <irq.h>
 
 /*
- * The integer part of the number of usecs per jiffy is taken from tick,
- * but the fractional part is not recorded, so we calculate it using the
- * initial value of HZ.  This aids systems where tick isn't really an
- * integer (e.g. for HZ = 128).
- */
-#define USECS_PER_JIFFY		TICK_SIZE
-#define USECS_PER_JIFFY_FRAC	((unsigned long)(u32)((1000000ULL << 32) / HZ))
-
-#define TICK_SIZE	(tick_nsec / 1000)
-
-/*
  * forward reference
  */
 DEFINE_SPINLOCK(rtc_lock);
@@ -182,25 +171,48 @@
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static void __init init_mips_clocksource(void)
+void __init clocksource_set_clock(struct clocksource *cs, unsigned int clock)
 {
 	u64 temp;
 	u32 shift;
 
+	/* Find a shift value */
+	for (shift = 32; shift > 0; shift--) {
+		temp = (u64) NSEC_PER_SEC << shift;
+		do_div(temp, clock);
+		if ((temp >> 32) == 0)
+			break;
+	}
+	cs->shift = shift;
+	cs->mult = (u32) temp;
+}
+
+void __cpuinit clockevent_set_clock(struct clock_event_device *cd,
+	unsigned int clock)
+{
+	u64 temp;
+	u32 shift;
+
+	/* Find a shift value */
+	for (shift = 32; shift > 0; shift--) {
+		temp = (u64) NSEC_PER_SEC << shift;
+		do_div(temp, clock);
+		if ((temp >> 32) == 0)
+			break;
+	}
+	cd->shift = shift;
+	cd->mult = (u32) temp;
+}
+
+static void __init init_mips_clocksource(void)
+{
 	if (!mips_hpt_frequency || clocksource_mips.read == null_hpt_read)
 		return;
 
 	/* Calclate a somewhat reasonable rating value */
 	clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000;
-	/* Find a shift value */
-	for (shift = 32; shift > 0; shift--) {
-		temp = (u64) NSEC_PER_SEC << shift;
-		do_div(temp, mips_hpt_frequency);
-		if ((temp >> 32) == 0)
-			break;
-	}
-	clocksource_mips.shift = shift;
-	clocksource_mips.mult = (u32)temp;
+
+	clocksource_set_clock(&clocksource_mips, mips_hpt_frequency);
 
 	clocksource_register(&clocksource_mips);
 }
@@ -213,54 +225,6 @@
 {
 }
 
-#ifdef CONFIG_MIPS_MT_SMTC
-DEFINE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device);
-
-static void smtc_set_mode(enum clock_event_mode mode,
-                          struct clock_event_device *evt)
-{
-}
-
-static void mips_broadcast(cpumask_t mask)
-{
-	unsigned int cpu;
-
-	for_each_cpu_mask(cpu, mask)
-		smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
-}
-
-static void setup_smtc_dummy_clockevent_device(void)
-{
-	//uint64_t mips_freq = mips_hpt_^frequency;
-	unsigned int cpu = smp_processor_id();
-	struct clock_event_device *cd;
-
-	cd = &per_cpu(smtc_dummy_clockevent_device, cpu);
-
-	cd->name		= "SMTC";
-	cd->features		= CLOCK_EVT_FEAT_DUMMY;
-
-	/* Calculate the min / max delta */
-	cd->mult	= 0; //div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
-	cd->shift		= 0; //32;
-	cd->max_delta_ns	= 0; //clockevent_delta2ns(0x7fffffff, cd);
-	cd->min_delta_ns	= 0; //clockevent_delta2ns(0x30, cd);
-
-	cd->rating		= 200;
-	cd->irq			= 17; //-1;
-//	if (cpu)
-//		cd->cpumask	= CPU_MASK_ALL; // cpumask_of_cpu(cpu);
-//	else
-		cd->cpumask	= cpumask_of_cpu(cpu);
-
-	cd->set_mode		= smtc_set_mode;
-
-	cd->broadcast		= mips_broadcast;
-
-	clockevents_register_device(cd);
-}
-#endif
-
 void __init time_init(void)
 {
 	plat_time_init();
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 7b78d13..fa50078 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -314,7 +314,7 @@
 	__show_regs(regs);
 	print_modules();
 	printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n",
-	        current->comm, current->pid, current_thread_info(), current);
+	        current->comm, task_pid_nr(current), current_thread_info(), current);
 	show_stacktrace(current, regs);
 	show_code((unsigned int __user *) regs->cp0_epc);
 	printk("\n");
diff --git a/arch/mips/lemote/lm2e/setup.c b/arch/mips/lemote/lm2e/setup.c
index 09314a2..2cc67459 100644
--- a/arch/mips/lemote/lm2e/setup.c
+++ b/arch/mips/lemote/lm2e/setup.c
@@ -53,11 +53,6 @@
 unsigned int memsize;
 unsigned int highmemsize = 0;
 
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	setup_irq(MIPS_CPU_IRQ_BASE + 7, irq);
-}
-
 void __init plat_time_init(void)
 {
 	/* setup mips r4k timer */
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 5699c77..fa636fc 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -173,7 +173,7 @@
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(tsk)) {
+	if (is_global_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/mips/oprofile/Kconfig b/arch/mips/oprofile/Kconfig
deleted file mode 100644
index fb6f235..0000000
--- a/arch/mips/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING && !MIPS_MT_SMTC && EXPERIMENTAL
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/mips/pmc-sierra/msp71xx/msp_time.c b/arch/mips/pmc-sierra/msp71xx/msp_time.c
index f221d47..7cfeda5 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_time.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_time.c
@@ -86,8 +86,5 @@
 #ifdef CONFIG_IRQ_MSP_CIC
 	/* we are using the vpe0 counter for timer interrupts */
 	setup_irq(MSP_INT_VPE0_TIMER, irq);
-#else
-	/* we are using the mips counter for timer interrupts */
-	setup_irq(MSP_INT_TIMER, irq);
 #endif
 }
diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c
index 015fcc3..855977c 100644
--- a/arch/mips/pmc-sierra/yosemite/setup.c
+++ b/arch/mips/pmc-sierra/yosemite/setup.c
@@ -137,11 +137,6 @@
 	return 0;
 }
 
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	setup_irq(7, irq);
-}
-
 void __init plat_time_init(void)
 {
 	mips_hpt_frequency = cpu_clock_freq / 2;
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index 6eac36d..02b266a 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -69,8 +69,9 @@
 
 void bcm1480_smp_finish(void)
 {
-	extern void bcm1480_time_init(void);
-	bcm1480_time_init();
+	extern void sb1480_clockevent_init(void);
+
+	sb1480_clockevent_init();
 	local_irq_enable();
 }
 
diff --git a/arch/mips/sibyte/bcm1480/time.c b/arch/mips/sibyte/bcm1480/time.c
index 5b4bfbb..c730744 100644
--- a/arch/mips/sibyte/bcm1480/time.c
+++ b/arch/mips/sibyte/bcm1480/time.c
@@ -27,9 +27,8 @@
  */
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
-#include <linux/sched.h>
+#include <linux/percpu.h>
 #include <linux/spinlock.h>
-#include <linux/kernel_stat.h>
 
 #include <asm/irq.h>
 #include <asm/addrspace.h>
@@ -101,25 +100,36 @@
 		break;
 
 	case CLOCK_EVT_MODE_UNUSED:	/* shuddup gcc */
+	case CLOCK_EVT_MODE_RESUME:
 		;
 	}
 }
 
-struct clock_event_device sibyte_hpt_clockevent = {
-	.name		= "bcm1480-counter",
-	.features	= CLOCK_EVT_FEAT_PERIODIC,
-	.set_mode	= sibyte_set_mode,
-	.shift		= 32,
-	.irq		= 0,
-};
+static int sibyte_next_event(unsigned long delta, struct clock_event_device *cd)
+{
+	unsigned int cpu = smp_processor_id();
+	void __iomem *timer_init;
+	unsigned int cnt;
+	int res;
+
+	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
+	cnt = __raw_readq(timer_init);
+	cnt += delta;
+	__raw_writeq(cnt, timer_init);
+	res = ((long)(__raw_readq(timer_init) - cnt ) > 0) ? -ETIME : 0;
+
+	return res;
+}
+
+static DEFINE_PER_CPU(struct clock_event_device, sibyte_hpt_clockevent);
 
 static irqreturn_t sibyte_counter_handler(int irq, void *dev_id)
 {
-	struct clock_event_device *cd = &sibyte_hpt_clockevent;
 	unsigned int cpu = smp_processor_id();
+	struct clock_event_device *cd = &per_cpu(sibyte_hpt_clockevent, cpu);
 
 	/* Reset the timer */
-	__raw_writeq(M_SCD_TIMER_ENABLE|M_SCD_TIMER_MODE_CONTINUOUS,
+	__raw_writeq(M_SCD_TIMER_ENABLE | M_SCD_TIMER_MODE_CONTINUOUS,
 	             IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG)));
 	cd->event_handler(cd);
 
@@ -140,26 +150,23 @@
  * called directly from irq_handler.S when IP[4] is set during an
  * interrupt
  */
-static void __init sb1480_clockevent_init(void)
+void __cpuinit sb1480_clockevent_init(void)
 {
 	unsigned int cpu = smp_processor_id();
 	unsigned int irq = K_BCM1480_INT_TIMER_0 + cpu;
+	struct clock_event_device *cd = &per_cpu(sibyte_hpt_clockevent, cpu);
+
+	cd->name		= "bcm1480-counter";
+	cd->features		= CLOCK_EVT_FEAT_PERIODIC |
+				  CLOCK_EVT_MODE_ONESHOT;
+	cd->set_next_event	= sibyte_next_event;
+	cd->set_mode		= sibyte_set_mode;
+	cd->irq			= irq;
+	clockevent_set_clock(cd, BCM1480_HPT_VALUE);
 
 	setup_irq(irq, &sibyte_counter_irqaction);
 }
 
-void bcm1480_timer_interrupt(void)
-{
-	int cpu = smp_processor_id();
-	int irq = K_BCM1480_INT_TIMER_0 + cpu;
-
-	/* Reset the timer */
-	__raw_writeq(M_SCD_TIMER_ENABLE|M_SCD_TIMER_MODE_CONTINUOUS,
-	      IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG)));
-
-	ll_timer_interrupt(irq);
-}
-
 static cycle_t bcm1480_hpt_read(void)
 {
 	/* We assume this function is called xtime_lock held. */
@@ -168,9 +175,26 @@
 	return (jiffies + 1) * (BCM1480_HPT_VALUE / HZ) - count;
 }
 
+struct clocksource bcm1480_clocksource = {
+	.name	= "MIPS",
+	.rating	= 200,
+	.read	= bcm1480_hpt_read,
+	.mask	= CLOCKSOURCE_MASK(32),
+	.shift	= 32,
+	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+void __init sb1480_clocksource_init(void)
+{
+	struct clocksource *cs = &bcm1480_clocksource;
+
+	clocksource_set_clock(cs, BCM1480_HPT_VALUE);
+	clocksource_register(cs);
+}
+
 void __init bcm1480_hpt_setup(void)
 {
-	clocksource_mips.read = bcm1480_hpt_read;
 	mips_hpt_frequency = BCM1480_HPT_VALUE;
+	sb1480_clocksource_init();
 	sb1480_clockevent_init();
 }
diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index 7659174..500d17e 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c
@@ -400,43 +400,11 @@
 
 #endif 	/* CONFIG_KGDB */
 
-static inline void sb1250_timer_interrupt(void)
-{
-	int cpu = smp_processor_id();
-	int irq = K_INT_TIMER_0 + cpu;
-
-	irq_enter();
-	kstat_this_cpu.irqs[irq]++;
-
-	write_seqlock(&xtime_lock);
-
-	/* ACK interrupt */
-	____raw_writeq(M_SCD_TIMER_ENABLE | M_SCD_TIMER_MODE_CONTINUOUS,
-		       IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG)));
-
-	/*
-	 * call the generic timer interrupt handling
-	 */
-	do_timer(1);
-
-	write_sequnlock(&xtime_lock);
-
-	/*
-	 * In UP mode, we call local_timer_interrupt() to do profiling
-	 * and process accouting.
-	 *
-	 * In SMP mode, local_timer_interrupt() is invoked by appropriate
-	 * low-level local timer interrupt handler.
-	 */
-	local_timer_interrupt(irq);
-
-	irq_exit();
-}
-
 extern void sb1250_mailbox_interrupt(void);
 
 asmlinkage void plat_irq_dispatch(void)
 {
+	unsigned int cpu = smp_processor_id();
 	unsigned int pending;
 
 	/*
@@ -454,7 +422,7 @@
 	if (pending & CAUSEF_IP7) /* CPU performance counter interrupt */
 		do_IRQ(MIPS_CPU_IRQ_BASE + 7);
 	else if (pending & CAUSEF_IP4)
-		sb1250_timer_interrupt();
+		do_IRQ(K_INT_TIMER_0 + cpu); 	/* sb1250_timer_interrupt() */
 
 #ifdef CONFIG_SMP
 	else if (pending & CAUSEF_IP3)
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index c38e1f3..aaa4f30 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -57,8 +57,9 @@
 
 void sb1250_smp_finish(void)
 {
-	extern void sb1250_time_init(void);
-	sb1250_time_init();
+	extern void sb1250_clockevent_init(void);
+
+	sb1250_clockevent_init();
 	local_irq_enable();
 }
 
diff --git a/arch/mips/sibyte/sb1250/time.c b/arch/mips/sibyte/sb1250/time.c
index fe11fed..9ef5462 100644
--- a/arch/mips/sibyte/sb1250/time.c
+++ b/arch/mips/sibyte/sb1250/time.c
@@ -100,6 +100,7 @@
 		break;
 
 	case CLOCK_EVT_MODE_UNUSED:	/* shuddup gcc */
+	case CLOCK_EVT_MODE_RESUME:
 		;
 	}
 }
@@ -144,79 +145,7 @@
 	.name		= "timer",
 };
 
-/*
- * The general purpose timer ticks at 1 Mhz independent if
- * the rest of the system
- */
-static void sibyte_set_mode(enum clock_event_mode mode,
-                           struct clock_event_device *evt)
-{
-	unsigned int cpu = smp_processor_id();
-	void __iomem *timer_cfg, *timer_init;
-
-	timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
-
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-		__raw_writeq(0, timer_cfg);
-		__raw_writeq((V_SCD_TIMER_FREQ / HZ) - 1, timer_init);
-		__raw_writeq(M_SCD_TIMER_ENABLE | M_SCD_TIMER_MODE_CONTINUOUS,
-			     timer_cfg);
-		break;
-
-	case CLOCK_EVT_MODE_ONESHOT:
-		/* Stop the timer until we actually program a shot */
-	case CLOCK_EVT_MODE_SHUTDOWN:
-		__raw_writeq(0, timer_cfg);
-		break;
-
-	case CLOCK_EVT_MODE_UNUSED:	/* shuddup gcc */
-		;
-	}
-}
-
-static int
-sibyte_next_event(unsigned long delta, struct clock_event_device *evt)
-{
-	unsigned int cpu = smp_processor_id();
-	void __iomem *timer_cfg, *timer_init;
-
-	timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
-
-	__raw_writeq(0, timer_cfg);
-	__raw_writeq(delta, timer_init);
-	__raw_writeq(M_SCD_TIMER_ENABLE, timer_cfg);
-
-	return 0;
-}
-
-struct clock_event_device sibyte_hpt_clockevent = {
-	.name		= "sb1250-counter",
-	.features	= CLOCK_EVT_FEAT_PERIODIC,
-	.set_mode	= sibyte_set_mode,
-	.set_next_event	= sibyte_next_event,
-	.shift		= 32,
-	.irq		= 0,
-};
-
-static irqreturn_t sibyte_counter_handler(int irq, void *dev_id)
-{
-	struct clock_event_device *cd = &sibyte_hpt_clockevent;
-
-	cd->event_handler(cd);
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction sibyte_irqaction = {
-	.handler	= sibyte_counter_handler,
-	.flags		= IRQF_DISABLED | IRQF_PERCPU,
-	.name		= "timer",
-};
-
-static void __init sb1250_clockevent_init(void)
+void __cpuinit sb1250_clockevent_init(void)
 {
 	struct clock_event_device *cd = &sibyte_hpt_clockevent;
 	unsigned int cpu = smp_processor_id();
@@ -249,12 +178,6 @@
 	clockevents_register_device(cd);
 }
 
-void __init plat_time_init(void)
-{
-	sb1250_clocksource_init();
-	sb1250_clockevent_init();
-}
-
 /*
  * The HPT is free running from SB1250_HPT_VALUE down to 0 then starts over
  * again.
@@ -267,3 +190,26 @@
 
 	return SB1250_HPT_VALUE - count;
 }
+
+struct clocksource bcm1250_clocksource = {
+	.name	= "MIPS",
+	.rating	= 200,
+	.read	= sb1250_hpt_read,
+	.mask	= CLOCKSOURCE_MASK(32),
+	.shift	= 32,
+	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+void __init sb1250_clocksource_init(void)
+{
+	struct clocksource *cs = &bcm1250_clocksource;
+
+	clocksource_set_clock(cs, V_SCD_TIMER_FREQ);
+	clocksource_register(cs);
+}
+
+void __init plat_time_init(void)
+{
+	sb1250_clocksource_init();
+	sb1250_clockevent_init();
+}
diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c
index 8b3ef0e..080c966 100644
--- a/arch/mips/sibyte/swarm/setup.c
+++ b/arch/mips/sibyte/swarm/setup.c
@@ -69,31 +69,6 @@
 	return "SiByte " SIBYTE_BOARD_NAME;
 }
 
-void __init plat_time_init(void)
-{
-#if defined(CONFIG_SIBYTE_SB1250) || defined(CONFIG_SIBYTE_BCM112X)
-	/* Setup HPT */
-	sb1250_hpt_setup();
-#endif
-}
-
-void __init plat_timer_setup(struct irqaction *irq)
-{
-        /*
-         * we don't set up irqaction, because we will deliver timer
-         * interrupts through low-level (direct) meachanism.
-         */
-
-        /* We only need to setup the generic timer */
-#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80)
-	bcm1480_time_init();
-#elif defined(CONFIG_SIBYTE_SB1250) || defined(CONFIG_SIBYTE_BCM112X)
-	sb1250_time_init();
-#else
-#error invalid SiByte board configuration
-#endif
-}
-
 int swarm_be_handler(struct pt_regs *regs, int is_fixup)
 {
 	if (!is_fixup && (regs->cp0_cause & 4)) {
diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index b808773..0910b35 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -121,15 +121,6 @@
 	setup_pit_timer();
 }
 
-/*
- * R4k counter based timer interrupt. Works on RM200-225 and possibly
- * others but not on RM400
- */
-static void __init sni_cpu_timer_setup(struct irqaction *irq)
-{
-        setup_irq(SNI_MIPS_IRQ_CPU_TIMER, irq);
-}
-
 void __init plat_timer_setup(struct irqaction *irq)
 {
 	switch (sni_brd_type) {
@@ -139,15 +130,6 @@
 	case SNI_BRD_MINITOWER:
 	        sni_a20r_timer_setup(irq);
 	        break;
-
-	case SNI_BRD_PCI_TOWER:
-	case SNI_BRD_RM200:
-	case SNI_BRD_PCI_MTOWER:
-	case SNI_BRD_PCI_DESKTOP:
-	case SNI_BRD_PCI_TOWER_CPLUS:
-	case SNI_BRD_PCI_MTOWER_CPLUS:
-	        sni_cpu_timer_setup(irq);
-	        break;
 	}
 }
 
diff --git a/arch/mips/tx4927/common/tx4927_setup.c b/arch/mips/tx4927/common/tx4927_setup.c
index 8ce0989..36c5f20 100644
--- a/arch/mips/tx4927/common/tx4927_setup.c
+++ b/arch/mips/tx4927/common/tx4927_setup.c
@@ -72,22 +72,6 @@
 #endif
 }
 
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	setup_irq(TX4927_IRQ_CPU_TIMER, irq);
-
-#ifdef CONFIG_TOSHIBA_RBTX4927
-	{
-		extern void toshiba_rbtx4927_timer_setup(struct irqaction
-							 *irq);
-		toshiba_rbtx4927_timer_setup(irq);
-	}
-#endif
-
-	return;
-}
-
-
 #ifdef DEBUG
 void print_cp0(char *key, int num, char *name, u32 val)
 {
diff --git a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
index b97102a..c7470fb 100644
--- a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
+++ b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
@@ -94,7 +94,6 @@
 #define TOSHIBA_RBTX4927_SETUP_EFWFU       ( 1 <<  3 )
 #define TOSHIBA_RBTX4927_SETUP_SETUP       ( 1 <<  4 )
 #define TOSHIBA_RBTX4927_SETUP_TIME_INIT   ( 1 <<  5 )
-#define TOSHIBA_RBTX4927_SETUP_TIMER_SETUP ( 1 <<  6 )
 #define TOSHIBA_RBTX4927_SETUP_PCIBIOS     ( 1 <<  7 )
 #define TOSHIBA_RBTX4927_SETUP_PCI1        ( 1 <<  8 )
 #define TOSHIBA_RBTX4927_SETUP_PCI2        ( 1 <<  9 )
@@ -108,7 +107,6 @@
     (TOSHIBA_RBTX4927_SETUP_NONE | TOSHIBA_RBTX4927_SETUP_INFO |
      TOSHIBA_RBTX4927_SETUP_WARN | TOSHIBA_RBTX4927_SETUP_EROR |
      TOSHIBA_RBTX4927_SETUP_EFWFU | TOSHIBA_RBTX4927_SETUP_SETUP |
-     TOSHIBA_RBTX4927_SETUP_TIME_INIT | TOSHIBA_RBTX4927_SETUP_TIMER_SETUP
      | TOSHIBA_RBTX4927_SETUP_PCIBIOS | TOSHIBA_RBTX4927_SETUP_PCI1 |
      TOSHIBA_RBTX4927_SETUP_PCI2 | TOSHIBA_RBTX4927_SETUP_PCI66);
 #endif
@@ -947,14 +945,6 @@
 
 }
 
-void __init toshiba_rbtx4927_timer_setup(struct irqaction *irq)
-{
-	TOSHIBA_RBTX4927_SETUP_DPRINTK(TOSHIBA_RBTX4927_SETUP_TIMER_SETUP,
-				       "-\n");
-	TOSHIBA_RBTX4927_SETUP_DPRINTK(TOSHIBA_RBTX4927_SETUP_TIMER_SETUP,
-				       "+\n");
-}
-
 static int __init toshiba_rbtx4927_rtc_init(void)
 {
 	static struct resource __initdata res = {
diff --git a/arch/mips/tx4938/common/setup.c b/arch/mips/tx4938/common/setup.c
index ab40822..3ba4101 100644
--- a/arch/mips/tx4938/common/setup.c
+++ b/arch/mips/tx4938/common/setup.c
@@ -24,7 +24,7 @@
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/irq.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/bootinfo.h>
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -43,8 +43,3 @@
 {
 	toshiba_rbtx4938_setup();
 }
-
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	setup_irq(TX4938_IRQ_CPU_TIMER, irq);
-}
diff --git a/arch/mips/vr41xx/common/init.c b/arch/mips/vr41xx/common/init.c
index 407cec2..8d760df 100644
--- a/arch/mips/vr41xx/common/init.c
+++ b/arch/mips/vr41xx/common/init.c
@@ -48,11 +48,6 @@
 		mips_hpt_frequency = tclock / 4;
 }
 
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	setup_irq(TIMER_IRQ, irq);
-}
-
 void __init plat_mem_setup(void)
 {
 	vr41xx_calculate_clock_frequency();
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 3d73545..b8ef178 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -267,7 +267,7 @@
 
 source "fs/Kconfig"
 
-source "arch/parisc/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/parisc/Kconfig.debug"
 
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index fb35ebc..2ce3806 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -181,7 +181,7 @@
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = task_pid_vnr(current);
 	si.si_uid = current->uid;
 	si.si_addr = &frame->uc;
 	force_sig_info(SIGSEGV, &si, current);
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index bbf029a..99fd569 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -219,7 +219,7 @@
 			return; /* STFU */
 
 		printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
-			current->comm, current->pid, str, err, regs->iaoq[0]);
+			current->comm, task_pid_nr(current), str, err, regs->iaoq[0]);
 #ifdef PRINT_USER_FAULTS
 		/* XXX for debugging only */
 		show_regs(regs);
@@ -252,7 +252,7 @@
 	
 	if (err)
 		printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
-			current->comm, current->pid, str, err);
+			current->comm, task_pid_nr(current), str, err);
 
 	/* Wot's wrong wif bein' racy? */
 	if (current->thread.flags & PARISC_KERNEL_DEATH) {
@@ -317,7 +317,7 @@
 	if (unlikely(iir != GDB_BREAK_INSN)) {
 		printk(KERN_DEBUG "break %d,%d: pid=%d command='%s'\n",
 			iir & 31, (iir>>13) & ((1<<13)-1),
-			current->pid, current->comm);
+			task_pid_nr(current), current->comm);
 		show_regs(regs);
 	}
 #endif
@@ -747,7 +747,7 @@
 		if (user_mode(regs)) {
 #ifdef PRINT_USER_FAULTS
 			printk(KERN_DEBUG "\nhandle_interruption() pid=%d command='%s'\n",
-			    current->pid, current->comm);
+			    task_pid_nr(current), current->comm);
 			show_regs(regs);
 #endif
 			/* SIGBUS, for lack of a better one. */
@@ -772,7 +772,7 @@
 		else
 			printk(KERN_DEBUG "User Fault (long pointer) (fault %d) ",
 			       code);
-		printk("pid=%d command='%s'\n", current->pid, current->comm);
+		printk("pid=%d command='%s'\n", task_pid_nr(current), current->comm);
 		show_regs(regs);
 #endif
 		si.si_signo = SIGSEGV;
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 347bb92..aebf3c1 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -469,7 +469,7 @@
 		    && ++unaligned_count < 5) {
 			char buf[256];
 			sprintf(buf, "%s(%d): unaligned access to 0x" RFMT " at ip=0x" RFMT "\n",
-				current->comm, current->pid, regs->ior, regs->iaoq[0]);
+				current->comm, task_pid_nr(current), regs->ior, regs->iaoq[0]);
 			printk(KERN_WARNING "%s", buf);
 #ifdef DEBUG_UNALIGNED
 			show_regs(regs);
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 1c091b4..b2e3e9a 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -211,7 +211,7 @@
 #ifdef PRINT_USER_FAULTS
 		printk(KERN_DEBUG "\n");
 		printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n",
-		    tsk->pid, tsk->comm, code, address);
+		    task_pid_nr(tsk), tsk->comm, code, address);
 		if (vma) {
 			printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n",
 					vma->vm_start, vma->vm_end);
diff --git a/arch/parisc/oprofile/Kconfig b/arch/parisc/oprofile/Kconfig
deleted file mode 100644
index 5ade198..0000000
--- a/arch/parisc/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-	  
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3763f68..18f397ca 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -669,20 +669,7 @@
 
 source "lib/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/powerpc/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes"
-	depends on !BOOKE && !4xx && KALLSYMS && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/powerpc/Kconfig.debug"
 
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 8b47c84..dcd7c02 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -68,6 +68,7 @@
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_RELAY is not set
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index bb8d4e4..05582af 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -71,6 +71,7 @@
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=17
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index c09eb8c..62a3840 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -71,6 +71,7 @@
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=17
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_RELAY is not set
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index e60a0c5..c0c8e8c 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -61,45 +61,39 @@
 	for(;;);
 }
 
-static int __init early_parse_crashk(char *p)
-{
-	unsigned long size;
-
-	if (!p)
-		return 1;
-
-	size = memparse(p, &p);
-
-	if (*p == '@')
-		crashk_res.start = memparse(p + 1, &p);
-	else
-		crashk_res.start = KDUMP_KERNELBASE;
-
-	crashk_res.end = crashk_res.start + size - 1;
-
-	return 0;
-}
-early_param("crashkernel", early_parse_crashk);
-
 void __init reserve_crashkernel(void)
 {
-	unsigned long size;
+	unsigned long long crash_size, crash_base;
+	int ret;
 
-	if (crashk_res.start == 0)
+	/* this is necessary because of lmb_phys_mem_size() */
+	lmb_analyze();
+
+	/* use common parsing */
+	ret = parse_crashkernel(boot_command_line, lmb_phys_mem_size(),
+			&crash_size, &crash_base);
+	if (ret == 0 && crash_size > 0) {
+		if (crash_base == 0)
+			crash_base = KDUMP_KERNELBASE;
+		crashk_res.start = crash_base;
+	} else {
+		/* handle the device tree */
+		crash_size = crashk_res.end - crashk_res.start + 1;
+	}
+
+	if (crash_size == 0)
 		return;
 
 	/* We might have got these values via the command line or the
 	 * device tree, either way sanitise them now. */
 
-	size = crashk_res.end - crashk_res.start + 1;
-
 	if (crashk_res.start != KDUMP_KERNELBASE)
 		printk("Crash kernel location must be 0x%x\n",
 				KDUMP_KERNELBASE);
 
 	crashk_res.start = KDUMP_KERNELBASE;
-	size = PAGE_ALIGN(size);
-	crashk_res.end = crashk_res.start + size - 1;
+	crash_size = PAGE_ALIGN(crash_size);
+	crashk_res.end = crashk_res.start + crash_size - 1;
 
 	/* Crash kernel trumps memory limit */
 	if (memory_limit && memory_limit <= crashk_res.end) {
@@ -108,7 +102,13 @@
 				memory_limit);
 	}
 
-	lmb_reserve(crashk_res.start, size);
+	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+			"for crashkernel (System RAM: %ldMB)\n",
+			(unsigned long)(crash_size >> 20),
+			(unsigned long)(crashk_res.start >> 20),
+			(unsigned long)(lmb_phys_mem_size() >> 20));
+
+	lmb_reserve(crashk_res.start, crash_size);
 }
 
 int overlaps_crashkernel(unsigned long start, unsigned long size)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ea6ad7a..b9d8837 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -459,7 +459,7 @@
 		printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr);
 #endif
 	printk("TASK = %p[%d] '%s' THREAD: %p",
-	       current, current->pid, current->comm, task_thread_info(current));
+	       current, task_pid_nr(current), current->comm, task_thread_info(current));
 
 #ifdef CONFIG_SMP
 	printk(" CPU: %d", smp_processor_id());
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index bf9e39c..59c464e 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -201,7 +201,7 @@
 	 * generate the same exception over and over again and we get
 	 * nowhere.  Better to kill it and let the kernel panic.
 	 */
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		__sighandler_t handler;
 
 		spin_lock_irq(&current->sighand->siglock);
@@ -881,7 +881,7 @@
 void trace_syscall(struct pt_regs *regs)
 {
 	printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
-	       current, current->pid, regs->nip, regs->link, regs->gpr[0],
+	       current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
 	       regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
 }
 
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ab3546c..a18fda3 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -375,7 +375,7 @@
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/powerpc/oprofile/Kconfig b/arch/powerpc/oprofile/Kconfig
deleted file mode 100644
index 7089e79..0000000
--- a/arch/powerpc/oprofile/Kconfig
+++ /dev/null
@@ -1,24 +0,0 @@
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-config OPROFILE_CELL
-	bool "OProfile for Cell Broadband Engine"
-	depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
-	default y
-	help
-	  Profiling of Cell BE SPUs requires special support enabled
-	  by this option.
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 354c058..144177d 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -41,13 +41,13 @@
 #include <linux/root_dev.h>
 #include <linux/serial.h>
 #include <linux/smp.h>
+#include <linux/bitops.h>
 
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/prom.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/kexec.h>
 #include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 3a393c7..a1ab25c 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -332,7 +332,7 @@
 		   err->disposition == RTAS_DISP_NOT_RECOVERED &&
 		   err->target == RTAS_TARGET_MEMORY &&
 		   err->type == RTAS_TYPE_ECC_UNCORR &&
-		   !(current->pid == 0 || is_init(current))) {
+		   !(current->pid == 0 || is_global_init(current))) {
 		/* Kill off a user process with an ECC error */
 		printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
 		       current->pid);
diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig
index 607925c..6473fa7 100644
--- a/arch/ppc/Kconfig
+++ b/arch/ppc/Kconfig
@@ -1317,7 +1317,7 @@
 
 source "lib/Kconfig"
 
-source "arch/powerpc/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/ppc/Kconfig.debug"
 
diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c
index 3f3b292..c785689 100644
--- a/arch/ppc/kernel/traps.c
+++ b/arch/ppc/kernel/traps.c
@@ -121,7 +121,7 @@
 	 * generate the same exception over and over again and we get
 	 * nowhere.  Better to kill it and let the kernel panic.
 	 */
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		__sighandler_t handler;
 
 		spin_lock_irq(&current->sighand->siglock);
diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c
index 94913dd..254c23b 100644
--- a/arch/ppc/mm/fault.c
+++ b/arch/ppc/mm/fault.c
@@ -290,7 +290,7 @@
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/ppc/platforms/chestnut.c b/arch/ppc/platforms/chestnut.c
index 248684f..dcd6070 100644
--- a/arch/ppc/platforms/chestnut.c
+++ b/arch/ppc/platforms/chestnut.c
@@ -49,7 +49,6 @@
 extern void gen550_init(int, struct uart_port *);
 extern void mv64360_pcibios_fixup(mv64x60_handle_t *bh);
 
-#define BIT(x) (1<<x)
 #define CHESTNUT_PRESERVE_MASK (BIT(MV64x60_CPU2DEV_0_WIN) | \
 				BIT(MV64x60_CPU2DEV_1_WIN) | \
 				BIT(MV64x60_CPU2DEV_2_WIN) | \
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b711321..4ec716d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -529,21 +529,7 @@
 
 source "fs/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/s390/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.	register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/s390/Kconfig.debug"
 
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index abb447a..70c5737 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -166,7 +166,7 @@
 
         printk("CPU:    %d    %s\n", task_thread_info(tsk)->cpu, print_tainted());
         printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
-	       current->comm, current->pid, (void *) tsk,
+	       current->comm, task_pid_nr(current), (void *) tsk,
 	       (void *) tsk->thread.ksp);
 
 	show_registers(regs);
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 60604b2..b159a9d 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -64,7 +64,7 @@
 
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 14c241c..2456b52 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -211,7 +211,7 @@
 	struct mm_struct *mm = tsk->mm;
 
 	up_read(&mm->mmap_sem);
-	if (is_init(tsk)) {
+	if (is_global_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		return 1;
diff --git a/arch/s390/oprofile/Kconfig b/arch/s390/oprofile/Kconfig
deleted file mode 100644
index 208220a..0000000
--- a/arch/s390/oprofile/Kconfig
+++ /dev/null
@@ -1,22 +0,0 @@
-
-menu "Profiling support"
-
-config PROFILING
-	bool "Profiling support"
-	help
-	  Say Y here to enable profiling support mechanisms used by
-	  profilers such as readprofile or OProfile.
-
-
-config OPROFILE
-	tristate "OProfile system profiling"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 44982c1..247f8a6 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -758,7 +758,7 @@
 
 source "fs/Kconfig"
 
-source "arch/sh/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/sh/Kconfig.debug"
 
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index 790ed69..5c17de5 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -104,24 +104,3 @@
 	(*rnk)(page_list, reboot_code_buffer, image->start, vbr_reg);
 }
 
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel.  By reserving this memory we guarantee
- * that linux never sets it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init parse_crashkernel(char *arg)
-{
-	unsigned long size, base;
-	size = memparse(arg, &arg);
-	if (*arg == '@') {
-		base = memparse(arg+1, &arg);
-		/* FIXME: Do I want a sanity check
-		 * to validate the memory range?
-		 */
-		crashk_res.start = base;
-		crashk_res.end   = base + size - 1;
-	}
-	return 0;
-}
-early_param("crashkernel", parse_crashkernel);
diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c
index b446999..6d7f2b0 100644
--- a/arch/sh/kernel/process.c
+++ b/arch/sh/kernel/process.c
@@ -121,7 +121,7 @@
 void show_regs(struct pt_regs * regs)
 {
 	printk("\n");
-	printk("Pid : %d, Comm: %20s\n", current->pid, current->comm);
+	printk("Pid : %d, Comm: %20s\n", task_pid_nr(current), current->comm);
 	print_symbol("PC is at %s\n", instruction_pointer(regs));
 	printk("PC  : %08lx SP  : %08lx SR  : %08lx ",
 	       regs->pc, regs->regs[15], regs->sr);
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index b3027a6..b749403 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -128,6 +128,37 @@
 	free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(pages));
 }
 
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+	unsigned long long free_mem;
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+
+	ret = parse_crashkernel(boot_command_line, free_mem,
+			&crash_size, &crash_base);
+	if (ret == 0 && crash_size) {
+		if (crash_base > 0) {
+			printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+					"for crashkernel (System RAM: %ldMB)\n",
+					(unsigned long)(crash_size >> 20),
+					(unsigned long)(crash_base >> 20),
+					(unsigned long)(free_mem >> 20));
+			crashk_res.start = crash_base;
+			crashk_res.end   = crash_base + crash_size - 1;
+			reserve_bootmem(crash_base, crash_size);
+		} else
+			printk(KERN_INFO "crashkernel reservation failed - "
+					"you have to specify a base address\n");
+	}
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
 void __init setup_bootmem_allocator(unsigned long free_pfn)
 {
 	unsigned long bootmap_size;
@@ -189,11 +220,8 @@
 		}
 	}
 #endif
-#ifdef CONFIG_KEXEC
-	if (crashk_res.start != crashk_res.end)
-		reserve_bootmem(crashk_res.start,
-			crashk_res.end - crashk_res.start + 1);
-#endif
+
+	reserve_crashkernel();
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
diff --git a/arch/sh/kernel/signal.c b/arch/sh/kernel/signal.c
index 2f42442..ca754fd 100644
--- a/arch/sh/kernel/signal.c
+++ b/arch/sh/kernel/signal.c
@@ -382,7 +382,7 @@
 	set_fs(USER_DS);
 
 	pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
-		 current->comm, current->pid, frame, regs->pc, regs->pr);
+		 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
 
 	flush_cache_sigtramp(regs->pr);
 
@@ -462,7 +462,7 @@
 	set_fs(USER_DS);
 
 	pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
-		 current->comm, current->pid, frame, regs->pc, regs->pr);
+		 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
 
 	flush_cache_sigtramp(regs->pr);
 
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index dcb46e7..cf99111c 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -95,8 +95,8 @@
 	print_modules();
 	show_regs(regs);
 
-	printk("Process: %s (pid: %d, stack limit = %p)\n",
-	       current->comm, current->pid, task_stack_page(current) + 1);
+	printk("Process: %s (pid: %d, stack limit = %p)\n", current->comm,
+			task_pid_nr(current), task_stack_page(current) + 1);
 
 	if (!user_mode(regs) || in_interrupt())
 		dump_mem("Stack: ", regs->regs[15], THREAD_SIZE +
@@ -386,7 +386,8 @@
 
 		printk(KERN_NOTICE "Fixing up unaligned userspace access "
 		       "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n",
-		       current->comm,current->pid,(u16*)regs->pc,instruction);
+		       current->comm, task_pid_nr(current),
+		       (u16 *)regs->pc, instruction);
 	}
 
 	ret = -EFAULT;
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 4729668..f33cedb 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -207,7 +207,7 @@
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/sh/oprofile/Kconfig b/arch/sh/oprofile/Kconfig
deleted file mode 100644
index 5ade198..0000000
--- a/arch/sh/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-	  
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/sh64/Kconfig b/arch/sh64/Kconfig
index b3327ce..ba204ba 100644
--- a/arch/sh64/Kconfig
+++ b/arch/sh64/Kconfig
@@ -284,7 +284,7 @@
 
 source "fs/Kconfig"
 
-source "arch/sh64/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/sh64/Kconfig.debug"
 
diff --git a/arch/sh64/kernel/traps.c b/arch/sh64/kernel/traps.c
index 9d0d58f..c03101f 100644
--- a/arch/sh64/kernel/traps.c
+++ b/arch/sh64/kernel/traps.c
@@ -764,7 +764,7 @@
 		--user_mode_unaligned_fixup_count;
 		/* Only do 'count' worth of these reports, to remove a potential DoS against syslog */
 		printk("Fixing up unaligned userspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n",
-		       current->comm, current->pid, (__u32)regs->pc, opcode);
+		       current->comm, task_pid_nr(current), (__u32)regs->pc, opcode);
 	} else
 #endif
 	if (!user_mode(regs) && (kernel_mode_unaligned_fixup_count > 0)) {
@@ -774,7 +774,7 @@
 			       (__u32)regs->pc, opcode);
 		} else {
 			printk("Fixing up unaligned kernelspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n",
-			       current->comm, current->pid, (__u32)regs->pc, opcode);
+			       current->comm, task_pid_nr(current), (__u32)regs->pc, opcode);
 		}
 	}
 
diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c
index dd81c66..7c79a1b 100644
--- a/arch/sh64/mm/fault.c
+++ b/arch/sh64/mm/fault.c
@@ -81,7 +81,7 @@
 
 static inline void print_task(struct task_struct *tsk)
 {
-	printk("Task pid %d\n", tsk->pid);
+	printk("Task pid %d\n", task_pid_nr(tsk));
 }
 
 static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address)
@@ -272,13 +272,13 @@
 			 * usermode, so only need a few */
 			count++;
 			printk("user mode bad_area address=%08lx pid=%d (%s) pc=%08lx\n",
-				address, current->pid, current->comm,
+				address, task_pid_nr(current), current->comm,
 				(unsigned long) regs->pc);
 #if 0
 			show_regs(regs);
 #endif
 		}
-		if (is_init(tsk)) {
+		if (is_global_init(tsk)) {
 			panic("INIT had user mode bad_area\n");
 		}
 		tsk->thread.address = address;
@@ -320,14 +320,14 @@
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		panic("INIT out of memory\n");
 		yield();
 		goto survive;
 	}
 	printk("fault:Out of memory\n");
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/sh64/oprofile/Kconfig b/arch/sh64/oprofile/Kconfig
deleted file mode 100644
index 19d3773..0000000
--- a/arch/sh64/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index c0f4ba1..527adc8 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -320,11 +320,7 @@
 
 source "fs/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/sparc/oprofile/Kconfig"
-
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/sparc/Kconfig.debug"
 
diff --git a/arch/sparc/kernel/ptrace.c b/arch/sparc/kernel/ptrace.c
index 003f8ee..fe562db 100644
--- a/arch/sparc/kernel/ptrace.c
+++ b/arch/sparc/kernel/ptrace.c
@@ -155,7 +155,7 @@
 		/* Rest of them are completely unsupported. */
 	default:
 		printk("%s [%d]: Wants to read user offset %ld\n",
-		       current->comm, current->pid, offset);
+		       current->comm, task_pid_nr(current), offset);
 		pt_error_return(regs, EIO);
 		return;
 	}
@@ -222,7 +222,7 @@
 		/* Rest of them are completely unsupported or "no-touch". */
 	default:
 		printk("%s [%d]: Wants to write user offset %ld\n",
-		       current->comm, current->pid, offset);
+		       current->comm, task_pid_nr(current), offset);
 		goto failure;
 	}
 success:
diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c
index 6c0221e..42bf09d 100644
--- a/arch/sparc/kernel/sys_sparc.c
+++ b/arch/sparc/kernel/sys_sparc.c
@@ -357,7 +357,7 @@
 	if (count++ > 5)
 		return -ENOSYS;
 	printk ("%s[%d]: Unimplemented SPARC system call %d\n",
-		current->comm, current->pid, (int)regs->u_regs[1]);
+		current->comm, task_pid_nr(current), (int)regs->u_regs[1]);
 #ifdef DEBUG_UNIMP_SYSCALL	
 	show_regs (regs);
 #endif
diff --git a/arch/sparc/kernel/sys_sunos.c b/arch/sparc/kernel/sys_sunos.c
index f807172..28c187c 100644
--- a/arch/sparc/kernel/sys_sunos.c
+++ b/arch/sparc/kernel/sys_sunos.c
@@ -866,7 +866,7 @@
 	rcu_read_lock();
 	ret = -EINVAL;
 	if (pgrp > 0)
-		ret = kill_pgrp(find_pid(pgrp), sig, 0);
+		ret = kill_pgrp(find_vpid(pgrp), sig, 0);
 	rcu_read_unlock();
 
 	return ret;
diff --git a/arch/sparc/kernel/traps.c b/arch/sparc/kernel/traps.c
index 3bc3bff..d404e79 100644
--- a/arch/sparc/kernel/traps.c
+++ b/arch/sparc/kernel/traps.c
@@ -38,7 +38,7 @@
 
 void syscall_trace_entry(struct pt_regs *regs)
 {
-	printk("%s[%d]: ", current->comm, current->pid);
+	printk("%s[%d]: ", current->comm, task_pid_nr(current));
 	printk("scall<%d> (could be %d)\n", (int) regs->u_regs[UREG_G1],
 	       (int) regs->u_regs[UREG_I0]);
 }
@@ -99,7 +99,7 @@
 "              /_| \\__/ |_\\\n"
 "                 \\__U_/\n");
 
-	printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter);
+	printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter);
 	show_regs(regs);
 	add_taint(TAINT_DIE);
 
diff --git a/arch/sparc/oprofile/Kconfig b/arch/sparc/oprofile/Kconfig
deleted file mode 100644
index d8a8408..0000000
--- a/arch/sparc/oprofile/Kconfig
+++ /dev/null
@@ -1,17 +0,0 @@
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-	  
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 59c4d75..c7a74e3 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -460,20 +460,7 @@
 
 source "fs/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/sparc64/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes (EXPERIMENTAL)"
-	depends on KALLSYMS && EXPERIMENTAL && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/sparc64/Kconfig.debug"
 
diff --git a/arch/sparc64/kernel/sys_sunos32.c b/arch/sparc64/kernel/sys_sunos32.c
index 8f7a06e..170d6ca 100644
--- a/arch/sparc64/kernel/sys_sunos32.c
+++ b/arch/sparc64/kernel/sys_sunos32.c
@@ -831,7 +831,7 @@
 	rcu_read_lock();
 	ret = -EINVAL;
 	if (pgrp > 0)
-		ret = kill_pgrp(find_pid(pgrp), sig, 0);
+		ret = kill_pgrp(find_vpid(pgrp), sig, 0);
 	rcu_read_unlock();
 
 	return ret;
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 34573a5..e9c7e4f 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -2225,7 +2225,7 @@
 "              /_| \\__/ |_\\\n"
 "                 \\__U_/\n");
 
-	printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter);
+	printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter);
 	notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV);
 	__asm__ __volatile__("flushw");
 	__show_regs(regs);
diff --git a/arch/sparc64/oprofile/Kconfig b/arch/sparc64/oprofile/Kconfig
deleted file mode 100644
index d8a8408..0000000
--- a/arch/sparc64/oprofile/Kconfig
+++ /dev/null
@@ -1,17 +0,0 @@
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-	  
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c
index 3b67de7..c86cb30 100644
--- a/arch/sparc64/solaris/misc.c
+++ b/arch/sparc64/solaris/misc.c
@@ -415,7 +415,7 @@
 	
 	switch (cmd) {
 	case 0: /* getpgrp */
-		return process_group(current);
+		return task_pgrp_nr(current);
 	case 1: /* setpgrp */
 		{
 			int (*sys_setpgid)(pid_t,pid_t) =
@@ -426,7 +426,7 @@
 			ret = sys_setpgid(0, 0);
 			if (ret) return ret;
 			proc_clear_tty(current);
-			return process_group(current);
+			return task_pgrp_nr(current);
 		}
 	case 2: /* getsid */
 		{
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 740d8a9..d8925d2 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -289,4 +289,6 @@
 	bool
 	default n
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/um/Kconfig.debug"
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index bd06055..cb3321f 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -108,7 +108,7 @@
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		up_read(&mm->mmap_sem);
 		yield();
 		down_read(&mm->mmap_sem);
diff --git a/arch/um/sys-x86_64/sysrq.c b/arch/um/sys-x86_64/sysrq.c
index ce3e07f..7654440 100644
--- a/arch/um/sys-x86_64/sysrq.c
+++ b/arch/um/sys-x86_64/sysrq.c
@@ -15,8 +15,8 @@
 {
 	printk("\n");
 	print_modules();
-	printk("Pid: %d, comm: %.20s %s %s\n",
-	       current->pid, current->comm, print_tainted(), init_utsname()->release);
+	printk("Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current),
+		current->comm, print_tainted(), init_utsname()->release);
 	printk("RIP: %04lx:[<%016lx>] ", PT_REGS_CS(regs) & 0xffff,
 	       PT_REGS_RIP(regs));
 	printk("\nRSP: %016lx  EFLAGS: %08lx\n", PT_REGS_RSP(regs),
diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig
index ace479a..b6a50b8 100644
--- a/arch/v850/Kconfig
+++ b/arch/v850/Kconfig
@@ -331,6 +331,8 @@
 
 source "drivers/usb/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/v850/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/x86/ia32/ia32_binfmt.c b/arch/x86/ia32/ia32_binfmt.c
index 5027650..55822d2 100644
--- a/arch/x86/ia32/ia32_binfmt.c
+++ b/arch/x86/ia32/ia32_binfmt.c
@@ -5,10 +5,6 @@
  * This tricks binfmt_elf.c into loading 32bit binaries using lots 
  * of ugly preprocessor tricks. Talk about very very poor man's inheritance.
  */ 
-#define __ASM_X86_64_ELF_H 1
-
-#undef ELF_CLASS
-#define ELF_CLASS ELFCLASS32
 
 #include <linux/types.h>
 #include <linux/stddef.h>
@@ -19,6 +15,7 @@
 #include <linux/binfmts.h>
 #include <linux/mm.h>
 #include <linux/security.h>
+#include <linux/elfcore-compat.h>
 
 #include <asm/segment.h> 
 #include <asm/ptrace.h>
@@ -31,6 +28,20 @@
 #include <asm/ia32.h>
 #include <asm/vsyscall32.h>
 
+#undef	ELF_ARCH
+#undef	ELF_CLASS
+#define ELF_CLASS	ELFCLASS32
+#define ELF_ARCH	EM_386
+
+#undef	elfhdr
+#undef	elf_phdr
+#undef	elf_note
+#undef	elf_addr_t
+#define elfhdr		elf32_hdr
+#define elf_phdr	elf32_phdr
+#define elf_note	elf32_note
+#define elf_addr_t	Elf32_Off
+
 #define ELF_NAME "elf/i386"
 
 #define AT_SYSINFO 32
@@ -48,74 +59,20 @@
 } while(0)
 
 struct file;
-struct elf_phdr; 
 
 #define IA32_EMULATOR 1
 
+#undef ELF_ET_DYN_BASE
+
 #define ELF_ET_DYN_BASE		(TASK_UNMAPPED_BASE + 0x1000000)
 
-#undef ELF_ARCH
-#define ELF_ARCH EM_386
-
-#define ELF_DATA	ELFDATA2LSB
-
-#define USE_ELF_CORE_DUMP 1
-
-/* Override elfcore.h */ 
-#define _LINUX_ELFCORE_H 1
-typedef unsigned int elf_greg_t;
-
-#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-struct elf_siginfo
-{
-	int	si_signo;			/* signal number */
-	int	si_code;			/* extra code */
-	int	si_errno;			/* errno */
-};
-
 #define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
 
-struct elf_prstatus
-{
-	struct elf_siginfo pr_info;	/* Info associated with signal */
-	short	pr_cursig;		/* Current signal */
-	unsigned int pr_sigpend;	/* Set of pending signals */
-	unsigned int pr_sighold;	/* Set of held signals */
-	pid_t	pr_pid;
-	pid_t	pr_ppid;
-	pid_t	pr_pgrp;
-	pid_t	pr_sid;
-	struct compat_timeval pr_utime;	/* User time */
-	struct compat_timeval pr_stime;	/* System time */
-	struct compat_timeval pr_cutime;	/* Cumulative user time */
-	struct compat_timeval pr_cstime;	/* Cumulative system time */
-	elf_gregset_t pr_reg;	/* GP registers */
-	int pr_fpvalid;		/* True if math co-processor being used.  */
-};
-
-#define ELF_PRARGSZ	(80)	/* Number of chars for args */
-
-struct elf_prpsinfo
-{
-	char	pr_state;	/* numeric process state */
-	char	pr_sname;	/* char for pr_state */
-	char	pr_zomb;	/* zombie */
-	char	pr_nice;	/* nice val */
-	unsigned int pr_flag;	/* flags */
-	__u16	pr_uid;
-	__u16	pr_gid;
-	pid_t	pr_pid, pr_ppid, pr_pgrp, pr_sid;
-	/* Lots missing */
-	char	pr_fname[16];	/* filename of executable */
-	char	pr_psargs[ELF_PRARGSZ];	/* initial part of arg list */
-};
-
 #define _GET_SEG(x) \
 	({ __u32 seg; asm("movl %%" __stringify(x) ",%0" : "=r"(seg)); seg; })
 
 /* Assumes current==process to be dumped */
+#undef	ELF_CORE_COPY_REGS
 #define ELF_CORE_COPY_REGS(pr_reg, regs)       		\
 	pr_reg[0] = regs->rbx;				\
 	pr_reg[1] = regs->rcx;				\
@@ -135,36 +92,41 @@
 	pr_reg[15] = regs->rsp;				\
 	pr_reg[16] = regs->ss;
 
-#define user user32
+
+#define elf_prstatus	compat_elf_prstatus
+#define elf_prpsinfo	compat_elf_prpsinfo
+#define elf_fpregset_t	struct user_i387_ia32_struct
+#define	elf_fpxregset_t	struct user32_fxsr_struct
+#define user		user32
 
 #undef elf_read_implies_exec
 #define elf_read_implies_exec(ex, executable_stack)     (executable_stack != EXSTACK_DISABLE_X)
-//#include <asm/ia32.h>
-#include <linux/elf.h>
 
-typedef struct user_i387_ia32_struct elf_fpregset_t;
-typedef struct user32_fxsr_struct elf_fpxregset_t;
-
-
-static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
+#define elf_core_copy_regs		elf32_core_copy_regs
+static inline void elf32_core_copy_regs(compat_elf_gregset_t *elfregs,
+					struct pt_regs *regs)
 {
-	ELF_CORE_COPY_REGS((*elfregs), regs)
+	ELF_CORE_COPY_REGS((&elfregs->ebx), regs)
 }
 
-static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs)
+#define elf_core_copy_task_regs		elf32_core_copy_task_regs
+static inline int elf32_core_copy_task_regs(struct task_struct *t,
+					    compat_elf_gregset_t* elfregs)
 {	
 	struct pt_regs *pp = task_pt_regs(t);
-	ELF_CORE_COPY_REGS((*elfregs), pp);
+	ELF_CORE_COPY_REGS((&elfregs->ebx), pp);
 	/* fix wrong segments */ 
-	(*elfregs)[7] = t->thread.ds; 
-	(*elfregs)[9] = t->thread.fsindex; 
-	(*elfregs)[10] = t->thread.gsindex; 
-	(*elfregs)[8] = t->thread.es; 	
+	elfregs->ds = t->thread.ds;
+	elfregs->fs = t->thread.fsindex;
+	elfregs->gs = t->thread.gsindex;
+	elfregs->es = t->thread.es;
 	return 1; 
 }
 
+#define elf_core_copy_task_fpregs	elf32_core_copy_task_fpregs
 static inline int 
-elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu)
+elf32_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs,
+			    elf_fpregset_t *fpu)
 {
 	struct _fpstate_ia32 *fpstate = (void*)fpu; 
 	mm_segment_t oldfs = get_fs();
@@ -186,8 +148,9 @@
 
 #define ELF_CORE_COPY_XFPREGS 1
 #define ELF_CORE_XFPREG_TYPE NT_PRXFPREG
+#define elf_core_copy_task_xfpregs	elf32_core_copy_task_xfpregs
 static inline int 
-elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu)
+elf32_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu)
 {
 	struct pt_regs *regs = task_pt_regs(t);
 	if (!tsk_used_math(t))
@@ -206,6 +169,10 @@
 
 extern int force_personality32;
 
+#undef	ELF_EXEC_PAGESIZE
+#undef	ELF_HWCAP
+#undef	ELF_PLATFORM
+#undef	SET_PERSONALITY
 #define ELF_EXEC_PAGESIZE PAGE_SIZE
 #define ELF_HWCAP (boot_cpu_data.x86_capability[0])
 #define ELF_PLATFORM  ("i686")
@@ -231,6 +198,7 @@
 
 #define load_elf_binary load_elf32_binary
 
+#undef	ELF_PLAT_INIT
 #define ELF_PLAT_INIT(r, load_addr)	elf32_init(r)
 
 #undef start_thread
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index a3fa11f..ccea590 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-extra-y := head_32.o init_task_32.o vmlinux.lds
+extra-y := head_32.o init_task.o vmlinux.lds
 
 obj-y	:= process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
 		ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
@@ -17,6 +17,7 @@
 obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
 obj-$(CONFIG_MICROCODE)		+= microcode.o
+obj-$(CONFIG_PCI)		+= early-quirks.o
 obj-$(CONFIG_APM)		+= apm_32.o
 obj-$(CONFIG_X86_SMP)		+= smp_32.o smpboot_32.o tsc_sync.o
 obj-$(CONFIG_SMP)		+= smpcommon_32.o
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 43da662..dec06e7 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-extra-y 	:= head_64.o head64.o init_task_64.o vmlinux.lds
+extra-y 	:= head_64.o head64.o init_task.o vmlinux.lds
 EXTRA_AFLAGS	:= -traditional
 obj-y	:= process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
 		ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \
@@ -39,7 +39,7 @@
 obj-$(CONFIG_AUDIT)		+= audit_64.o
 
 obj-$(CONFIG_MODULES)		+= module_64.o
-obj-$(CONFIG_PCI)		+= early-quirks_64.o
+obj-$(CONFIG_PCI)		+= early-quirks.o
 
 obj-y				+= topology.o
 obj-y				+= intel_cacheinfo.o
diff --git a/arch/x86/kernel/acpi/Makefile_32 b/arch/x86/kernel/acpi/Makefile_32
index a4852a2..045dd54 100644
--- a/arch/x86/kernel/acpi/Makefile_32
+++ b/arch/x86/kernel/acpi/Makefile_32
@@ -1,7 +1,4 @@
 obj-$(CONFIG_ACPI)		+= boot.o
-ifneq ($(CONFIG_PCI),)
-obj-$(CONFIG_X86_IO_APIC)	+= earlyquirk_32.o
-endif
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep_32.o wakeup_32.o
 
 ifneq ($(CONFIG_ACPI_PROCESSOR),)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index afd2afe..f28b2e2 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -555,7 +555,7 @@
 
 int acpi_unmap_lsapic(int cpu)
 {
-	x86_cpu_to_apicid[cpu] = -1;
+	per_cpu(x86_cpu_to_apicid, cpu) = -1;
 	cpu_clear(cpu, cpu_present_map);
 	num_processors--;
 
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 2d39f55..10b6717 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -29,7 +29,7 @@
 void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
 					unsigned int cpu)
 {
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	flags->bm_check = 0;
 	if (num_online_cpus() == 1)
@@ -72,7 +72,7 @@
 		struct acpi_processor_cx *cx, struct acpi_power_register *reg)
 {
 	struct cstate_entry *percpu_entry;
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	cpumask_t saved_mask;
 	int retval;
diff --git a/arch/x86/kernel/acpi/earlyquirk_32.c b/arch/x86/kernel/acpi/earlyquirk_32.c
deleted file mode 100644
index 23f78ef..0000000
--- a/arch/x86/kernel/acpi/earlyquirk_32.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/* 
- * Do early PCI probing for bug detection when the main PCI subsystem is 
- * not up yet.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/acpi.h>
-
-#include <asm/pci-direct.h>
-#include <asm/acpi.h>
-#include <asm/apic.h>
-
-#ifdef CONFIG_ACPI
-
-static int __init nvidia_hpet_check(struct acpi_table_header *header)
-{
-	return 0;
-}
-#endif
-
-static int __init check_bridge(int vendor, int device)
-{
-#ifdef CONFIG_ACPI
-	static int warned;
-	/* According to Nvidia all timer overrides are bogus unless HPET
-	   is enabled. */
-	if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) {
-		if (!warned && acpi_table_parse(ACPI_SIG_HPET,
-						nvidia_hpet_check)) {
-			warned = 1;
-			acpi_skip_timer_override = 1;
-			  printk(KERN_INFO "Nvidia board "
-                       "detected. Ignoring ACPI "
-                       "timer override.\n");
-                printk(KERN_INFO "If you got timer trouble "
-			 	 "try acpi_use_timer_override\n");
-
-		}
-	}
-#endif
-	if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) {
-		timer_over_8254 = 0;
-		printk(KERN_INFO "ATI board detected. Disabling timer routing "
-				"over 8254.\n");
-	}
-	return 0;
-}
-
-void __init check_acpi_pci(void)
-{
-	int num, slot, func;
-
-	/* Assume the machine supports type 1. If not it will 
-	   always read ffffffff and should not have any side effect.
-	   Actually a few buggy systems can machine check. Allow the user
-	   to disable it by command line option at least -AK */
-	if (!early_pci_allowed())
-		return;
-
-	/* Poor man's PCI discovery */
-	for (num = 0; num < 32; num++) {
-		for (slot = 0; slot < 32; slot++) {
-			for (func = 0; func < 8; func++) {
-				u32 class;
-				u32 vendor;
-				class = read_pci_config(num, slot, func,
-							PCI_CLASS_REVISION);
-				if (class == 0xffffffff)
-					break;
-
-				if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
-					continue;
-
-				vendor = read_pci_config(num, slot, func,
-							 PCI_VENDOR_ID);
-
-				if (check_bridge(vendor & 0xffff, vendor >> 16))
-					return;
-			}
-
-		}
-	}
-}
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index b54fded..2ed0a4c 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -63,7 +63,7 @@
 void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
 {
 	unsigned int cpu = pr->id;
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	pr->pdc = NULL;
 	if (c->x86_vendor == X86_VENDOR_INTEL)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 3bd2688..d6405e0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -357,14 +357,14 @@
 	if (smp) {
 		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
 		clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
-		clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+		clear_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
 		list_for_each_entry(mod, &smp_alt_modules, next)
 			alternatives_smp_lock(mod->locks, mod->locks_end,
 					      mod->text, mod->text_end);
 	} else {
 		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
 		set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
-		set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+		set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
 		list_for_each_entry(mod, &smp_alt_modules, next)
 			alternatives_smp_unlock(mod->locks, mod->locks_end,
 						mod->text, mod->text_end);
@@ -432,7 +432,7 @@
 		if (1 == num_possible_cpus()) {
 			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
 			set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
-			set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+			set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
 			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
 						_text, _etext);
 		}
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig_32
similarity index 100%
rename from arch/x86/kernel/cpu/cpufreq/Kconfig
rename to arch/x86/kernel/cpu/cpufreq/Kconfig_32
diff --git a/arch/x86/kernel/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig_64
similarity index 97%
rename from arch/x86/kernel/cpufreq/Kconfig
rename to arch/x86/kernel/cpu/cpufreq/Kconfig_64
index a3fd519..9c9699f 100644
--- a/arch/x86/kernel/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig_64
@@ -19,7 +19,7 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called powernow-k8.
 
-	  For details, take a look at <file:Documentation/cpu-freq/>. 
+	  For details, take a look at <file:Documentation/cpu-freq/>.
 
 	  If in doubt, say N.
 
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 2ca43ba..fea0af0 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -77,7 +77,7 @@
 
 static int check_est_cpu(unsigned int cpuid)
 {
-	struct cpuinfo_x86 *cpu = &cpu_data[cpuid];
+	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
 
 	if (cpu->x86_vendor != X86_VENDOR_INTEL ||
 	    !cpu_has(cpu, X86_FEATURE_EST))
@@ -560,7 +560,7 @@
 	unsigned int cpu = policy->cpu;
 	struct acpi_cpufreq_data *data;
 	unsigned int result = 0;
-	struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
+	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
 	struct acpi_processor_performance *perf;
 
 	dprintk("acpi_cpufreq_cpu_init\n");
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index c11baaf..326a4c8 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -305,7 +305,7 @@
 
 static int __init eps_init(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	/* This driver will work only on Centaur C7 processors with
 	 * Enhanced SpeedStep/PowerSaver registers */
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 1e7ae7d..94619c2 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -199,7 +199,7 @@
 
 static int elanfreq_cpu_init(struct cpufreq_policy *policy)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	unsigned int i;
 	int result;
 
@@ -280,7 +280,7 @@
 
 static int __init elanfreq_init(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	/* Test if we have the right hardware */
 	if ((c->x86_vendor != X86_VENDOR_AMD) ||
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 5045f5d..749d00c 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -780,7 +780,7 @@
 
 static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	char *cpuname=NULL;
 	int ret;
 	u32 lo, hi;
@@ -959,7 +959,7 @@
 
 static int __init longhaul_init(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6)
 		return -ENODEV;
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index b268951..af4a867 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -172,7 +172,7 @@
 	u32 save_lo, save_hi;
 	u32 eax, ebx, ecx, edx;
 	u32 try_hi;
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	if (!low_freq || !high_freq)
 		return -EINVAL;
@@ -298,7 +298,7 @@
  */
 static int __init longrun_init(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	if (c->x86_vendor != X86_VENDOR_TRANSMETA ||
 	    !cpu_has(c, X86_FEATURE_LONGRUN))
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 793eae8..14791ec 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -195,7 +195,7 @@
 
 static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
 {
-	struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
+	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
 	int cpuid = 0;
 	unsigned int i;
 
@@ -279,7 +279,7 @@
 
 static int __init cpufreq_p4_init(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	int ret;
 
 	/*
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index 6d02853..42405b4 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -215,7 +215,7 @@
  */
 static int __init powernow_k6_init(void)
 {
-	struct cpuinfo_x86      *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) ||
 		((c->x86_model != 12) && (c->x86_model != 13)))
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index f3686a5..b5a9863 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -114,7 +114,7 @@
 
 static int check_powernow(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	unsigned int maxei, eax, ebx, ecx, edx;
 
 	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) {
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
index d9f3e90..42da9bd 100644
--- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
+++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
@@ -102,7 +102,7 @@
 
 static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	int result;
 
 	/* capability check */
@@ -151,7 +151,7 @@
 
 static int __init sc520_freq_init(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	int err;
 
 	/* Test if we have the right hardware */
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 811d474..3031f11 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -230,7 +230,7 @@
 
 static int centrino_cpu_init_table(struct cpufreq_policy *policy)
 {
-	struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
+	struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
 	struct cpu_model *model;
 
 	for(model = models; model->cpu_id != NULL; model++)
@@ -340,7 +340,7 @@
 
 static int centrino_cpu_init(struct cpufreq_policy *policy)
 {
-	struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
+	struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
 	unsigned freq;
 	unsigned l, h;
 	int ret;
@@ -612,7 +612,7 @@
  */
 static int __init centrino_init(void)
 {
-	struct cpuinfo_x86 *cpu = cpu_data;
+	struct cpuinfo_x86 *cpu = &cpu_data(0);
 
 	if (!cpu_has(cpu, X86_FEATURE_EST))
 		return -ENODEV;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index b1acc8c..76c3ab0 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -228,7 +228,7 @@
 
 unsigned int speedstep_detect_processor (void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	u32 ebx, msr_lo, msr_hi;
 
 	dprintk("x86: %x, model: %x\n", c->x86, c->x86_model);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 297a241..9921b01 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -295,7 +295,7 @@
 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 #ifdef CONFIG_X86_HT
-	unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
+	unsigned int cpu = c->cpu_index;
 #endif
 
 	if (c->cpuid_level > 3) {
@@ -417,14 +417,14 @@
 	if (new_l2) {
 		l2 = new_l2;
 #ifdef CONFIG_X86_HT
-		cpu_llc_id[cpu] = l2_id;
+		per_cpu(cpu_llc_id, cpu) = l2_id;
 #endif
 	}
 
 	if (new_l3) {
 		l3 = new_l3;
 #ifdef CONFIG_X86_HT
-		cpu_llc_id[cpu] = l3_id;
+		per_cpu(cpu_llc_id, cpu) = l3_id;
 #endif
 	}
 
@@ -459,7 +459,7 @@
 	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 	unsigned long num_threads_sharing;
 	int index_msb, i;
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
 	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
@@ -470,8 +470,8 @@
 		index_msb = get_count_order(num_threads_sharing);
 
 		for_each_online_cpu(i) {
-			if (c[i].apicid >> index_msb ==
-			    c[cpu].apicid >> index_msb) {
+			if (cpu_data(i).apicid >> index_msb ==
+			    c->apicid >> index_msb) {
 				cpu_set(i, this_leaf->shared_cpu_map);
 				if (i != cpu && cpuid4_info[i])  {
 					sibling_leaf = CPUID4_INFO_IDX(i, index);
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 54cdbf1..c02541e 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -120,7 +120,9 @@
 	unsigned int counter;
 
 	counter = nmi_perfctr_msr_to_bit(msr);
-	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+	/* register not managed by the allocator? */
+	if (counter > NMI_MAX_COUNTER_BITS)
+		return 1;
 
 	if (!test_and_set_bit(counter, perfctr_nmi_owner))
 		return 1;
@@ -132,7 +134,9 @@
 	unsigned int counter;
 
 	counter = nmi_perfctr_msr_to_bit(msr);
-	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+	/* register not managed by the allocator? */
+	if (counter > NMI_MAX_COUNTER_BITS)
+		return;
 
 	clear_bit(counter, perfctr_nmi_owner);
 }
@@ -142,7 +146,9 @@
 	unsigned int counter;
 
 	counter = nmi_evntsel_msr_to_bit(msr);
-	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+	/* register not managed by the allocator? */
+	if (counter > NMI_MAX_COUNTER_BITS)
+		return 1;
 
 	if (!test_and_set_bit(counter, evntsel_nmi_owner))
 		return 1;
@@ -154,7 +160,9 @@
 	unsigned int counter;
 
 	counter = nmi_evntsel_msr_to_bit(msr);
-	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+	/* register not managed by the allocator? */
+	if (counter > NMI_MAX_COUNTER_BITS)
+		return;
 
 	clear_bit(counter, evntsel_nmi_owner);
 }
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 879a0f7..2d42b41 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -85,12 +85,13 @@
 		/* nothing */
 	};
 	struct cpuinfo_x86 *c = v;
-	int i, n = c - cpu_data;
+	int i, n = 0;
 	int fpu_exception;
 
 #ifdef CONFIG_SMP
 	if (!cpu_online(n))
 		return 0;
+	n = c->cpu_index;
 #endif
 	seq_printf(m, "processor\t: %d\n"
 		"vendor_id\t: %s\n"
@@ -175,11 +176,15 @@
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+	if (*pos == 0)	/* just in case, cpu 0 is not the first */
+		*pos = first_cpu(cpu_possible_map);
+	if ((*pos) < NR_CPUS && cpu_possible(*pos))
+		return &cpu_data(*pos);
+	return NULL;
 }
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	++*pos;
+	*pos = next_cpu(*pos, cpu_possible_map);
 	return c_start(m, pos);
 }
 static void c_stop(struct seq_file *m, void *v)
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 70dcf91..05c9936 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -114,7 +114,7 @@
 static int cpuid_open(struct inode *inode, struct file *file)
 {
 	unsigned int cpu = iminor(file->f_path.dentry->d_inode);
-	struct cpuinfo_x86 *c = &(cpu_data)[cpu];
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	if (cpu >= NR_CPUS || !cpu_online(cpu))
 		return -ENXIO;	/* No such CPU */
@@ -134,15 +134,18 @@
 	.open = cpuid_open,
 };
 
-static int __cpuinit cpuid_device_create(int i)
+static __cpuinit int cpuid_device_create(int cpu)
 {
-	int err = 0;
 	struct device *dev;
 
-	dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, i), "cpu%d",i);
-	if (IS_ERR(dev))
-		err = PTR_ERR(dev);
-	return err;
+	dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu),
+			    "cpu%d", cpu);
+	return IS_ERR(dev) ? PTR_ERR(dev) : 0;
+}
+
+static void cpuid_device_destroy(int cpu)
+{
+	device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
 }
 
 static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb,
@@ -150,18 +153,21 @@
 					      void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
+	int err = 0;
 
 	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		cpuid_device_create(cpu);
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		err = cpuid_device_create(cpu);
 		break;
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+		cpuid_device_destroy(cpu);
 		break;
 	}
-	return NOTIFY_OK;
+	return err ? NOTIFY_BAD : NOTIFY_OK;
 }
 
 static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier =
@@ -198,7 +204,7 @@
 out_class:
 	i = 0;
 	for_each_online_cpu(i) {
-		device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, i));
+		cpuid_device_destroy(i);
 	}
 	class_destroy(cpuid_class);
 out_chrdev:
@@ -212,7 +218,7 @@
 	int cpu = 0;
 
 	for_each_online_cpu(cpu)
-		device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+		cpuid_device_destroy(cpu);
 	class_destroy(cpuid_class);
 	unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
 	unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 32e75d0..72d0c56 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -47,6 +47,7 @@
 		if (!kdump_buf_page) {
 			printk(KERN_WARNING "Kdump: Kdump buffer page not"
 				" allocated\n");
+			kunmap_atomic(vaddr, KM_PTE0);
 			return -EFAULT;
 		}
 		copy_page(kdump_buf_page, vaddr);
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 3c86b97..d58039e 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -288,7 +288,8 @@
 			request_resource(res, code_resource);
 			request_resource(res, data_resource);
 #ifdef CONFIG_KEXEC
-			request_resource(res, &crashk_res);
+			if (crashk_res.start != crashk_res.end)
+				request_resource(res, &crashk_res);
 #endif
 		}
 	}
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index e422b81..5761686 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -226,7 +226,8 @@
 			request_resource(res, &code_resource);
 			request_resource(res, &data_resource);
 #ifdef CONFIG_KEXEC
-			request_resource(res, &crashk_res);
+			if (crashk_res.start != crashk_res.end)
+				request_resource(res, &crashk_res);
 #endif
 		}
 	}
diff --git a/arch/x86/kernel/early-quirks_64.c b/arch/x86/kernel/early-quirks.c
similarity index 88%
rename from arch/x86/kernel/early-quirks_64.c
rename to arch/x86/kernel/early-quirks.c
index 13aa4fd..dc34acb 100644
--- a/arch/x86/kernel/early-quirks_64.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -13,9 +13,13 @@
 #include <linux/acpi.h>
 #include <linux/pci_ids.h>
 #include <asm/pci-direct.h>
-#include <asm/proto.h>
-#include <asm/iommu.h>
 #include <asm/dma.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+
+#ifdef CONFIG_IOMMU
+#include <asm/iommu.h>
+#endif
 
 static void __init via_bugs(void)
 {
@@ -23,7 +27,8 @@
 	if ((end_pfn > MAX_DMA32_PFN ||  force_iommu) &&
 	    !iommu_aperture_allowed) {
 		printk(KERN_INFO
-  "Looks like a VIA chipset. Disabling IOMMU. Override with iommu=allowed\n");
+		       "Looks like a VIA chipset. Disabling IOMMU."
+		       " Override with iommu=allowed\n");
 		iommu_aperture_disabled = 1;
 	}
 #endif
@@ -40,6 +45,7 @@
 static void __init nvidia_bugs(void)
 {
 #ifdef CONFIG_ACPI
+#ifdef CONFIG_X86_IO_APIC
 	/*
 	 * All timer overrides on Nvidia are
 	 * wrong unless HPET is enabled.
@@ -59,17 +65,20 @@
 			"try acpi_use_timer_override\n");
 	}
 #endif
+#endif
 	/* RED-PEN skip them on mptables too? */
 
 }
 
 static void __init ati_bugs(void)
 {
+#ifdef CONFIG_X86_IO_APIC
 	if (timer_over_8254 == 1) {
 		timer_over_8254 = 0;
 		printk(KERN_INFO
-	 	"ATI board detected. Disabling timer routing over 8254.\n");
+		"ATI board detected. Disabling timer routing over 8254.\n");
 	}
+#endif
 }
 
 struct chipset {
@@ -104,7 +113,7 @@
 				if (class == 0xffffffff)
 					break;
 
-		       		if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
+				if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
 					continue;
 
 				vendor = read_pci_config(num, slot, func,
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 4ae03e3..ce703e2 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -24,10 +24,19 @@
 #include <acpi/acpi_bus.h>
 #endif
 
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly
+/*
+ * which logical CPU number maps to which CPU (physical APIC ID)
+ *
+ * The following static array is used during kernel startup
+ * and the x86_cpu_to_apicid_ptr contains the address of the
+ * array during this time.  Is it zeroed when the per_cpu
+ * data area is removed.
+ */
+u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata
 					= { [0 ... NR_CPUS-1] = BAD_APICID };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
+void *x86_cpu_to_apicid_ptr;
+DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 
 struct genapic __read_mostly *genapic = &apic_flat;
 
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 91c75267..07352b7 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -172,7 +172,7 @@
 	 */
 	cpu = first_cpu(cpumask);
 	if ((unsigned)cpu < NR_CPUS)
-		return x86_cpu_to_apicid[cpu];
+		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
 		return BAD_APICID;
 }
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index a7eee0a..6b34693 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -58,7 +58,7 @@
 
 	for (i = 0; i < IDT_ENTRIES; i++)
 		set_intr_gate(i, early_idt_handler);
-	asm volatile("lidt %0" :: "m" (idt_descr));
+	load_idt((const struct desc_ptr *)&idt_descr);
 
 	early_printk("Kernel alive\n");
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index f836707..22d8f00 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -69,12 +69,15 @@
  * HPET command line enable / disable
  */
 static int boot_hpet_disable;
+int hpet_force_user;
 
 static int __init hpet_setup(char* str)
 {
 	if (str) {
 		if (!strncmp("disable", str, 7))
 			boot_hpet_disable = 1;
+		if (!strncmp("force", str, 5))
+			hpet_force_user = 1;
 	}
 	return 1;
 }
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c
index d34a10c..f634fc7 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259_32.c
@@ -403,7 +403,8 @@
 		int vector = FIRST_EXTERNAL_VECTOR + i;
 		if (i >= NR_IRQS)
 			break;
-		if (vector != SYSCALL_VECTOR) 
+		/* SYSCALL_VECTOR was reserved in trap_init. */
+		if (!test_bit(vector, used_vectors))
 			set_intr_gate(vector, interrupt[i]);
 	}
 
diff --git a/arch/x86/kernel/init_task_32.c b/arch/x86/kernel/init_task.c
similarity index 79%
rename from arch/x86/kernel/init_task_32.c
rename to arch/x86/kernel/init_task.c
index d26fc06..468c9c4 100644
--- a/arch/x86/kernel/init_task_32.c
+++ b/arch/x86/kernel/init_task.c
@@ -15,7 +15,6 @@
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
-
 EXPORT_SYMBOL(init_mm);
 
 /*
@@ -25,7 +24,7 @@
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union 
+union thread_union init_thread_union
 	__attribute__((__section__(".data.init_task"))) =
 		{ INIT_THREAD_INFO(init_task) };
 
@@ -35,12 +34,14 @@
  * All other task structs will be allocated on slabs in fork.c
  */
 struct task_struct init_task = INIT_TASK(init_task);
-
 EXPORT_SYMBOL(init_task);
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's.
- */ 
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
 
diff --git a/arch/x86/kernel/init_task_64.c b/arch/x86/kernel/init_task_64.c
deleted file mode 100644
index 4ff33d4..0000000
--- a/arch/x86/kernel/init_task_64.c
+++ /dev/null
@@ -1,54 +0,0 @@
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
-/*
- * Initial task structure.
- *
- * We need to make sure that this is 8192-byte aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union 
-	__attribute__((__section__(".data.init_task"))) =
-		{ INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_task);
-/*
- * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data.cacheline_aligned
- * section. Since TSS's are completely CPU-local, we want them
- * on exact cacheline boundaries, to eliminate cacheline ping-pong.
- */ 
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
-
-/* Copies of the original ist values from the tss are only accessed during
- * debugging, no special alignment required.
- */
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
-
-#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 5f10c71..0c55e9d 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1198,7 +1198,7 @@
 static int __assign_irq_vector(int irq)
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
-	int vector, offset, i;
+	int vector, offset;
 
 	BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
 
@@ -1215,11 +1215,8 @@
 	}
 	if (vector == current_vector)
 		return -ENOSPC;
-	if (vector == SYSCALL_VECTOR)
+	if (test_and_set_bit(vector, used_vectors))
 		goto next;
-	for (i = 0; i < NR_IRQ_VECTORS; i++)
-		if (irq_vector[i] == vector)
-			goto next;
 
 	current_vector = vector;
 	current_offset = offset;
@@ -2295,6 +2292,12 @@
 
 void __init setup_IO_APIC(void)
 {
+	int i;
+
+	/* Reserve all the system vectors. */
+	for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++)
+		set_bit(i, used_vectors);
+
 	enable_IO_APIC();
 
 	if (acpi_ioapic)
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 8459ca6..11b935f 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -149,28 +149,6 @@
 			image->start, cpu_has_pae);
 }
 
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel.  By reserving this memory we guarantee
- * that linux never sets it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init parse_crashkernel(char *arg)
-{
-	unsigned long size, base;
-	size = memparse(arg, &arg);
-	if (*arg == '@') {
-		base = memparse(arg+1, &arg);
-		/* FIXME: Do I want a sanity check
-		 * to validate the memory range?
-		 */
-		crashk_res.start = base;
-		crashk_res.end   = base + size - 1;
-	}
-	return 0;
-}
-early_param("crashkernel", parse_crashkernel);
-
 void arch_crash_save_vmcoreinfo(void)
 {
 #ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7450b69..0d8577f 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -231,33 +231,6 @@
 			image->start);
 }
 
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel.  By reserving this memory we guarantee
- * that linux never set's it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init setup_crashkernel(char *arg)
-{
-	unsigned long size, base;
-	char *p;
-	if (!arg)
-		return -EINVAL;
-	size = memparse(arg, &p);
-	if (arg == p)
-		return -EINVAL;
-	if (*p == '@') {
-		base = memparse(p+1, &p);
-		/* FIXME: Do I want a sanity check to validate the
-		 * memory range?  Yes you do, but it's too early for
-		 * e820 -AK */
-		crashk_res.start = base;
-		crashk_res.end   = base + size - 1;
-	}
-	return 0;
-}
-early_param("crashkernel", setup_crashkernel);
-
 void arch_crash_save_vmcoreinfo(void)
 {
 #ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
diff --git a/arch/x86/kernel/mce_64.c b/arch/x86/kernel/mce_64.c
index 66e6b79..2cf20de 100644
--- a/arch/x86/kernel/mce_64.c
+++ b/arch/x86/kernel/mce_64.c
@@ -799,7 +799,8 @@
 {
 	int err;
 	int i;
-	if (!mce_available(&cpu_data[cpu]))
+
+	if (!mce_available(&cpu_data(cpu)))
 		return -EIO;
 
 	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
diff --git a/arch/x86/kernel/mce_amd_64.c b/arch/x86/kernel/mce_amd_64.c
index 0d2afd9..752fb16 100644
--- a/arch/x86/kernel/mce_amd_64.c
+++ b/arch/x86/kernel/mce_amd_64.c
@@ -472,11 +472,11 @@
 	sprintf(name, "threshold_bank%i", bank);
 
 #ifdef CONFIG_SMP
-	if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) {	/* symlink */
+	if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) {	/* symlink */
 		i = first_cpu(per_cpu(cpu_core_map, cpu));
 
 		/* first core not up yet */
-		if (cpu_data[i].cpu_core_id)
+		if (cpu_data(i).cpu_core_id)
 			goto out;
 
 		/* already linked */
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 09cf781..09c3152 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -132,7 +132,7 @@
 
 static void collect_cpu_info(int cpu_num)
 {
-	struct cpuinfo_x86 *c = cpu_data + cpu_num;
+	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 	unsigned int val[2];
 
@@ -522,7 +522,7 @@
 static int cpu_request_microcode(int cpu)
 {
 	char name[30];
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	const struct firmware *firmware;
 	void *buf;
 	unsigned long size;
@@ -570,7 +570,7 @@
 
 static int apply_microcode_check_cpu(int cpu)
 {
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	cpumask_t old;
 	unsigned int val[2];
diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c
index 8bf0ca0..ef4aab1 100644
--- a/arch/x86/kernel/mpparse_64.c
+++ b/arch/x86/kernel/mpparse_64.c
@@ -57,6 +57,8 @@
 
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_id = -1U;
+EXPORT_SYMBOL(boot_cpu_id);
+
 /* Internal processor count */
 unsigned int num_processors __cpuinitdata = 0;
 
@@ -86,7 +88,7 @@
 	return sum & 0xFF;
 }
 
-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
+static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 {
 	int cpu;
 	cpumask_t tmp_map;
@@ -123,7 +125,18 @@
 		cpu = 0;
  	}
 	bios_cpu_apicid[cpu] = m->mpc_apicid;
-	x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+	/*
+	 * We get called early in the the start_kernel initialization
+	 * process when the per_cpu data area is not yet setup, so we
+	 * use a static array that is removed after the per_cpu data
+	 * area is created.
+	 */
+	if (x86_cpu_to_apicid_ptr) {
+		u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr;
+		x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+	} else {
+		per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
+	}
 
 	cpu_set(cpu, cpu_possible_map);
 	cpu_set(cpu, cpu_present_map);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index e18e516..ee6eba4 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -112,7 +112,7 @@
 static int msr_open(struct inode *inode, struct file *file)
 {
 	unsigned int cpu = iminor(file->f_path.dentry->d_inode);
-	struct cpuinfo_x86 *c = &(cpu_data)[cpu];
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	if (cpu >= NR_CPUS || !cpu_online(cpu))
 		return -ENXIO;	/* No such CPU */
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index b2b42bd..afaf9f1 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -11,7 +11,7 @@
 #include <asm/iommu.h>
 #include <asm/calgary.h>
 
-int iommu_merge __read_mostly = 0;
+int iommu_merge __read_mostly = 1;
 EXPORT_SYMBOL(iommu_merge);
 
 dma_addr_t bad_dma_address __read_mostly;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 097aeaf..7b89958 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -295,34 +295,52 @@
 }
 early_param("idle", idle_setup);
 
-void show_regs(struct pt_regs * regs)
+void __show_registers(struct pt_regs *regs, int all)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
 	unsigned long d0, d1, d2, d3, d6, d7;
+	unsigned long esp;
+	unsigned short ss, gs;
+
+	if (user_mode_vm(regs)) {
+		esp = regs->esp;
+		ss = regs->xss & 0xffff;
+		savesegment(gs, gs);
+	} else {
+		esp = (unsigned long) (&regs->esp);
+		savesegment(ss, ss);
+		savesegment(gs, gs);
+	}
 
 	printk("\n");
-	printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
-	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
+	printk("Pid: %d, comm: %s %s (%s %.*s)\n",
+			task_pid_nr(current), current->comm,
+			print_tainted(), init_utsname()->release,
+			(int)strcspn(init_utsname()->version, " "),
+			init_utsname()->version);
+
+	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
+			0xffff & regs->xcs, regs->eip, regs->eflags,
+			smp_processor_id());
 	print_symbol("EIP is at %s\n", regs->eip);
 
-	if (user_mode_vm(regs))
-		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
-	printk(" EFLAGS: %08lx    %s  (%s %.*s)\n",
-	       regs->eflags, print_tainted(), init_utsname()->release,
-	       (int)strcspn(init_utsname()->version, " "),
-	       init_utsname()->version);
 	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
-		regs->eax,regs->ebx,regs->ecx,regs->edx);
-	printk("ESI: %08lx EDI: %08lx EBP: %08lx",
-		regs->esi, regs->edi, regs->ebp);
-	printk(" DS: %04x ES: %04x FS: %04x\n",
-	       0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
+		regs->eax, regs->ebx, regs->ecx, regs->edx);
+	printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
+		regs->esi, regs->edi, regs->ebp, esp);
+	printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
+	       regs->xds & 0xffff, regs->xes & 0xffff,
+	       regs->xfs & 0xffff, gs, ss);
+
+	if (!all)
+		return;
 
 	cr0 = read_cr0();
 	cr2 = read_cr2();
 	cr3 = read_cr3();
 	cr4 = read_cr4_safe();
-	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
+	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
+			cr0, cr2, cr3, cr4);
 
 	get_debugreg(d0, 0);
 	get_debugreg(d1, 1);
@@ -330,10 +348,16 @@
 	get_debugreg(d3, 3);
 	printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
 			d0, d1, d2, d3);
+
 	get_debugreg(d6, 6);
 	get_debugreg(d7, 7);
-	printk("DR6: %08lx DR7: %08lx\n", d6, d7);
+	printk("DR6: %08lx DR7: %08lx\n",
+			d6, d7);
+}
 
+void show_regs(struct pt_regs *regs)
+{
+	__show_registers(regs, 1);
 	show_trace(NULL, regs, &regs->esp);
 }
 
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index d769e20..a4ce191 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -45,9 +45,12 @@
 	if (!(config & 0x2))
 		pci_write_config_byte(dev, 0xf4, config);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7320_MCH,	quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7525_MCH,	quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7520_MCH,	quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH,
+			quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH,
+			quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH,
+			quirk_intel_irqbalance);
 #endif
 
 #if defined(CONFIG_HPET_TIMER)
@@ -56,7 +59,8 @@
 static enum {
 	NONE_FORCE_HPET_RESUME,
 	OLD_ICH_FORCE_HPET_RESUME,
-	ICH_FORCE_HPET_RESUME
+	ICH_FORCE_HPET_RESUME,
+	VT8237_FORCE_HPET_RESUME
 } force_hpet_resume_type;
 
 static void __iomem *rcba_base;
@@ -146,17 +150,17 @@
 }
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0,
-                         ich_force_enable_hpet);
+			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1,
-                         ich_force_enable_hpet);
+			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0,
-                         ich_force_enable_hpet);
+			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1,
-                         ich_force_enable_hpet);
+			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31,
-                         ich_force_enable_hpet);
+			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1,
-                         ich_force_enable_hpet);
+			 ich_force_enable_hpet);
 
 
 static struct pci_dev *cached_dev;
@@ -232,10 +236,91 @@
 	printk(KERN_DEBUG "Failed to force enable HPET\n");
 }
 
+/*
+ * Undocumented chipset features. Make sure that the user enforced
+ * this.
+ */
+static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
+{
+	if (hpet_force_user)
+		old_ich_force_enable_hpet(dev);
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
+			 old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
+			 old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0,
+			 old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12,
+			 old_ich_force_enable_hpet_user);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0,
-                         old_ich_force_enable_hpet);
+			 old_ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12,
-                         old_ich_force_enable_hpet);
+			 old_ich_force_enable_hpet);
+
+
+static void vt8237_force_hpet_resume(void)
+{
+	u32 val;
+
+	if (!force_hpet_address || !cached_dev)
+		return;
+
+	val = 0xfed00000 | 0x80;
+	pci_write_config_dword(cached_dev, 0x68, val);
+
+	pci_read_config_dword(cached_dev, 0x68, &val);
+	if (val & 0x80)
+		printk(KERN_DEBUG "Force enabled HPET at resume\n");
+	else
+		BUG();
+}
+
+static void vt8237_force_enable_hpet(struct pci_dev *dev)
+{
+	u32 uninitialized_var(val);
+
+	if (!hpet_force_user || hpet_address || force_hpet_address)
+		return;
+
+	pci_read_config_dword(dev, 0x68, &val);
+	/*
+	 * Bit 7 is HPET enable bit.
+	 * Bit 31:10 is HPET base address (contrary to what datasheet claims)
+	 */
+	if (val & 0x80) {
+		force_hpet_address = (val & ~0x3ff);
+		printk(KERN_DEBUG "HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		return;
+	}
+
+	/*
+	 * HPET is disabled. Trying enabling at FED00000 and check
+	 * whether it sticks
+	 */
+	val = 0xfed00000 | 0x80;
+	pci_write_config_dword(dev, 0x68, val);
+
+	pci_read_config_dword(dev, 0x68, &val);
+	if (val & 0x80) {
+		force_hpet_address = (val & ~0x3ff);
+		printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		cached_dev = dev;
+		force_hpet_resume_type = VT8237_FORCE_HPET_RESUME;
+		return;
+	}
+
+	printk(KERN_DEBUG "Failed to force enable HPET\n");
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235,
+			 vt8237_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
+			 vt8237_force_enable_hpet);
+
 
 void force_hpet_resume(void)
 {
@@ -246,6 +331,9 @@
 	    case OLD_ICH_FORCE_HPET_RESUME:
 		return old_ich_force_hpet_resume();
 
+	    case VT8237_FORCE_HPET_RESUME:
+		return vt8237_force_hpet_resume();
+
 	    default:
 		break;
 	}
diff --git a/arch/x86/kernel/reboot_64.c b/arch/x86/kernel/reboot_64.c
index 368db2b..776eb06 100644
--- a/arch/x86/kernel/reboot_64.c
+++ b/arch/x86/kernel/reboot_64.c
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <asm/io.h>
 #include <asm/delay.h>
+#include <asm/desc.h>
 #include <asm/hw_irq.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -136,7 +137,7 @@
 		}
 
 		case BOOT_TRIPLE: 
-			__asm__ __volatile__("lidt (%0)": :"r" (&no_idt));
+			load_idt((const struct desc_ptr *)&no_idt);
 			__asm__ __volatile__("int3");
 
 			reboot_type = BOOT_KBD;
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index 8b30b26..1a07bbe 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -12,6 +12,7 @@
 #include <linux/interrupt.h>
 #include <asm/reboot_fixups.h>
 #include <asm/msr.h>
+#include <asm/geode.h>
 
 static void cs5530a_warm_reset(struct pci_dev *dev)
 {
@@ -24,11 +25,8 @@
 
 static void cs5536_warm_reset(struct pci_dev *dev)
 {
-	/*
-	 * 6.6.2.12 Soft Reset (DIVIL_SOFT_RESET)
-	 * writing 1 to the LSB of this MSR causes a hard reset.
-	 */
-	wrmsrl(0x51400017, 1ULL);
+	/* writing 1 to the LSB of this MSR causes a hard reset */
+	wrmsrl(MSR_DIVIL_SOFT_RESET, 1ULL);
 	udelay(50); /* shouldn't get here but be safe and spin a while */
 }
 
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index ba91882..3558ac7 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -185,6 +185,12 @@
 unsigned long kernel_eflags;
 
 /*
+ * Copies of the original ist values from the tss are only accessed during
+ * debugging, no special alignment required.
+ */
+DEFINE_PER_CPU(struct orig_ist, orig_ist);
+
+/*
  * cpu_init() initializes state that is per-CPU. Some data is already
  * initialized (naturally) in the bootstrap process, such as the GDT
  * and IDT. We reload them nevertheless, this function acts as a
@@ -224,8 +230,8 @@
  		memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE);
 
 	cpu_gdt_descr[cpu].size = GDT_SIZE;
-	asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
-	asm volatile("lidt %0" :: "m" (idt_descr));
+	load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]);
+	load_idt((const struct desc_ptr *)&idt_descr);
 
 	memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
 	syscall_init();
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index b87a6fd..e4f1991 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -378,6 +378,49 @@
 extern void zone_sizes_init(void);
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
+static inline unsigned long long get_total_mem(void)
+{
+	unsigned long long total;
+
+	total = max_low_pfn - min_low_pfn;
+#ifdef CONFIG_HIGHMEM
+	total += highend_pfn - highstart_pfn;
+#endif
+
+	return total << PAGE_SHIFT;
+}
+
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+	unsigned long long total_mem;
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	total_mem = get_total_mem();
+
+	ret = parse_crashkernel(boot_command_line, total_mem,
+			&crash_size, &crash_base);
+	if (ret == 0 && crash_size > 0) {
+		if (crash_base > 0) {
+			printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+					"for crashkernel (System RAM: %ldMB)\n",
+					(unsigned long)(crash_size >> 20),
+					(unsigned long)(crash_base >> 20),
+					(unsigned long)(total_mem >> 20));
+			crashk_res.start = crash_base;
+			crashk_res.end   = crash_base + crash_size - 1;
+			reserve_bootmem(crash_base, crash_size);
+		} else
+			printk(KERN_INFO "crashkernel reservation failed - "
+					"you have to specify a base address\n");
+	}
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
 void __init setup_bootmem_allocator(void)
 {
 	unsigned long bootmap_size;
@@ -453,11 +496,7 @@
 		}
 	}
 #endif
-#ifdef CONFIG_KEXEC
-	if (crashk_res.start != crashk_res.end)
-		reserve_bootmem(crashk_res.start,
-			crashk_res.end - crashk_res.start + 1);
-#endif
+	reserve_crashkernel();
 }
 
 /*
@@ -622,9 +661,7 @@
 #endif
 
 #ifdef CONFIG_PCI
-#ifdef CONFIG_X86_IO_APIC
-	check_acpi_pci();	/* Checks more than just ACPI actually */
-#endif
+	early_quirks();
 #endif
 
 #ifdef CONFIG_ACPI
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 5a19f0c..31322d4 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -191,6 +191,37 @@
 }
 #endif
 
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+	unsigned long long free_mem;
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+
+	ret = parse_crashkernel(boot_command_line, free_mem,
+			&crash_size, &crash_base);
+	if (ret == 0 && crash_size) {
+		if (crash_base > 0) {
+			printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+					"for crashkernel (System RAM: %ldMB)\n",
+					(unsigned long)(crash_size >> 20),
+					(unsigned long)(crash_base >> 20),
+					(unsigned long)(free_mem >> 20));
+			crashk_res.start = crash_base;
+			crashk_res.end   = crash_base + crash_size - 1;
+			reserve_bootmem(crash_base, crash_size);
+		} else
+			printk(KERN_INFO "crashkernel reservation failed - "
+					"you have to specify a base address\n");
+	}
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
 #define EBDA_ADDR_POINTER 0x40E
 
 unsigned __initdata ebda_addr;
@@ -271,6 +302,11 @@
 
 	dmi_scan_machine();
 
+#ifdef CONFIG_SMP
+	/* setup to use the static apicid table during kernel startup */
+	x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
+#endif
+
 #ifdef CONFIG_ACPI
 	/*
 	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -357,13 +393,7 @@
 		}
 	}
 #endif
-#ifdef CONFIG_KEXEC
-	if (crashk_res.start != crashk_res.end) {
-		reserve_bootmem_generic(crashk_res.start,
-			crashk_res.end - crashk_res.start + 1);
-	}
-#endif
-
+	reserve_crashkernel();
 	paging_init();
 
 #ifdef CONFIG_PCI
@@ -529,7 +559,7 @@
  		   but in the same order as the HT nodeids.
  		   If that doesn't result in a usable node fall back to the
  		   path for the previous case.  */
- 		int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
+		int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
  		if (ht_nodeid >= 0 &&
  		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
  			node = apicid_to_node[ht_nodeid];
@@ -853,6 +883,7 @@
 
 #ifdef CONFIG_SMP
 	c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
+	c->cpu_index = 0;
 #endif
 }
 
@@ -959,6 +990,7 @@
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
 	struct cpuinfo_x86 *c = v;
+	int cpu = 0;
 
 	/* 
 	 * These flag bits must match the definitions in <asm/cpufeature.h>.
@@ -1037,8 +1069,9 @@
 
 
 #ifdef CONFIG_SMP
-	if (!cpu_online(c-cpu_data))
+	if (!cpu_online(c->cpu_index))
 		return 0;
+	cpu = c->cpu_index;
 #endif
 
 	seq_printf(m,"processor\t: %u\n"
@@ -1046,7 +1079,7 @@
 		     "cpu family\t: %d\n"
 		     "model\t\t: %d\n"
 		     "model name\t: %s\n",
-		     (unsigned)(c-cpu_data),
+		     (unsigned)cpu,
 		     c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
 		     c->x86,
 		     (int)c->x86_model,
@@ -1058,7 +1091,7 @@
 		seq_printf(m, "stepping\t: unknown\n");
 	
 	if (cpu_has(c,X86_FEATURE_TSC)) {
-		unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
+		unsigned int freq = cpufreq_quick_get((unsigned)cpu);
 		if (!freq)
 			freq = cpu_khz;
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
@@ -1071,7 +1104,6 @@
 	
 #ifdef CONFIG_SMP
 	if (smp_num_siblings * c->x86_max_cores > 1) {
-		int cpu = c - cpu_data;
 		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
 		seq_printf(m, "siblings\t: %d\n",
 			       cpus_weight(per_cpu(cpu_core_map, cpu)));
@@ -1129,12 +1161,16 @@
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+	if (*pos == 0)	/* just in case, cpu 0 is not the first */
+		*pos = first_cpu(cpu_possible_map);
+	if ((*pos) < NR_CPUS && cpu_possible(*pos))
+		return &cpu_data(*pos);
+	return NULL;
 }
 
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	++*pos;
+	*pos = next_cpu(*pos, cpu_possible_map);
 	return c_start(m, pos);
 }
 
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 0d79df3..6dc394b 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -200,8 +200,8 @@
 	if (show_unhandled_signals && printk_ratelimit())
 		printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx"
 		       " esp:%lx oeax:%lx\n",
-		    current->pid > 1 ? KERN_INFO : KERN_EMERG,
-		    current->comm, current->pid, frame, regs->eip,
+		    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
+		    current->comm, task_pid_nr(current), frame, regs->eip,
 		    regs->esp, regs->orig_eax);
 
 	force_sig(SIGSEGV, current);
diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c
index 791d9f8..2621ca3 100644
--- a/arch/x86/kernel/smp_32.c
+++ b/arch/x86/kernel/smp_32.c
@@ -610,7 +610,7 @@
 	 */
 	cpu_clear(smp_processor_id(), cpu_online_map);
 	disable_local_APIC();
-	if (cpu_data[smp_processor_id()].hlt_works_ok)
+	if (cpu_data(smp_processor_id()).hlt_works_ok)
 		for(;;) halt();
 	for (;;);
 }
@@ -676,7 +676,7 @@
 	int i;
 
 	for (i = 0; i < NR_CPUS; i++) {
-		if (x86_cpu_to_apicid[i] == apic_id)
+		if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
 			return i;
 	}
 	return -1;
diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index 5c29647..03fa6ed 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -322,17 +322,27 @@
 }
 
 /*
- * this function sends a 'generic call function' IPI to one other CPU
- * in the system.
- *
- * cpu is a standard Linux logical CPU number.
+ * this function sends a 'generic call function' IPI to all other CPU
+ * of the system defined in the mask.
  */
-static void
-__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-				int nonatomic, int wait)
+
+static int
+__smp_call_function_mask(cpumask_t mask,
+			 void (*func)(void *), void *info,
+			 int wait)
 {
 	struct call_data_struct data;
-	int cpus = 1;
+	cpumask_t allbutself;
+	int cpus;
+
+	allbutself = cpu_online_map;
+	cpu_clear(smp_processor_id(), allbutself);
+
+	cpus_and(mask, mask, allbutself);
+	cpus = cpus_weight(mask);
+
+	if (!cpus)
+		return 0;
 
 	data.func = func;
 	data.info = info;
@@ -343,19 +353,55 @@
 
 	call_data = &data;
 	wmb();
-	/* Send a message to all other CPUs and wait for them to respond */
-	send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
+
+	/* Send a message to other CPUs */
+	if (cpus_equal(mask, allbutself))
+		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+	else
+		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
 
 	/* Wait for response */
 	while (atomic_read(&data.started) != cpus)
 		cpu_relax();
 
 	if (!wait)
-		return;
+		return 0;
 
 	while (atomic_read(&data.finished) != cpus)
 		cpu_relax();
+
+	return 0;
 }
+/**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on.  Must not include the current cpu.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function_mask(cpumask_t mask,
+			   void (*func)(void *), void *info,
+			   int wait)
+{
+	int ret;
+
+	/* Can deadlock when called with interrupts disabled */
+	WARN_ON(irqs_disabled());
+
+	spin_lock(&call_lock);
+	ret = __smp_call_function_mask(mask, func, info, wait);
+	spin_unlock(&call_lock);
+	return ret;
+}
+EXPORT_SYMBOL(smp_call_function_mask);
 
 /*
  * smp_call_function_single - Run a function on a specific CPU
@@ -374,6 +420,7 @@
 	int nonatomic, int wait)
 {
 	/* prevent preemption and reschedule on another processor */
+	int ret;
 	int me = get_cpu();
 
 	/* Can deadlock when called with interrupts disabled */
@@ -387,51 +434,14 @@
 		return 0;
 	}
 
-	spin_lock(&call_lock);
-	__smp_call_function_single(cpu, func, info, nonatomic, wait);
-	spin_unlock(&call_lock);
+	ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
+
 	put_cpu();
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(smp_call_function_single);
 
 /*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-static void __smp_call_function (void (*func) (void *info), void *info,
-				int nonatomic, int wait)
-{
-	struct call_data_struct data;
-	int cpus = num_online_cpus()-1;
-
-	if (!cpus)
-		return;
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
-
-	call_data = &data;
-	wmb();
-	/* Send a message to all other CPUs and wait for them to respond */
-	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
-		cpu_relax();
-
-	if (!wait)
-		return;
-
-	while (atomic_read(&data.finished) != cpus)
-		cpu_relax();
-}
-
-/*
  * smp_call_function - run a function on all other CPUs.
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
@@ -449,10 +459,7 @@
 int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 			int wait)
 {
-	spin_lock(&call_lock);
-	__smp_call_function(func,info,nonatomic,wait);
-	spin_unlock(&call_lock);
-	return 0;
+	return smp_call_function_mask(cpu_online_map, func, info, wait);
 }
 EXPORT_SYMBOL(smp_call_function);
 
@@ -479,7 +486,7 @@
 	/* Don't deadlock on the call lock in panic */
 	nolock = !spin_trylock(&call_lock);
 	local_irq_save(flags);
-	__smp_call_function(stop_this_cpu, NULL, 0, 0);
+	__smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
 	if (!nolock)
 		spin_unlock(&call_lock);
 	disable_local_APIC();
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index be3faac..7b8fdfa 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -67,7 +67,7 @@
 EXPORT_SYMBOL(smp_num_siblings);
 
 /* Last level cache ID of each logical CPU */
-int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID;
 
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
@@ -89,12 +89,20 @@
 static cpumask_t smp_commenced_mask;
 
 /* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_data);
+DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
+EXPORT_PER_CPU_SYMBOL(cpu_info);
 
-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
-			{ [0 ... NR_CPUS-1] = 0xff };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
+/*
+ * The following static array is used during kernel startup
+ * and the x86_cpu_to_apicid_ptr contains the address of the
+ * array during this time.  Is it zeroed when the per_cpu
+ * data area is removed.
+ */
+u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
+			{ [0 ... NR_CPUS-1] = BAD_APICID };
+void *x86_cpu_to_apicid_ptr;
+DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 
 u8 apicid_2_node[MAX_APICID];
 
@@ -150,9 +158,10 @@
 
 void __cpuinit smp_store_cpu_info(int id)
 {
-	struct cpuinfo_x86 *c = cpu_data + id;
+	struct cpuinfo_x86 *c = &cpu_data(id);
 
 	*c = boot_cpu_data;
+	c->cpu_index = id;
 	if (id!=0)
 		identify_secondary_cpu(c);
 	/*
@@ -294,7 +303,7 @@
 /* maps the cpu to the sched domain representing multi-core */
 cpumask_t cpu_coregroup_map(int cpu)
 {
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	/*
 	 * For perf, we return last level cache shared map.
 	 * And for power savings, we return cpu_core_map
@@ -311,41 +320,41 @@
 void __cpuinit set_cpu_sibling_map(int cpu)
 {
 	int i;
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	cpu_set(cpu, cpu_sibling_setup_map);
 
 	if (smp_num_siblings > 1) {
 		for_each_cpu_mask(i, cpu_sibling_setup_map) {
-			if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
-			    c[cpu].cpu_core_id == c[i].cpu_core_id) {
+			if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
+			    c->cpu_core_id == cpu_data(i).cpu_core_id) {
 				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
 				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
 				cpu_set(i, per_cpu(cpu_core_map, cpu));
 				cpu_set(cpu, per_cpu(cpu_core_map, i));
-				cpu_set(i, c[cpu].llc_shared_map);
-				cpu_set(cpu, c[i].llc_shared_map);
+				cpu_set(i, c->llc_shared_map);
+				cpu_set(cpu, cpu_data(i).llc_shared_map);
 			}
 		}
 	} else {
 		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
 	}
 
-	cpu_set(cpu, c[cpu].llc_shared_map);
+	cpu_set(cpu, c->llc_shared_map);
 
 	if (current_cpu_data.x86_max_cores == 1) {
 		per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
-		c[cpu].booted_cores = 1;
+		c->booted_cores = 1;
 		return;
 	}
 
 	for_each_cpu_mask(i, cpu_sibling_setup_map) {
-		if (cpu_llc_id[cpu] != BAD_APICID &&
-		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
-			cpu_set(i, c[cpu].llc_shared_map);
-			cpu_set(cpu, c[i].llc_shared_map);
+		if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
+		    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
+			cpu_set(i, c->llc_shared_map);
+			cpu_set(cpu, cpu_data(i).llc_shared_map);
 		}
-		if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
+		if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
 			cpu_set(i, per_cpu(cpu_core_map, cpu));
 			cpu_set(cpu, per_cpu(cpu_core_map, i));
 			/*
@@ -357,15 +366,15 @@
 				 * the booted_cores for this new cpu
 				 */
 				if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
-					c[cpu].booted_cores++;
+					c->booted_cores++;
 				/*
 				 * increment the core count for all
 				 * the other cpus in this package
 				 */
 				if (i != cpu)
-					c[i].booted_cores++;
-			} else if (i != cpu && !c[cpu].booted_cores)
-				c[cpu].booted_cores = c[i].booted_cores;
+					cpu_data(i).booted_cores++;
+			} else if (i != cpu && !c->booted_cores)
+				c->booted_cores = cpu_data(i).booted_cores;
 		}
 	}
 }
@@ -804,7 +813,7 @@
 
 	irq_ctx_init(cpu);
 
-	x86_cpu_to_apicid[cpu] = apicid;
+	per_cpu(x86_cpu_to_apicid, cpu) = apicid;
 	/*
 	 * This grunge runs the startup process for
 	 * the targeted processor.
@@ -844,7 +853,7 @@
 			/* number CPUs logically, starting from 1 (BSP is 0) */
 			Dprintk("OK.\n");
 			printk("CPU%d: ", cpu);
-			print_cpu_info(&cpu_data[cpu]);
+			print_cpu_info(&cpu_data(cpu));
 			Dprintk("CPU has booted.\n");
 		} else {
 			boot_error= 1;
@@ -866,7 +875,7 @@
 		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
 		cpucount--;
 	} else {
-		x86_cpu_to_apicid[cpu] = apicid;
+		per_cpu(x86_cpu_to_apicid, cpu) = apicid;
 		cpu_set(cpu, cpu_present_map);
 	}
 
@@ -915,7 +924,7 @@
 	struct warm_boot_cpu_info info;
 	int	apicid, ret;
 
-	apicid = x86_cpu_to_apicid[cpu];
+	apicid = per_cpu(x86_cpu_to_apicid, cpu);
 	if (apicid == BAD_APICID) {
 		ret = -ENODEV;
 		goto exit;
@@ -961,11 +970,11 @@
 	 */
 	smp_store_cpu_info(0); /* Final full version of the data */
 	printk("CPU%d: ", 0);
-	print_cpu_info(&cpu_data[0]);
+	print_cpu_info(&cpu_data(0));
 
 	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
 	boot_cpu_logical_apicid = logical_smp_processor_id();
-	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+	per_cpu(x86_cpu_to_apicid, 0) = boot_cpu_physical_apicid;
 
 	current_thread_info()->cpu = 0;
 
@@ -1008,6 +1017,7 @@
 		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
 		smpboot_clear_io_apic_irqs();
 		phys_cpu_present_map = physid_mask_of_physid(0);
+		map_cpu_to_logical_apicid();
 		cpu_set(0, per_cpu(cpu_sibling_map, 0));
 		cpu_set(0, per_cpu(cpu_core_map, 0));
 		return;
@@ -1029,6 +1039,7 @@
 		}
 		smpboot_clear_io_apic_irqs();
 		phys_cpu_present_map = physid_mask_of_physid(0);
+		map_cpu_to_logical_apicid();
 		cpu_set(0, per_cpu(cpu_sibling_map, 0));
 		cpu_set(0, per_cpu(cpu_core_map, 0));
 		return;
@@ -1082,7 +1093,7 @@
 	Dprintk("Before bogomips.\n");
 	for (cpu = 0; cpu < NR_CPUS; cpu++)
 		if (cpu_isset(cpu, cpu_callout_map))
-			bogosum += cpu_data[cpu].loops_per_jiffy;
+			bogosum += cpu_data(cpu).loops_per_jiffy;
 	printk(KERN_INFO
 		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
 		cpucount+1,
@@ -1152,7 +1163,7 @@
 void remove_siblinginfo(int cpu)
 {
 	int sibling;
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
 		cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
@@ -1160,15 +1171,15 @@
 		 * last thread sibling in this cpu core going down
 		 */
 		if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
-			c[sibling].booted_cores--;
+			cpu_data(sibling).booted_cores--;
 	}
 			
 	for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
 		cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
 	cpus_clear(per_cpu(cpu_sibling_map, cpu));
 	cpus_clear(per_cpu(cpu_core_map, cpu));
-	c[cpu].phys_proc_id = 0;
-	c[cpu].cpu_core_id = 0;
+	c->phys_proc_id = 0;
+	c->cpu_core_id = 0;
 	cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index e351ac4..fd1fff6 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -65,7 +65,7 @@
 EXPORT_SYMBOL(smp_num_siblings);
 
 /* Last level cache ID of each logical CPU */
-u8 cpu_llc_id[NR_CPUS] __cpuinitdata  = {[0 ... NR_CPUS-1] = BAD_APICID};
+DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID;
 
 /* Bitmask of currently online CPUs */
 cpumask_t cpu_online_map __read_mostly;
@@ -84,8 +84,8 @@
 EXPORT_SYMBOL(cpu_possible_map);
 
 /* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_data);
+DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
+EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 /* Set when the idlers are all forked */
 int smp_threads_ready;
@@ -138,9 +138,10 @@
 
 static void __cpuinit smp_store_cpu_info(int id)
 {
-	struct cpuinfo_x86 *c = cpu_data + id;
+	struct cpuinfo_x86 *c = &cpu_data(id);
 
 	*c = boot_cpu_data;
+	c->cpu_index = id;
 	identify_cpu(c);
 	print_cpu_info(c);
 }
@@ -237,7 +238,7 @@
 /* maps the cpu to the sched domain representing multi-core */
 cpumask_t cpu_coregroup_map(int cpu)
 {
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	/*
 	 * For perf, we return last level cache shared map.
 	 * And for power savings, we return cpu_core_map
@@ -254,41 +255,41 @@
 static inline void set_cpu_sibling_map(int cpu)
 {
 	int i;
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	cpu_set(cpu, cpu_sibling_setup_map);
 
 	if (smp_num_siblings > 1) {
 		for_each_cpu_mask(i, cpu_sibling_setup_map) {
-			if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
-			    c[cpu].cpu_core_id == c[i].cpu_core_id) {
+			if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
+			    c->cpu_core_id == cpu_data(i).cpu_core_id) {
 				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
 				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
 				cpu_set(i, per_cpu(cpu_core_map, cpu));
 				cpu_set(cpu, per_cpu(cpu_core_map, i));
-				cpu_set(i, c[cpu].llc_shared_map);
-				cpu_set(cpu, c[i].llc_shared_map);
+				cpu_set(i, c->llc_shared_map);
+				cpu_set(cpu, cpu_data(i).llc_shared_map);
 			}
 		}
 	} else {
 		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
 	}
 
-	cpu_set(cpu, c[cpu].llc_shared_map);
+	cpu_set(cpu, c->llc_shared_map);
 
 	if (current_cpu_data.x86_max_cores == 1) {
 		per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
-		c[cpu].booted_cores = 1;
+		c->booted_cores = 1;
 		return;
 	}
 
 	for_each_cpu_mask(i, cpu_sibling_setup_map) {
-		if (cpu_llc_id[cpu] != BAD_APICID &&
-		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
-			cpu_set(i, c[cpu].llc_shared_map);
-			cpu_set(cpu, c[i].llc_shared_map);
+		if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
+		    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
+			cpu_set(i, c->llc_shared_map);
+			cpu_set(cpu, cpu_data(i).llc_shared_map);
 		}
-		if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
+		if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
 			cpu_set(i, per_cpu(cpu_core_map, cpu));
 			cpu_set(cpu, per_cpu(cpu_core_map, i));
 			/*
@@ -300,15 +301,15 @@
 				 * the booted_cores for this new cpu
 				 */
 				if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
-					c[cpu].booted_cores++;
+					c->booted_cores++;
 				/*
 				 * increment the core count for all
 				 * the other cpus in this package
 				 */
 				if (i != cpu)
-					c[i].booted_cores++;
-			} else if (i != cpu && !c[cpu].booted_cores)
-				c[cpu].booted_cores = c[i].booted_cores;
+					cpu_data(i).booted_cores++;
+			} else if (i != cpu && !c->booted_cores)
+				c->booted_cores = cpu_data(i).booted_cores;
 		}
 	}
 }
@@ -694,7 +695,7 @@
 		clear_node_cpumask(cpu); /* was set by numa_add_cpu */
 		cpu_clear(cpu, cpu_present_map);
 		cpu_clear(cpu, cpu_possible_map);
-		x86_cpu_to_apicid[cpu] = BAD_APICID;
+		per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
 		return -EIO;
 	}
 
@@ -841,6 +842,26 @@
 }
 
 /*
+ * Copy apicid's found by MP_processor_info from initial array to the per cpu
+ * data area.  The x86_cpu_to_apicid_init array is then expendable and the
+ * x86_cpu_to_apicid_ptr is zeroed indicating that the static array is no
+ * longer available.
+ */
+void __init smp_set_apicids(void)
+{
+	int cpu;
+
+	for_each_cpu_mask(cpu, cpu_possible_map) {
+		if (per_cpu_offset(cpu))
+			per_cpu(x86_cpu_to_apicid, cpu) =
+						x86_cpu_to_apicid_init[cpu];
+	}
+
+	/* indicate the static array will be going away soon */
+	x86_cpu_to_apicid_ptr = NULL;
+}
+
+/*
  * Prepare for SMP bootup.  The MP table or ACPI has been read
  * earlier.  Just do some sanity checking here and enable APIC mode.
  */
@@ -849,6 +870,7 @@
 	nmi_watchdog_default();
 	current_cpu_data = boot_cpu_data;
 	current_thread_info()->cpu = 0;  /* needed? */
+	smp_set_apicids();
 	set_cpu_sibling_map(0);
 
 	if (smp_sanity_check(max_cpus) < 0) {
@@ -968,7 +990,7 @@
 static void remove_siblinginfo(int cpu)
 {
 	int sibling;
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
 		cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
@@ -976,15 +998,15 @@
 		 * last thread sibling in this cpu core going down
 		 */
 		if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
-			c[sibling].booted_cores--;
+			cpu_data(sibling).booted_cores--;
 	}
 			
 	for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
 		cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
 	cpus_clear(per_cpu(cpu_sibling_map, cpu));
 	cpus_clear(per_cpu(cpu_core_map, cpu));
-	c[cpu].phys_proc_id = 0;
-	c[cpu].cpu_core_id = 0;
+	c->phys_proc_id = 0;
+	c->cpu_core_id = 0;
 	cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c
index f8fafe5..622bb02 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/kernel/suspend_64.c
@@ -32,9 +32,9 @@
 	/*
 	 * descriptor tables
 	 */
-	asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit));
-	asm volatile ("sidt %0" : "=m" (ctxt->idt_limit));
-	asm volatile ("str %0"  : "=m" (ctxt->tr));
+	store_gdt((struct desc_ptr *)&ctxt->gdt_limit);
+	store_idt((struct desc_ptr *)&ctxt->idt_limit);
+	store_tr(ctxt->tr);
 
 	/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
 	/*
@@ -91,8 +91,9 @@
 	 * now restore the descriptor tables to their proper values
 	 * ltr is done i fix_processor_context().
 	 */
-	asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
-	asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
+	load_gdt((const struct desc_ptr *)&ctxt->gdt_limit);
+	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
+
 
 	/*
 	 * segment registers
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index b132d39..cc9acac 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -63,6 +63,9 @@
 
 int panic_on_unrecovered_nmi;
 
+DECLARE_BITMAP(used_vectors, NR_VECTORS);
+EXPORT_SYMBOL_GPL(used_vectors);
+
 asmlinkage int system_call(void);
 
 /* Do we ignore FPU interrupts ? */
@@ -288,48 +291,24 @@
 void show_registers(struct pt_regs *regs)
 {
 	int i;
-	int in_kernel = 1;
-	unsigned long esp;
-	unsigned short ss, gs;
 
-	esp = (unsigned long) (&regs->esp);
-	savesegment(ss, ss);
-	savesegment(gs, gs);
-	if (user_mode_vm(regs)) {
-		in_kernel = 0;
-		esp = regs->esp;
-		ss = regs->xss & 0xffff;
-	}
 	print_modules();
-	printk(KERN_EMERG "CPU:    %d\n"
-		KERN_EMERG "EIP:    %04x:[<%08lx>]    %s VLI\n"
-		KERN_EMERG "EFLAGS: %08lx   (%s %.*s)\n",
-		smp_processor_id(), 0xffff & regs->xcs, regs->eip,
-		print_tainted(), regs->eflags, init_utsname()->release,
-		(int)strcspn(init_utsname()->version, " "),
-		init_utsname()->version);
-	print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
-	printk(KERN_EMERG "eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
-		regs->eax, regs->ebx, regs->ecx, regs->edx);
-	printk(KERN_EMERG "esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
-		regs->esi, regs->edi, regs->ebp, esp);
-	printk(KERN_EMERG "ds: %04x   es: %04x   fs: %04x  gs: %04x  ss: %04x\n",
-	       regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
+	__show_registers(regs, 0);
 	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
-		TASK_COMM_LEN, current->comm, current->pid,
+		TASK_COMM_LEN, current->comm, task_pid_nr(current),
 		current_thread_info(), current, task_thread_info(current));
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
 	 */
-	if (in_kernel) {
+	if (!user_mode_vm(regs)) {
 		u8 *eip;
 		unsigned int code_prologue = code_bytes * 43 / 64;
 		unsigned int code_len = code_bytes;
 		unsigned char c;
 
 		printk("\n" KERN_EMERG "Stack: ");
-		show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
+		show_stack_log_lvl(NULL, regs, &regs->esp, KERN_EMERG);
 
 		printk(KERN_EMERG "Code: ");
 
@@ -374,11 +353,11 @@
 void die(const char * str, struct pt_regs * regs, long err)
 {
 	static struct {
-		spinlock_t lock;
+		raw_spinlock_t lock;
 		u32 lock_owner;
 		int lock_owner_depth;
 	} die = {
-		.lock =			__SPIN_LOCK_UNLOCKED(die.lock),
+		.lock =			__RAW_SPIN_LOCK_UNLOCKED,
 		.lock_owner =		-1,
 		.lock_owner_depth =	0
 	};
@@ -389,13 +368,14 @@
 
 	if (die.lock_owner != raw_smp_processor_id()) {
 		console_verbose();
-		spin_lock_irqsave(&die.lock, flags);
+		__raw_spin_lock(&die.lock);
+		raw_local_save_flags(flags);
 		die.lock_owner = smp_processor_id();
 		die.lock_owner_depth = 0;
 		bust_spinlocks(1);
 	}
 	else
-		local_save_flags(flags);
+		raw_local_save_flags(flags);
 
 	if (++die.lock_owner_depth < 3) {
 		unsigned long esp;
@@ -439,7 +419,8 @@
 	bust_spinlocks(0);
 	die.lock_owner = -1;
 	add_taint(TAINT_DIE);
-	spin_unlock_irqrestore(&die.lock, flags);
+	__raw_spin_unlock(&die.lock);
+	raw_local_irq_restore(flags);
 
 	if (!regs)
 		return;
@@ -622,7 +603,7 @@
 	    printk_ratelimit())
 		printk(KERN_INFO
 		    "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
-		    current->comm, current->pid,
+		    current->comm, task_pid_nr(current),
 		    regs->eip, regs->esp, error_code);
 
 	force_sig(SIGSEGV, current);
@@ -1142,6 +1123,8 @@
 
 void __init trap_init(void)
 {
+	int i;
+
 #ifdef CONFIG_EISA
 	void __iomem *p = ioremap(0x0FFFD9, 4);
 	if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {
@@ -1201,6 +1184,11 @@
 
 	set_system_gate(SYSCALL_VECTOR,&system_call);
 
+	/* Reserve all the builtin and the syscall vector. */
+	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
+		set_bit(i, used_vectors);
+	set_bit(SYSCALL_VECTOR, used_vectors);
+
 	/*
 	 * Should be a barrier for any external CPU state.
 	 */
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index b4a9b3d..df690c3 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -462,7 +462,7 @@
 EXPORT_SYMBOL(out_of_line_bug);
 #endif
 
-static DEFINE_SPINLOCK(die_lock);
+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
 static int die_owner = -1;
 static unsigned int die_nest_count;
 
@@ -474,13 +474,13 @@
 	oops_enter();
 
 	/* racy, but better than risking deadlock. */
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	cpu = smp_processor_id();
-	if (!spin_trylock(&die_lock)) { 
+	if (!__raw_spin_trylock(&die_lock)) {
 		if (cpu == die_owner) 
 			/* nested oops. should stop eventually */;
 		else
-			spin_lock(&die_lock);
+			__raw_spin_lock(&die_lock);
 	}
 	die_nest_count++;
 	die_owner = cpu;
@@ -494,12 +494,10 @@
 	die_owner = -1;
 	bust_spinlocks(0);
 	die_nest_count--;
-	if (die_nest_count)
-		/* We still own the lock */
-		local_irq_restore(flags);
-	else
+	if (!die_nest_count)
 		/* Nest count reaches zero, release the lock. */
-		spin_unlock_irqrestore(&die_lock, flags);
+		__raw_spin_unlock(&die_lock);
+	raw_local_irq_restore(flags);
 	if (panic_on_oops)
 		panic("Fatal exception");
 	oops_exit();
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index e87a393..b8a7cf6 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -181,8 +181,8 @@
 	if (cpu_has_tsc) {
 		cpu_khz = calculate_cpu_khz();
 		tsc_khz = cpu_khz;
-		cpu_data[0].loops_per_jiffy =
-			cpufreq_scale(cpu_data[0].loops_per_jiffy,
+		cpu_data(0).loops_per_jiffy =
+			cpufreq_scale(cpu_data(0).loops_per_jiffy,
 					cpu_khz_old, cpu_khz);
 		return 0;
 	} else
@@ -215,7 +215,7 @@
 			return 0;
 		}
 		ref_freq = freq->old;
-		loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
+		loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy;
 		cpu_khz_ref = cpu_khz;
 	}
 
@@ -223,7 +223,7 @@
 	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
 	    (val == CPUFREQ_RESUMECHANGE)) {
 		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-			cpu_data[freq->cpu].loops_per_jiffy =
+			cpu_data(freq->cpu).loops_per_jiffy =
 				cpufreq_scale(loops_per_jiffy_ref,
 						ref_freq, freq->new);
 
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 9f22e54..9c70af4 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -73,13 +73,13 @@
 	struct cpufreq_freqs *freq = data;
 	unsigned long *lpj, dummy;
 
-	if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
+	if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
 		return 0;
 
 	lpj = &dummy;
 	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
 #ifdef CONFIG_SMP
-		lpj = &cpu_data[freq->cpu].loops_per_jiffy;
+		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
 #else
 		lpj = &boot_cpu_data.loops_per_jiffy;
 #endif
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 585541c..78f2250 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -48,7 +48,7 @@
 	({unsigned long v;  		\
 	extern char __vsyscall_0; 	\
 	  asm("" : "=r" (v) : "0" (x)); \
-	  ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
+	  ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); })
 
 /*
  * vsyscall_gtod_data contains data that is :
@@ -291,7 +291,7 @@
 #ifdef CONFIG_NUMA
 	node = cpu_to_node(cpu);
 #endif
-	if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
+	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
 		write_rdtscp_aux((node << 12) | cpu);
 
 	/* Store cpu number in limit so that it can be loaded quickly
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c
index f6edb11..952e7a8 100644
--- a/arch/x86/lib/delay_32.c
+++ b/arch/x86/lib/delay_32.c
@@ -82,7 +82,7 @@
 	__asm__("mull %0"
 		:"=d" (xloops), "=&a" (d0)
 		:"1" (xloops), "0"
-		(cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
+		(cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
 
 	__delay(++xloops);
 }
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
index 2dbebd3..0ebbfb9 100644
--- a/arch/x86/lib/delay_64.c
+++ b/arch/x86/lib/delay_64.c
@@ -40,7 +40,8 @@
 
 inline void __const_udelay(unsigned long xloops)
 {
-	__delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1);
+	__delay(((xloops * HZ *
+		cpu_data(raw_smp_processor_id()).loops_per_jiffy) >> 32) + 1);
 }
 EXPORT_SYMBOL(__const_udelay);
 
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9f38b12..8bab2b2 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -748,7 +748,7 @@
 			retval = get_user_pages(current, current->mm,
 					(unsigned long )to, 1, 1, 0, &pg, NULL);
 
-			if (retval == -ENOMEM && is_init(current)) {
+			if (retval == -ENOMEM && is_global_init(current)) {
 				up_read(&current->mm->mmap_sem);
 				congestion_wait(WRITE, HZ/50);
 				goto survive;
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index e4928aa..f93a730 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -36,8 +36,8 @@
 
 /* per CPU data structure (for /proc/cpuinfo et al), visible externally
  * indexed physically */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_data);
+DEFINE_PER_CPU(cpuinfo_x86, cpu_info) __cacheline_aligned;
+EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 /* physical ID of the CPU used to boot the system */
 unsigned char boot_cpu_id;
@@ -430,7 +430,7 @@
 void __init
 smp_store_cpu_info(int id)
 {
-	struct cpuinfo_x86 *c=&cpu_data[id];
+	struct cpuinfo_x86 *c = &cpu_data(id);
 
 	*c = boot_cpu_data;
 
@@ -634,7 +634,7 @@
 			cpu, smp_processor_id()));
 	
 		printk("CPU%d: ", cpu);
-		print_cpu_info(&cpu_data[cpu]);
+		print_cpu_info(&cpu_data(cpu));
 		wmb();
 		cpu_set(cpu, cpu_callout_map);
 		cpu_set(cpu, cpu_present_map);
@@ -683,7 +683,7 @@
 	 */
 	smp_store_cpu_info(boot_cpu_id);
 	printk("CPU%d: ", boot_cpu_id);
-	print_cpu_info(&cpu_data[boot_cpu_id]);
+	print_cpu_info(&cpu_data(boot_cpu_id));
 
 	if(is_cpu_quad()) {
 		/* booting on a Quad CPU */
@@ -714,7 +714,7 @@
 		unsigned long bogosum = 0;
 		for (i = 0; i < NR_CPUS; i++)
 			if (cpu_isset(i, cpu_online_map))
-				bogosum += cpu_data[i].loops_per_jiffy;
+				bogosum += cpu_data(i).loops_per_jiffy;
 		printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
 			cpucount+1,
 			bogosum/(500000/HZ),
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 6555c3d..b695d70 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -471,8 +471,8 @@
 		    printk_ratelimit()) {
 			printk("%s%s[%d]: segfault at %08lx eip %08lx "
 			    "esp %08lx error %lx\n",
-			    tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
-			    tsk->comm, tsk->pid, address, regs->eip,
+			    task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+			    tsk->comm, task_pid_nr(tsk), address, regs->eip,
 			    regs->esp, error_code);
 		}
 		tsk->thread.cr2 = address;
@@ -564,7 +564,8 @@
 		 * it's allocated already.
 		 */
 		if ((page >> PAGE_SHIFT) < max_low_pfn
-		    && (page & _PAGE_PRESENT)) {
+		    && (page & _PAGE_PRESENT)
+		    && !(page & _PAGE_PSE)) {
 			page &= PAGE_MASK;
 			page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
 			                                         & (PTRS_PER_PTE - 1)];
@@ -587,7 +588,7 @@
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(tsk)) {
+	if (is_global_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 5e0e549..00be7f0 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -169,7 +169,7 @@
 	pmd = pmd_offset(pud, address);
 	if (bad_address(pmd)) goto bad;
 	printk("PMD %lx ", pmd_val(*pmd));
-	if (!pmd_present(*pmd))	goto ret;	 
+	if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
 
 	pte = pte_offset_kernel(pmd, address);
 	if (bad_address(pte)) goto bad;
@@ -285,7 +285,6 @@
 	return 0;
 }
 
-static int page_fault_trace;
 int show_unhandled_signals = 1;
 
 /*
@@ -354,10 +353,6 @@
 	if (likely(regs->eflags & X86_EFLAGS_IF))
 		local_irq_enable();
 
-	if (unlikely(page_fault_trace))
-		printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
-		       regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); 
-
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(address, regs, error_code);
 
@@ -488,7 +483,7 @@
 		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 		    printk_ratelimit()) {
 			printk(
-		       "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
+		       "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n",
 					tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
 					tsk->comm, tsk->pid, address, regs->rip,
 					regs->rsp, error_code);
@@ -554,7 +549,7 @@
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		goto again;
 	}
@@ -621,10 +616,3 @@
 	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == 
 				(__START_KERNEL & PGDIR_MASK)));
 }
-
-static int __init enable_pagefaulttrace(char *str)
-{
-	page_fault_trace = 1;
-	return 1;
-}
-__setup("pagefaulttrace", enable_pagefaulttrace);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 5eec5e5..3d6926b 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -612,7 +612,7 @@
 {
 	int i;
  	for (i = 0; i < NR_CPUS; i++) {
-		u8 apicid = x86_cpu_to_apicid[i];
+		u8 apicid = x86_cpu_to_apicid_init[i];
 		if (apicid == BAD_APICID)
 			continue;
 		if (apicid_to_node[apicid] == NUMA_NO_NODE)
diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 8a4f65b..c7b7dfe 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -230,9 +230,14 @@
 	struct page *pg, *next;
 	struct list_head l;
 
-	down_read(&init_mm.mmap_sem);
+	/*
+	 * Write-protect the semaphore, to exclude two contexts
+	 * doing a list_replace_init() call in parallel and to
+	 * exclude new additions to the deferred_pages list:
+	 */
+	down_write(&init_mm.mmap_sem);
 	list_replace_init(&deferred_pages, &l);
-	up_read(&init_mm.mmap_sem);
+	up_write(&init_mm.mmap_sem);
 
 	flush_map(&l);
 
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index c049ce4..0ed046a 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -13,26 +13,46 @@
 #include <linux/mm.h>
 #include <asm/ptrace.h>
 #include <asm/uaccess.h>
+#include <asm/stacktrace.h>
+
+static void backtrace_warning_symbol(void *data, char *msg,
+				     unsigned long symbol)
+{
+	/* Ignore warnings */
+}
+
+static void backtrace_warning(void *data, char *msg)
+{
+	/* Ignore warnings */
+}
+
+static int backtrace_stack(void *data, char *name)
+{
+	/* Yes, we want all stacks */
+	return 0;
+}
+
+static void backtrace_address(void *data, unsigned long addr)
+{
+	unsigned int *depth = data;
+
+	if ((*depth)--)
+		oprofile_add_trace(addr);
+}
+
+static struct stacktrace_ops backtrace_ops = {
+	.warning = backtrace_warning,
+	.warning_symbol = backtrace_warning_symbol,
+	.stack = backtrace_stack,
+	.address = backtrace_address,
+};
 
 struct frame_head {
-	struct frame_head * ebp;
+	struct frame_head *ebp;
 	unsigned long ret;
 } __attribute__((packed));
 
 static struct frame_head *
-dump_kernel_backtrace(struct frame_head * head)
-{
-	oprofile_add_trace(head->ret);
-
-	/* frame pointers should strictly progress back up the stack
-	 * (towards higher addresses) */
-	if (head >= head->ebp)
-		return NULL;
-
-	return head->ebp;
-}
-
-static struct frame_head *
 dump_user_backtrace(struct frame_head * head)
 {
 	struct frame_head bufhead[2];
@@ -53,72 +73,16 @@
 	return bufhead[0].ebp;
 }
 
-/*
- * |             | /\ Higher addresses
- * |             |
- * --------------- stack base (address of current_thread_info)
- * | thread info |
- * .             .
- * |    stack    |
- * --------------- saved regs->ebp value if valid (frame_head address)
- * .             .
- * --------------- saved regs->rsp value if x86_64
- * |             |
- * --------------- struct pt_regs * stored on stack if 32-bit
- * |             |
- * .             .
- * |             |
- * --------------- %esp
- * |             |
- * |             | \/ Lower addresses
- *
- * Thus, regs (or regs->rsp for x86_64) <-> stack base restricts the
- * valid(ish) ebp values. Note: (1) for x86_64, NMI and several other
- * exceptions use special stacks, maintained by the interrupt stack table
- * (IST). These stacks are set up in trap_init() in
- * arch/x86_64/kernel/traps.c. Thus, for x86_64, regs now does not point
- * to the kernel stack; instead, it points to some location on the NMI
- * stack. On the other hand, regs->rsp is the stack pointer saved when the
- * NMI occurred. (2) For 32-bit, regs->esp is not valid because the
- * processor does not save %esp on the kernel stack when interrupts occur
- * in the kernel mode.
- */
-#ifdef CONFIG_FRAME_POINTER
-static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
-{
-	unsigned long headaddr = (unsigned long)head;
-#ifdef CONFIG_X86_64
-	unsigned long stack = (unsigned long)regs->rsp;
-#else
-	unsigned long stack = (unsigned long)regs;
-#endif
-	unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE;
-
-	return headaddr > stack && headaddr < stack_base;
-}
-#else
-/* without fp, it's just junk */
-static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
-{
-	return 0;
-}
-#endif
-
-
 void
 x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
-	struct frame_head *head;
-
-#ifdef CONFIG_X86_64
-	head = (struct frame_head *)regs->rbp;
-#else
-	head = (struct frame_head *)regs->ebp;
-#endif
+	struct frame_head *head = (struct frame_head *)frame_pointer(regs);
+	unsigned long stack = stack_pointer(regs);
 
 	if (!user_mode_vm(regs)) {
-		while (depth-- && valid_kernel_stack(head, regs))
-			head = dump_kernel_backtrace(head);
+		if (depth)
+			dump_trace(NULL, regs, (unsigned long *)stack,
+				   &backtrace_ops, &depth);
 		return;
 	}
 
diff --git a/arch/x86_64/.gitignore b/arch/x86_64/.gitignore
new file mode 100644
index 0000000..36ef4c3
--- /dev/null
+++ b/arch/x86_64/.gitignore
@@ -0,0 +1 @@
+boot
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 78cb68f..aab25f3 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -723,7 +723,9 @@
 
 source "drivers/acpi/Kconfig"
 
-source "arch/x86/kernel/cpufreq/Kconfig"
+source "arch/x86/kernel/cpu/cpufreq/Kconfig_64"
+
+source "drivers/cpuidle/Kconfig"
 
 endmenu
 
@@ -766,9 +768,9 @@
 config IA32_EMULATION
 	bool "IA32 Emulation"
 	help
-	  Include code to run 32-bit programs under a 64-bit kernel. You should likely
-	  turn this on, unless you're 100% sure that you don't have any 32-bit programs
-	  left.
+	  Include code to run 32-bit programs under a 64-bit kernel. You should
+	  likely turn this on, unless you're 100% sure that you don't have any
+	  32-bit programs left.
 
 config IA32_AOUT
        tristate "IA32 a.out support"
@@ -799,21 +801,6 @@
 
 source fs/Kconfig
 
-menu "Instrumentation Support"
-
-source "arch/x86/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes"
-	depends on KALLSYMS && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-endmenu
-
 source "arch/x86_64/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 03e1ede..6d89ab7 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -74,7 +74,7 @@
 CFLAGS_KERNEL += $(cflags-kernel-y)
 KBUILD_AFLAGS += -m64
 
-head-y := arch/x86/kernel/head_64.o arch/x86/kernel/head64.o arch/x86/kernel/init_task_64.o
+head-y := arch/x86/kernel/head_64.o arch/x86/kernel/head64.o arch/x86/kernel/init_task.o
 
 libs-y 					+= arch/x86/lib/
 core-y					+= arch/x86/kernel/ \
@@ -97,9 +97,9 @@
 KBUILD_IMAGE                  := $(BOOTIMAGE)
 
 bzImage: vmlinux
-	$(Q)mkdir -p $(objtree)/arch/x86_64/boot
-	$(Q)ln -fsn $(objtree)/arch/x86/boot/bzImage $(objtree)/arch/x86_64/boot/bzImage
 	$(Q)$(MAKE) $(build)=$(boot) $(BOOTIMAGE)
+	$(Q)mkdir -p $(objtree)/arch/x86_64/boot
+	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/x86_64/boot/bzImage
 
 bzlilo: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zlilo
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 7fbb44b..85ffbb4 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -251,6 +251,8 @@
 	  provide one yourself.
 endmenu
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/xtensa/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 8be99c7..397bcd6 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -176,7 +176,7 @@
 	printk("Caught unhandled exception in '%s' "
 	       "(pid = %d, pc = %#010lx) - should not happen\n"
 	       "\tEXCCAUSE is %ld\n",
-	       current->comm, current->pid, regs->pc, exccause);
+	       current->comm, task_pid_nr(current), regs->pc, exccause);
 	force_sig(SIGILL, current);
 }
 
@@ -228,7 +228,7 @@
 	/* If in user mode, send SIGILL signal to current process. */
 
 	printk("Illegal Instruction in '%s' (pid = %d, pc = %#010lx)\n",
-	    current->comm, current->pid, regs->pc);
+	    current->comm, task_pid_nr(current), regs->pc);
 	force_sig(SIGILL, current);
 }
 
@@ -254,7 +254,7 @@
 	current->thread.error_code = -3;
 	printk("Unaligned memory access to %08lx in '%s' "
 	       "(pid = %d, pc = %#010lx)\n",
-	       regs->excvaddr, current->comm, current->pid, regs->pc);
+	       regs->excvaddr, current->comm, task_pid_nr(current), regs->pc);
 	info.si_signo = SIGBUS;
 	info.si_errno = 0;
 	info.si_code = BUS_ADRALN;
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 2f84285..33f366b 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -145,7 +145,7 @@
 	 */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 3935469..8025d64 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3367,7 +3367,7 @@
 		if (unlikely(block_dump)) {
 			char b[BDEVNAME_SIZE];
 			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
-				current->comm, current->pid,
+			current->comm, task_pid_nr(current),
 				(rw & WRITE) ? "WRITE" : "READ",
 				(unsigned long long)bio->bi_sector,
 				bdevname(bio->bi_bdev,b));
@@ -3739,7 +3739,7 @@
 
 /**
  * end_request - end I/O on the current segment of the request
- * @rq:		the request being processed
+ * @req:	the request being processed
  * @uptodate:	error value or 0/1 uptodate flag
  *
  * Description:
diff --git a/drivers/Makefile b/drivers/Makefile
index d2dc01c..cfe38ff 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -76,6 +76,7 @@
 obj-$(CONFIG_EISA)		+= eisa/
 obj-$(CONFIG_LGUEST_GUEST)	+= lguest/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
+obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
 obj-$(CONFIG_MMC)		+= mmc/
 obj-$(CONFIG_NEW_LEDS)		+= leds/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 4875f01..b833891 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -88,7 +88,7 @@
 
 config ACPI_AC
 	tristate "AC Adapter"
-	depends on X86
+	depends on X86 && POWER_SUPPLY
 	default y
 	help
 	  This driver adds support for the AC Adapter object, which indicates
@@ -97,7 +97,7 @@
 
 config ACPI_BATTERY
 	tristate "Battery"
-	depends on X86
+	depends on X86 && POWER_SUPPLY
 	default y
 	help
 	  This driver adds support for battery information through
@@ -117,6 +117,7 @@
 config ACPI_VIDEO
 	tristate "Video"
 	depends on X86 && BACKLIGHT_CLASS_DEVICE && VIDEO_OUTPUT_CONTROL
+	depends on INPUT
 	help
 	  This driver implement the ACPI Extensions For Display Adapters
 	  for integrated graphics devices on motherboard, as specified in
@@ -349,12 +350,11 @@
 		$>modprobe acpi_memhotplug 
 
 config ACPI_SBS
-	tristate "Smart Battery System (EXPERIMENTAL)"
+	tristate "Smart Battery System"
 	depends on X86
-	depends on EXPERIMENTAL
+	depends on POWER_SUPPLY
 	help
-	  This driver adds support for the Smart Battery System.
-	  A "Smart Battery" is quite old and quite rare compared
-	  to today's ACPI "Control Method" battery.
+	  This driver adds support for the Smart Battery System, another
+	  type of access to battery information, found on some laptops.
 
 endif	# ACPI
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index d4336f1..54e3ab0 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -60,3 +60,4 @@
 obj-$(CONFIG_ACPI_HOTPLUG_MEMORY)	+= acpi_memhotplug.o
 obj-y				+= cm_sbs.o
 obj-$(CONFIG_ACPI_SBS)		+= sbs.o
+obj-$(CONFIG_ACPI_SBS)		+= sbshc.o
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 26d7070..e03de37 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -29,6 +29,7 @@
 #include <linux/types.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/power_supply.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
@@ -72,16 +73,37 @@
 };
 
 struct acpi_ac {
+	struct power_supply charger;
 	struct acpi_device * device;
 	unsigned long state;
 };
 
+#define to_acpi_ac(x) container_of(x, struct acpi_ac, charger);
+
 static const struct file_operations acpi_ac_fops = {
 	.open = acpi_ac_open_fs,
 	.read = seq_read,
 	.llseek = seq_lseek,
 	.release = single_release,
 };
+static int get_ac_property(struct power_supply *psy,
+			   enum power_supply_property psp,
+			   union power_supply_propval *val)
+{
+	struct acpi_ac *ac = to_acpi_ac(psy);
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = ac->state;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static enum power_supply_property ac_props[] = {
+	POWER_SUPPLY_PROP_ONLINE,
+};
 
 /* --------------------------------------------------------------------------
                                AC Adapter Management
@@ -208,6 +230,7 @@
 		acpi_bus_generate_netlink_event(device->pnp.device_class,
 						  device->dev.bus_id, event,
 						  (u32) ac->state);
+		kobject_uevent(&ac->charger.dev->kobj, KOBJ_CHANGE);
 		break;
 	default:
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
@@ -244,7 +267,12 @@
 	result = acpi_ac_add_fs(device);
 	if (result)
 		goto end;
-
+	ac->charger.name = acpi_device_bid(device);
+	ac->charger.type = POWER_SUPPLY_TYPE_MAINS;
+	ac->charger.properties = ac_props;
+	ac->charger.num_properties = ARRAY_SIZE(ac_props);
+	ac->charger.get_property = get_ac_property;
+	power_supply_register(&ac->device->dev, &ac->charger);
 	status = acpi_install_notify_handler(device->handle,
 					     ACPI_ALL_NOTIFY, acpi_ac_notify,
 					     ac);
@@ -279,7 +307,8 @@
 
 	status = acpi_remove_notify_handler(device->handle,
 					    ACPI_ALL_NOTIFY, acpi_ac_notify);
-
+	if (ac->charger.dev)
+		power_supply_unregister(&ac->charger);
 	acpi_ac_remove_fs(device);
 
 	kfree(ac);
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 9b2c0f7..681e26b 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -1,6 +1,8 @@
 /*
- *  acpi_battery.c - ACPI Battery Driver ($Revision: 37 $)
+ *  battery.c - ACPI Battery Driver (Revision: 2.0)
  *
+ *  Copyright (C) 2007 Alexey Starikovskiy <astarikovskiy@suse.de>
+ *  Copyright (C) 2004-2007 Vladimir Lebedev <vladimir.p.lebedev@intel.com>
  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
  *
@@ -27,244 +29,288 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/jiffies.h>
+
+#ifdef CONFIG_ACPI_PROCFS
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <asm/uaccess.h>
+#endif
 
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
-#define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF
+#include <linux/power_supply.h>
 
-#define ACPI_BATTERY_FORMAT_BIF	"NNNNNNNNNSSSS"
-#define ACPI_BATTERY_FORMAT_BST	"NNNN"
+#define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF
 
 #define ACPI_BATTERY_COMPONENT		0x00040000
 #define ACPI_BATTERY_CLASS		"battery"
 #define ACPI_BATTERY_DEVICE_NAME	"Battery"
 #define ACPI_BATTERY_NOTIFY_STATUS	0x80
 #define ACPI_BATTERY_NOTIFY_INFO	0x81
-#define ACPI_BATTERY_UNITS_WATTS	"mW"
-#define ACPI_BATTERY_UNITS_AMPS		"mA"
 
 #define _COMPONENT		ACPI_BATTERY_COMPONENT
 
-#define ACPI_BATTERY_UPDATE_TIME	0
-
-#define ACPI_BATTERY_NONE_UPDATE	0
-#define ACPI_BATTERY_EASY_UPDATE	1
-#define ACPI_BATTERY_INIT_UPDATE	2
-
 ACPI_MODULE_NAME("battery");
 
 MODULE_AUTHOR("Paul Diefenbaugh");
+MODULE_AUTHOR("Alexey Starikovskiy <astarikovskiy@suse.de>");
 MODULE_DESCRIPTION("ACPI Battery Driver");
 MODULE_LICENSE("GPL");
 
-static unsigned int update_time = ACPI_BATTERY_UPDATE_TIME;
+static unsigned int cache_time = 1000;
+module_param(cache_time, uint, 0644);
+MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
 
-/* 0 - every time, > 0 - by update_time */
-module_param(update_time, uint, 0644);
-
+#ifdef CONFIG_ACPI_PROCFS
 extern struct proc_dir_entry *acpi_lock_battery_dir(void);
 extern void *acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir);
 
-static int acpi_battery_add(struct acpi_device *device);
-static int acpi_battery_remove(struct acpi_device *device, int type);
-static int acpi_battery_resume(struct acpi_device *device);
+enum acpi_battery_files {
+	info_tag = 0,
+	state_tag,
+	alarm_tag,
+	ACPI_BATTERY_NUMFILES,
+};
+
+#endif
 
 static const struct acpi_device_id battery_device_ids[] = {
 	{"PNP0C0A", 0},
 	{"", 0},
 };
+
 MODULE_DEVICE_TABLE(acpi, battery_device_ids);
 
-static struct acpi_driver acpi_battery_driver = {
-	.name = "battery",
-	.class = ACPI_BATTERY_CLASS,
-	.ids = battery_device_ids,
-	.ops = {
-		.add = acpi_battery_add,
-		.resume = acpi_battery_resume,
-		.remove = acpi_battery_remove,
-		},
-};
-
-struct acpi_battery_state {
-	acpi_integer state;
-	acpi_integer present_rate;
-	acpi_integer remaining_capacity;
-	acpi_integer present_voltage;
-};
-
-struct acpi_battery_info {
-	acpi_integer power_unit;
-	acpi_integer design_capacity;
-	acpi_integer last_full_capacity;
-	acpi_integer battery_technology;
-	acpi_integer design_voltage;
-	acpi_integer design_capacity_warning;
-	acpi_integer design_capacity_low;
-	acpi_integer battery_capacity_granularity_1;
-	acpi_integer battery_capacity_granularity_2;
-	acpi_string model_number;
-	acpi_string serial_number;
-	acpi_string battery_type;
-	acpi_string oem_info;
-};
-
-enum acpi_battery_files{
-	ACPI_BATTERY_INFO = 0,
-	ACPI_BATTERY_STATE,
-	ACPI_BATTERY_ALARM,
-	ACPI_BATTERY_NUMFILES,
-};
-
-struct acpi_battery_flags {
-	u8 battery_present_prev;
-	u8 alarm_present;
-	u8 init_update;
-	u8 update[ACPI_BATTERY_NUMFILES];
-	u8 power_unit;
-};
 
 struct acpi_battery {
-	struct mutex mutex;
+	struct mutex lock;
+	struct power_supply bat;
 	struct acpi_device *device;
-	struct acpi_battery_flags flags;
-	struct acpi_buffer bif_data;
-	struct acpi_buffer bst_data;
-	unsigned long alarm;
-	unsigned long update_time[ACPI_BATTERY_NUMFILES];
+	unsigned long update_time;
+	int current_now;
+	int capacity_now;
+	int voltage_now;
+	int design_capacity;
+	int full_charge_capacity;
+	int technology;
+	int design_voltage;
+	int design_capacity_warning;
+	int design_capacity_low;
+	int capacity_granularity_1;
+	int capacity_granularity_2;
+	int alarm;
+	char model_number[32];
+	char serial_number[32];
+	char type[32];
+	char oem_info[32];
+	int state;
+	int power_unit;
+	u8 alarm_present;
 };
 
+#define to_acpi_battery(x) container_of(x, struct acpi_battery, bat);
+
 inline int acpi_battery_present(struct acpi_battery *battery)
 {
 	return battery->device->status.battery_present;
 }
-inline char *acpi_battery_power_units(struct acpi_battery *battery)
+
+static int acpi_battery_technology(struct acpi_battery *battery)
 {
-	if (battery->flags.power_unit)
-		return ACPI_BATTERY_UNITS_AMPS;
-	else
-		return ACPI_BATTERY_UNITS_WATTS;
+	if (!strcasecmp("NiCd", battery->type))
+		return POWER_SUPPLY_TECHNOLOGY_NiCd;
+	if (!strcasecmp("NiMH", battery->type))
+		return POWER_SUPPLY_TECHNOLOGY_NiMH;
+	if (!strcasecmp("LION", battery->type))
+		return POWER_SUPPLY_TECHNOLOGY_LION;
+	if (!strcasecmp("LiP", battery->type))
+		return POWER_SUPPLY_TECHNOLOGY_LIPO;
+	return POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
 }
 
-inline acpi_handle acpi_battery_handle(struct acpi_battery *battery)
+static int acpi_battery_get_property(struct power_supply *psy,
+				     enum power_supply_property psp,
+				     union power_supply_propval *val)
 {
-	return battery->device->handle;
+	struct acpi_battery *battery = to_acpi_battery(psy);
+
+	if ((!acpi_battery_present(battery)) &&
+	     psp != POWER_SUPPLY_PROP_PRESENT)
+		return -ENODEV;
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		if (battery->state & 0x01)
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		else if (battery->state & 0x02)
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		else if (battery->state == 0)
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		break;
+	case POWER_SUPPLY_PROP_PRESENT:
+		val->intval = acpi_battery_present(battery);
+		break;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = acpi_battery_technology(battery);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		val->intval = battery->design_voltage * 1000;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = battery->voltage_now * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		val->intval = battery->current_now * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+		val->intval = battery->design_capacity * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+	case POWER_SUPPLY_PROP_ENERGY_FULL:
+		val->intval = battery->full_charge_capacity * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+	case POWER_SUPPLY_PROP_ENERGY_NOW:
+		val->intval = battery->capacity_now * 1000;
+		break;
+	case POWER_SUPPLY_PROP_MODEL_NAME:
+		val->strval = battery->model_number;
+		break;
+	case POWER_SUPPLY_PROP_MANUFACTURER:
+		val->strval = battery->oem_info;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
 }
 
+static enum power_supply_property charge_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+	POWER_SUPPLY_PROP_MANUFACTURER,
+};
+
+static enum power_supply_property energy_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
+	POWER_SUPPLY_PROP_ENERGY_FULL,
+	POWER_SUPPLY_PROP_ENERGY_NOW,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+	POWER_SUPPLY_PROP_MANUFACTURER,
+};
+
+#ifdef CONFIG_ACPI_PROCFS
+inline char *acpi_battery_units(struct acpi_battery *battery)
+{
+	return (battery->power_unit)?"mA":"mW";
+}
+#endif
+
 /* --------------------------------------------------------------------------
                                Battery Management
    -------------------------------------------------------------------------- */
+struct acpi_offsets {
+	size_t offset;		/* offset inside struct acpi_sbs_battery */
+	u8 mode;		/* int or string? */
+};
 
-static void acpi_battery_check_result(struct acpi_battery *battery, int result)
+static struct acpi_offsets state_offsets[] = {
+	{offsetof(struct acpi_battery, state), 0},
+	{offsetof(struct acpi_battery, current_now), 0},
+	{offsetof(struct acpi_battery, capacity_now), 0},
+	{offsetof(struct acpi_battery, voltage_now), 0},
+};
+
+static struct acpi_offsets info_offsets[] = {
+	{offsetof(struct acpi_battery, power_unit), 0},
+	{offsetof(struct acpi_battery, design_capacity), 0},
+	{offsetof(struct acpi_battery, full_charge_capacity), 0},
+	{offsetof(struct acpi_battery, technology), 0},
+	{offsetof(struct acpi_battery, design_voltage), 0},
+	{offsetof(struct acpi_battery, design_capacity_warning), 0},
+	{offsetof(struct acpi_battery, design_capacity_low), 0},
+	{offsetof(struct acpi_battery, capacity_granularity_1), 0},
+	{offsetof(struct acpi_battery, capacity_granularity_2), 0},
+	{offsetof(struct acpi_battery, model_number), 1},
+	{offsetof(struct acpi_battery, serial_number), 1},
+	{offsetof(struct acpi_battery, type), 1},
+	{offsetof(struct acpi_battery, oem_info), 1},
+};
+
+static int extract_package(struct acpi_battery *battery,
+			   union acpi_object *package,
+			   struct acpi_offsets *offsets, int num)
 {
-	if (!battery)
-		return;
-
-	if (result) {
-		battery->flags.init_update = 1;
-	}
-}
-
-static int acpi_battery_extract_package(struct acpi_battery *battery,
-					union acpi_object *package,
-					struct acpi_buffer *format,
-					struct acpi_buffer *data,
-					char *package_name)
-{
-	acpi_status status = AE_OK;
-	struct acpi_buffer data_null = { 0, NULL };
-
-	status = acpi_extract_package(package, format, &data_null);
-	if (status != AE_BUFFER_OVERFLOW) {
-		ACPI_EXCEPTION((AE_INFO, status, "Extracting size %s",
-				package_name));
-		return -ENODEV;
-	}
-
-	if (data_null.length != data->length) {
-		kfree(data->pointer);
-		data->pointer = kzalloc(data_null.length, GFP_KERNEL);
-		if (!data->pointer) {
-			ACPI_EXCEPTION((AE_INFO, AE_NO_MEMORY, "kzalloc()"));
-			return -ENOMEM;
+	int i, *x;
+	union acpi_object *element;
+	if (package->type != ACPI_TYPE_PACKAGE)
+		return -EFAULT;
+	for (i = 0; i < num; ++i) {
+		if (package->package.count <= i)
+			return -EFAULT;
+		element = &package->package.elements[i];
+		if (offsets[i].mode) {
+			if (element->type != ACPI_TYPE_STRING &&
+			    element->type != ACPI_TYPE_BUFFER)
+				return -EFAULT;
+			strncpy((u8 *)battery + offsets[i].offset,
+				element->string.pointer, 32);
+		} else {
+			if (element->type != ACPI_TYPE_INTEGER)
+				return -EFAULT;
+			x = (int *)((u8 *)battery + offsets[i].offset);
+			*x = element->integer.value;
 		}
-		data->length = data_null.length;
 	}
-
-	status = acpi_extract_package(package, format, data);
-	if (ACPI_FAILURE(status)) {
-		ACPI_EXCEPTION((AE_INFO, status, "Extracting %s",
-				package_name));
-		return -ENODEV;
-	}
-
 	return 0;
 }
 
 static int acpi_battery_get_status(struct acpi_battery *battery)
 {
-	int result = 0;
-
-	result = acpi_bus_get_status(battery->device);
-	if (result) {
+	if (acpi_bus_get_status(battery->device)) {
 		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "Evaluating _STA"));
 		return -ENODEV;
 	}
-	return result;
+	return 0;
 }
 
 static int acpi_battery_get_info(struct acpi_battery *battery)
 {
-	int result = 0;
+	int result = -EFAULT;
 	acpi_status status = 0;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	struct acpi_buffer format = { sizeof(ACPI_BATTERY_FORMAT_BIF),
-		ACPI_BATTERY_FORMAT_BIF
-	};
-	union acpi_object *package = NULL;
-	struct acpi_buffer *data = NULL;
-	struct acpi_battery_info *bif = NULL;
-
-	battery->update_time[ACPI_BATTERY_INFO] = get_seconds();
 
 	if (!acpi_battery_present(battery))
 		return 0;
+	mutex_lock(&battery->lock);
+	status = acpi_evaluate_object(battery->device->handle, "_BIF",
+				      NULL, &buffer);
+	mutex_unlock(&battery->lock);
 
-	/* Evaluate _BIF */
-
-	status =
-	    acpi_evaluate_object(acpi_battery_handle(battery), "_BIF", NULL,
-				 &buffer);
 	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BIF"));
 		return -ENODEV;
 	}
 
-	package = buffer.pointer;
-
-	data = &battery->bif_data;
-
-	/* Extract Package Data */
-
-	result =
-	    acpi_battery_extract_package(battery, package, &format, data,
-					 "_BIF");
-	if (result)
-		goto end;
-
-      end:
-
+	result = extract_package(battery, buffer.pointer,
+				 info_offsets, ARRAY_SIZE(info_offsets));
 	kfree(buffer.pointer);
-
-	if (!result) {
-		bif = data->pointer;
-		battery->flags.power_unit = bif->power_unit;
-	}
-
 	return result;
 }
 
@@ -273,342 +319,203 @@
 	int result = 0;
 	acpi_status status = 0;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	struct acpi_buffer format = { sizeof(ACPI_BATTERY_FORMAT_BST),
-		ACPI_BATTERY_FORMAT_BST
-	};
-	union acpi_object *package = NULL;
-	struct acpi_buffer *data = NULL;
-
-	battery->update_time[ACPI_BATTERY_STATE] = get_seconds();
 
 	if (!acpi_battery_present(battery))
 		return 0;
 
-	/* Evaluate _BST */
+	if (battery->update_time &&
+	    time_before(jiffies, battery->update_time +
+			msecs_to_jiffies(cache_time)))
+		return 0;
 
-	status =
-	    acpi_evaluate_object(acpi_battery_handle(battery), "_BST", NULL,
-				 &buffer);
+	mutex_lock(&battery->lock);
+	status = acpi_evaluate_object(battery->device->handle, "_BST",
+				      NULL, &buffer);
+	mutex_unlock(&battery->lock);
+
 	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BST"));
 		return -ENODEV;
 	}
 
-	package = buffer.pointer;
-
-	data = &battery->bst_data;
-
-	/* Extract Package Data */
-
-	result =
-	    acpi_battery_extract_package(battery, package, &format, data,
-					 "_BST");
-	if (result)
-		goto end;
-
-      end:
+	result = extract_package(battery, buffer.pointer,
+				 state_offsets, ARRAY_SIZE(state_offsets));
+	battery->update_time = jiffies;
 	kfree(buffer.pointer);
-
 	return result;
 }
 
-static int acpi_battery_get_alarm(struct acpi_battery *battery)
-{
-	battery->update_time[ACPI_BATTERY_ALARM] = get_seconds();
-
-	return 0;
-}
-
-static int acpi_battery_set_alarm(struct acpi_battery *battery,
-				  unsigned long alarm)
+static int acpi_battery_set_alarm(struct acpi_battery *battery)
 {
 	acpi_status status = 0;
-	union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+	union acpi_object arg0 = { .type = ACPI_TYPE_INTEGER };
 	struct acpi_object_list arg_list = { 1, &arg0 };
 
-	battery->update_time[ACPI_BATTERY_ALARM] = get_seconds();
-
-	if (!acpi_battery_present(battery))
+	if (!acpi_battery_present(battery)|| !battery->alarm_present)
 		return -ENODEV;
 
-	if (!battery->flags.alarm_present)
-		return -ENODEV;
+	arg0.integer.value = battery->alarm;
 
-	arg0.integer.value = alarm;
-
-	status =
-	    acpi_evaluate_object(acpi_battery_handle(battery), "_BTP",
+	mutex_lock(&battery->lock);
+	status = acpi_evaluate_object(battery->device->handle, "_BTP",
 				 &arg_list, NULL);
+	mutex_unlock(&battery->lock);
+
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Alarm set to %d\n", (u32) alarm));
-
-	battery->alarm = alarm;
-
+	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Alarm set to %d\n", battery->alarm));
 	return 0;
 }
 
 static int acpi_battery_init_alarm(struct acpi_battery *battery)
 {
-	int result = 0;
 	acpi_status status = AE_OK;
 	acpi_handle handle = NULL;
-	struct acpi_battery_info *bif = battery->bif_data.pointer;
-	unsigned long alarm = battery->alarm;
 
 	/* See if alarms are supported, and if so, set default */
-
-	status = acpi_get_handle(acpi_battery_handle(battery), "_BTP", &handle);
-	if (ACPI_SUCCESS(status)) {
-		battery->flags.alarm_present = 1;
-		if (!alarm && bif) {
-			alarm = bif->design_capacity_warning;
-		}
-		result = acpi_battery_set_alarm(battery, alarm);
-		if (result)
-			goto end;
-	} else {
-		battery->flags.alarm_present = 0;
+	status = acpi_get_handle(battery->device->handle, "_BTP", &handle);
+	if (ACPI_FAILURE(status)) {
+		battery->alarm_present = 0;
+		return 0;
 	}
-
-      end:
-
-	return result;
+	battery->alarm_present = 1;
+	if (!battery->alarm)
+		battery->alarm = battery->design_capacity_warning;
+	return acpi_battery_set_alarm(battery);
 }
 
-static int acpi_battery_init_update(struct acpi_battery *battery)
+static int acpi_battery_update(struct acpi_battery *battery)
 {
-	int result = 0;
-
-	result = acpi_battery_get_status(battery);
-	if (result)
+	int saved_present = acpi_battery_present(battery);
+	int result = acpi_battery_get_status(battery);
+	if (result || !acpi_battery_present(battery))
 		return result;
-
-	battery->flags.battery_present_prev = acpi_battery_present(battery);
-
-	if (acpi_battery_present(battery)) {
+	if (saved_present != acpi_battery_present(battery) ||
+	    !battery->update_time) {
+		battery->update_time = 0;
 		result = acpi_battery_get_info(battery);
 		if (result)
 			return result;
-		result = acpi_battery_get_state(battery);
-		if (result)
-			return result;
-
+		if (battery->power_unit) {
+			battery->bat.properties = charge_battery_props;
+			battery->bat.num_properties =
+				ARRAY_SIZE(charge_battery_props);
+		} else {
+			battery->bat.properties = energy_battery_props;
+			battery->bat.num_properties =
+				ARRAY_SIZE(energy_battery_props);
+		}
 		acpi_battery_init_alarm(battery);
 	}
-
-	return result;
-}
-
-static int acpi_battery_update(struct acpi_battery *battery,
-			       int update, int *update_result_ptr)
-{
-	int result = 0;
-	int update_result = ACPI_BATTERY_NONE_UPDATE;
-
-	if (!acpi_battery_present(battery)) {
-		update = 1;
-	}
-
-	if (battery->flags.init_update) {
-		result = acpi_battery_init_update(battery);
-		if (result)
-			goto end;
-		update_result = ACPI_BATTERY_INIT_UPDATE;
-	} else if (update) {
-		result = acpi_battery_get_status(battery);
-		if (result)
-			goto end;
-		if ((!battery->flags.battery_present_prev & acpi_battery_present(battery))
-		    || (battery->flags.battery_present_prev & !acpi_battery_present(battery))) {
-			result = acpi_battery_init_update(battery);
-			if (result)
-				goto end;
-			update_result = ACPI_BATTERY_INIT_UPDATE;
-		} else {
-			update_result = ACPI_BATTERY_EASY_UPDATE;
-		}
-	}
-
-      end:
-
-	battery->flags.init_update = (result != 0);
-
-	*update_result_ptr = update_result;
-
-	return result;
-}
-
-static void acpi_battery_notify_update(struct acpi_battery *battery)
-{
-	acpi_battery_get_status(battery);
-
-	if (battery->flags.init_update) {
-		return;
-	}
-
-	if ((!battery->flags.battery_present_prev &
-	     acpi_battery_present(battery)) ||
-	    (battery->flags.battery_present_prev &
-	     !acpi_battery_present(battery))) {
-		battery->flags.init_update = 1;
-	} else {
-		battery->flags.update[ACPI_BATTERY_INFO] = 1;
-		battery->flags.update[ACPI_BATTERY_STATE] = 1;
-		battery->flags.update[ACPI_BATTERY_ALARM] = 1;
-	}
+	return acpi_battery_get_state(battery);
 }
 
 /* --------------------------------------------------------------------------
                               FS Interface (/proc)
    -------------------------------------------------------------------------- */
 
+#ifdef CONFIG_ACPI_PROCFS
 static struct proc_dir_entry *acpi_battery_dir;
 
 static int acpi_battery_print_info(struct seq_file *seq, int result)
 {
 	struct acpi_battery *battery = seq->private;
-	struct acpi_battery_info *bif = NULL;
-	char *units = "?";
 
 	if (result)
 		goto end;
 
-	if (acpi_battery_present(battery))
-		seq_printf(seq, "present:                 yes\n");
-	else {
-		seq_printf(seq, "present:                 no\n");
+	seq_printf(seq, "present:                 %s\n",
+		   acpi_battery_present(battery)?"yes":"no");
+	if (!acpi_battery_present(battery))
 		goto end;
-	}
-
-	bif = battery->bif_data.pointer;
-	if (!bif) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "BIF buffer is NULL"));
-		result = -ENODEV;
-		goto end;
-	}
-
-	/* Battery Units */
-
-	units = acpi_battery_power_units(battery);
-
-	if (bif->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+	if (battery->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
 		seq_printf(seq, "design capacity:         unknown\n");
 	else
 		seq_printf(seq, "design capacity:         %d %sh\n",
-			   (u32) bif->design_capacity, units);
+			   battery->design_capacity,
+			   acpi_battery_units(battery));
 
-	if (bif->last_full_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+	if (battery->full_charge_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
 		seq_printf(seq, "last full capacity:      unknown\n");
 	else
 		seq_printf(seq, "last full capacity:      %d %sh\n",
-			   (u32) bif->last_full_capacity, units);
+			   battery->full_charge_capacity,
+			   acpi_battery_units(battery));
 
-	switch ((u32) bif->battery_technology) {
-	case 0:
-		seq_printf(seq, "battery technology:      non-rechargeable\n");
-		break;
-	case 1:
-		seq_printf(seq, "battery technology:      rechargeable\n");
-		break;
-	default:
-		seq_printf(seq, "battery technology:      unknown\n");
-		break;
-	}
+	seq_printf(seq, "battery technology:      %srechargeable\n",
+		   (!battery->technology)?"non-":"");
 
-	if (bif->design_voltage == ACPI_BATTERY_VALUE_UNKNOWN)
+	if (battery->design_voltage == ACPI_BATTERY_VALUE_UNKNOWN)
 		seq_printf(seq, "design voltage:          unknown\n");
 	else
 		seq_printf(seq, "design voltage:          %d mV\n",
-			   (u32) bif->design_voltage);
+			   battery->design_voltage);
 	seq_printf(seq, "design capacity warning: %d %sh\n",
-		   (u32) bif->design_capacity_warning, units);
+		   battery->design_capacity_warning,
+		   acpi_battery_units(battery));
 	seq_printf(seq, "design capacity low:     %d %sh\n",
-		   (u32) bif->design_capacity_low, units);
+		   battery->design_capacity_low,
+		   acpi_battery_units(battery));
 	seq_printf(seq, "capacity granularity 1:  %d %sh\n",
-		   (u32) bif->battery_capacity_granularity_1, units);
+		   battery->capacity_granularity_1,
+		   acpi_battery_units(battery));
 	seq_printf(seq, "capacity granularity 2:  %d %sh\n",
-		   (u32) bif->battery_capacity_granularity_2, units);
-	seq_printf(seq, "model number:            %s\n", bif->model_number);
-	seq_printf(seq, "serial number:           %s\n", bif->serial_number);
-	seq_printf(seq, "battery type:            %s\n", bif->battery_type);
-	seq_printf(seq, "OEM info:                %s\n", bif->oem_info);
-
+		   battery->capacity_granularity_2,
+		   acpi_battery_units(battery));
+	seq_printf(seq, "model number:            %s\n", battery->model_number);
+	seq_printf(seq, "serial number:           %s\n", battery->serial_number);
+	seq_printf(seq, "battery type:            %s\n", battery->type);
+	seq_printf(seq, "OEM info:                %s\n", battery->oem_info);
       end:
-
 	if (result)
 		seq_printf(seq, "ERROR: Unable to read battery info\n");
-
 	return result;
 }
 
 static int acpi_battery_print_state(struct seq_file *seq, int result)
 {
 	struct acpi_battery *battery = seq->private;
-	struct acpi_battery_state *bst = NULL;
-	char *units = "?";
 
 	if (result)
 		goto end;
 
-	if (acpi_battery_present(battery))
-		seq_printf(seq, "present:                 yes\n");
-	else {
-		seq_printf(seq, "present:                 no\n");
+	seq_printf(seq, "present:                 %s\n",
+		   acpi_battery_present(battery)?"yes":"no");
+	if (!acpi_battery_present(battery))
 		goto end;
-	}
 
-	bst = battery->bst_data.pointer;
-	if (!bst) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "BST buffer is NULL"));
-		result = -ENODEV;
-		goto end;
-	}
-
-	/* Battery Units */
-
-	units = acpi_battery_power_units(battery);
-
-	if (!(bst->state & 0x04))
-		seq_printf(seq, "capacity state:          ok\n");
-	else
-		seq_printf(seq, "capacity state:          critical\n");
-
-	if ((bst->state & 0x01) && (bst->state & 0x02)) {
+	seq_printf(seq, "capacity state:          %s\n",
+			(battery->state & 0x04)?"critical":"ok");
+	if ((battery->state & 0x01) && (battery->state & 0x02))
 		seq_printf(seq,
 			   "charging state:          charging/discharging\n");
-	} else if (bst->state & 0x01)
+	else if (battery->state & 0x01)
 		seq_printf(seq, "charging state:          discharging\n");
-	else if (bst->state & 0x02)
+	else if (battery->state & 0x02)
 		seq_printf(seq, "charging state:          charging\n");
-	else {
+	else
 		seq_printf(seq, "charging state:          charged\n");
-	}
 
-	if (bst->present_rate == ACPI_BATTERY_VALUE_UNKNOWN)
+	if (battery->current_now == ACPI_BATTERY_VALUE_UNKNOWN)
 		seq_printf(seq, "present rate:            unknown\n");
 	else
 		seq_printf(seq, "present rate:            %d %s\n",
-			   (u32) bst->present_rate, units);
+			   battery->current_now, acpi_battery_units(battery));
 
-	if (bst->remaining_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+	if (battery->capacity_now == ACPI_BATTERY_VALUE_UNKNOWN)
 		seq_printf(seq, "remaining capacity:      unknown\n");
 	else
 		seq_printf(seq, "remaining capacity:      %d %sh\n",
-			   (u32) bst->remaining_capacity, units);
-
-	if (bst->present_voltage == ACPI_BATTERY_VALUE_UNKNOWN)
+			   battery->capacity_now, acpi_battery_units(battery));
+	if (battery->voltage_now == ACPI_BATTERY_VALUE_UNKNOWN)
 		seq_printf(seq, "present voltage:         unknown\n");
 	else
 		seq_printf(seq, "present voltage:         %d mV\n",
-			   (u32) bst->present_voltage);
-
+			   battery->voltage_now);
       end:
-
-	if (result) {
+	if (result)
 		seq_printf(seq, "ERROR: Unable to read battery state\n");
-	}
 
 	return result;
 }
@@ -616,7 +523,6 @@
 static int acpi_battery_print_alarm(struct seq_file *seq, int result)
 {
 	struct acpi_battery *battery = seq->private;
-	char *units = "?";
 
 	if (result)
 		goto end;
@@ -625,189 +531,121 @@
 		seq_printf(seq, "present:                 no\n");
 		goto end;
 	}
-
-	/* Battery Units */
-
-	units = acpi_battery_power_units(battery);
-
 	seq_printf(seq, "alarm:                   ");
 	if (!battery->alarm)
 		seq_printf(seq, "unsupported\n");
 	else
-		seq_printf(seq, "%lu %sh\n", battery->alarm, units);
-
+		seq_printf(seq, "%u %sh\n", battery->alarm,
+				acpi_battery_units(battery));
       end:
-
 	if (result)
 		seq_printf(seq, "ERROR: Unable to read battery alarm\n");
-
 	return result;
 }
 
-static ssize_t
-acpi_battery_write_alarm(struct file *file,
-			 const char __user * buffer,
-			 size_t count, loff_t * ppos)
+static ssize_t acpi_battery_write_alarm(struct file *file,
+					const char __user * buffer,
+					size_t count, loff_t * ppos)
 {
 	int result = 0;
 	char alarm_string[12] = { '\0' };
 	struct seq_file *m = file->private_data;
 	struct acpi_battery *battery = m->private;
-	int update_result = ACPI_BATTERY_NONE_UPDATE;
 
 	if (!battery || (count > sizeof(alarm_string) - 1))
 		return -EINVAL;
-
-	mutex_lock(&battery->mutex);
-
-	result = acpi_battery_update(battery, 1, &update_result);
 	if (result) {
 		result = -ENODEV;
 		goto end;
 	}
-
 	if (!acpi_battery_present(battery)) {
 		result = -ENODEV;
 		goto end;
 	}
-
 	if (copy_from_user(alarm_string, buffer, count)) {
 		result = -EFAULT;
 		goto end;
 	}
-
 	alarm_string[count] = '\0';
-
-	result = acpi_battery_set_alarm(battery,
-					simple_strtoul(alarm_string, NULL, 0));
-	if (result)
-		goto end;
-
+	battery->alarm = simple_strtol(alarm_string, NULL, 0);
+	result = acpi_battery_set_alarm(battery);
       end:
-
-	acpi_battery_check_result(battery, result);
-
 	if (!result)
-		result = count;
-
-	mutex_unlock(&battery->mutex);
-
+		return count;
 	return result;
 }
 
 typedef int(*print_func)(struct seq_file *seq, int result);
-typedef int(*get_func)(struct acpi_battery *battery);
 
-static struct acpi_read_mux {
-	print_func print;
-	get_func get;
-} acpi_read_funcs[ACPI_BATTERY_NUMFILES] = {
-	{.get = acpi_battery_get_info, .print = acpi_battery_print_info},
-	{.get = acpi_battery_get_state, .print = acpi_battery_print_state},
-	{.get = acpi_battery_get_alarm, .print = acpi_battery_print_alarm},
+static print_func acpi_print_funcs[ACPI_BATTERY_NUMFILES] = {
+	acpi_battery_print_info,
+	acpi_battery_print_state,
+	acpi_battery_print_alarm,
 };
 
 static int acpi_battery_read(int fid, struct seq_file *seq)
 {
 	struct acpi_battery *battery = seq->private;
-	int result = 0;
-	int update_result = ACPI_BATTERY_NONE_UPDATE;
-	int update = 0;
+	int result = acpi_battery_update(battery);
+	return acpi_print_funcs[fid](seq, result);
+}
 
-	mutex_lock(&battery->mutex);
+#define DECLARE_FILE_FUNCTIONS(_name) \
+static int acpi_battery_read_##_name(struct seq_file *seq, void *offset) \
+{ \
+	return acpi_battery_read(_name##_tag, seq); \
+} \
+static int acpi_battery_##_name##_open_fs(struct inode *inode, struct file *file) \
+{ \
+	return single_open(file, acpi_battery_read_##_name, PDE(inode)->data); \
+}
 
-	update = (get_seconds() - battery->update_time[fid] >= update_time);
-	update = (update | battery->flags.update[fid]);
+DECLARE_FILE_FUNCTIONS(info);
+DECLARE_FILE_FUNCTIONS(state);
+DECLARE_FILE_FUNCTIONS(alarm);
 
-	result = acpi_battery_update(battery, update, &update_result);
-	if (result)
-		goto end;
+#undef DECLARE_FILE_FUNCTIONS
 
-	if (update_result == ACPI_BATTERY_EASY_UPDATE) {
-		result = acpi_read_funcs[fid].get(battery);
-		if (result)
-			goto end;
+#define FILE_DESCRIPTION_RO(_name) \
+	{ \
+	.name = __stringify(_name), \
+	.mode = S_IRUGO, \
+	.ops = { \
+		.open = acpi_battery_##_name##_open_fs, \
+		.read = seq_read, \
+		.llseek = seq_lseek, \
+		.release = single_release, \
+		.owner = THIS_MODULE, \
+		}, \
 	}
 
-      end:
-	result = acpi_read_funcs[fid].print(seq, result);
-	acpi_battery_check_result(battery, result);
-	battery->flags.update[fid] = result;
-	mutex_unlock(&battery->mutex);
-	return result;
-}
-
-static int acpi_battery_read_info(struct seq_file *seq, void *offset)
-{
-	return acpi_battery_read(ACPI_BATTERY_INFO, seq);
-}
-
-static int acpi_battery_read_state(struct seq_file *seq, void *offset)
-{
-	return acpi_battery_read(ACPI_BATTERY_STATE, seq);
-}
-
-static int acpi_battery_read_alarm(struct seq_file *seq, void *offset)
-{
-	return acpi_battery_read(ACPI_BATTERY_ALARM, seq);
-}
-
-static int acpi_battery_info_open_fs(struct inode *inode, struct file *file)
-{
-	return single_open(file, acpi_battery_read_info, PDE(inode)->data);
-}
-
-static int acpi_battery_state_open_fs(struct inode *inode, struct file *file)
-{
-	return single_open(file, acpi_battery_read_state, PDE(inode)->data);
-}
-
-static int acpi_battery_alarm_open_fs(struct inode *inode, struct file *file)
-{
-	return single_open(file, acpi_battery_read_alarm, PDE(inode)->data);
-}
+#define FILE_DESCRIPTION_RW(_name) \
+	{ \
+	.name = __stringify(_name), \
+	.mode = S_IFREG | S_IRUGO | S_IWUSR, \
+	.ops = { \
+		.open = acpi_battery_##_name##_open_fs, \
+		.read = seq_read, \
+		.llseek = seq_lseek, \
+		.write = acpi_battery_write_##_name, \
+		.release = single_release, \
+		.owner = THIS_MODULE, \
+		}, \
+	}
 
 static struct battery_file {
 	struct file_operations ops;
 	mode_t mode;
 	char *name;
 } acpi_battery_file[] = {
-	{
-	.name = "info",
-	.mode = S_IRUGO,
-	.ops = {
-	.open = acpi_battery_info_open_fs,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-	},
-	},
-	{
-	.name = "state",
-	.mode = S_IRUGO,
-	.ops = {
-	.open = acpi_battery_state_open_fs,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-	},
-	},
-	{
-	.name = "alarm",
-	.mode = S_IFREG | S_IRUGO | S_IWUSR,
-	.ops = {
-	.open = acpi_battery_alarm_open_fs,
-	.read = seq_read,
-	.write = acpi_battery_write_alarm,
-	.llseek = seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-	},
-	},
+	FILE_DESCRIPTION_RO(info),
+	FILE_DESCRIPTION_RO(state),
+	FILE_DESCRIPTION_RW(alarm),
 };
 
+#undef FILE_DESCRIPTION_RO
+#undef FILE_DESCRIPTION_RW
+
 static int acpi_battery_add_fs(struct acpi_device *device)
 {
 	struct proc_dir_entry *entry = NULL;
@@ -832,25 +670,51 @@
 			entry->owner = THIS_MODULE;
 		}
 	}
-
 	return 0;
 }
 
-static int acpi_battery_remove_fs(struct acpi_device *device)
+static void acpi_battery_remove_fs(struct acpi_device *device)
 {
 	int i;
-	if (acpi_device_dir(device)) {
-		for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) {
-			remove_proc_entry(acpi_battery_file[i].name,
+	if (!acpi_device_dir(device))
+		return;
+	for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i)
+		remove_proc_entry(acpi_battery_file[i].name,
 				  acpi_device_dir(device));
-		}
-		remove_proc_entry(acpi_device_bid(device), acpi_battery_dir);
-		acpi_device_dir(device) = NULL;
-	}
 
-	return 0;
+	remove_proc_entry(acpi_device_bid(device), acpi_battery_dir);
+	acpi_device_dir(device) = NULL;
 }
 
+#endif
+
+static ssize_t acpi_battery_alarm_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct acpi_battery *battery = to_acpi_battery(dev_get_drvdata(dev));
+	return sprintf(buf, "%d\n", battery->alarm * 1000);
+}
+
+static ssize_t acpi_battery_alarm_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	unsigned long x;
+	struct acpi_battery *battery = to_acpi_battery(dev_get_drvdata(dev));
+	if (sscanf(buf, "%ld\n", &x) == 1)
+		battery->alarm = x/1000;
+	if (acpi_battery_present(battery))
+		acpi_battery_set_alarm(battery);
+	return count;
+}
+
+static struct device_attribute alarm_attr = {
+	.attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE},
+	.show = acpi_battery_alarm_show,
+	.store = acpi_battery_alarm_store,
+};
+
 /* --------------------------------------------------------------------------
                                  Driver Interface
    -------------------------------------------------------------------------- */
@@ -858,33 +722,17 @@
 static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
 {
 	struct acpi_battery *battery = data;
-	struct acpi_device *device = NULL;
-
+	struct acpi_device *device;
 	if (!battery)
 		return;
-
 	device = battery->device;
-
-	switch (event) {
-	case ACPI_BATTERY_NOTIFY_STATUS:
-	case ACPI_BATTERY_NOTIFY_INFO:
-	case ACPI_NOTIFY_BUS_CHECK:
-	case ACPI_NOTIFY_DEVICE_CHECK:
-		device = battery->device;
-		acpi_battery_notify_update(battery);
-		acpi_bus_generate_proc_event(device, event,
+	acpi_battery_update(battery);
+	acpi_bus_generate_proc_event(device, event,
+				     acpi_battery_present(battery));
+	acpi_bus_generate_netlink_event(device->pnp.device_class,
+					device->dev.bus_id, event,
 					acpi_battery_present(battery));
-		acpi_bus_generate_netlink_event(device->pnp.device_class,
-						  device->dev.bus_id, event,
-						  acpi_battery_present(battery));
-		break;
-	default:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Unsupported event [0x%x]\n", event));
-		break;
-	}
-
-	return;
+	kobject_uevent(&battery->bat.dev->kobj, KOBJ_CHANGE);
 }
 
 static int acpi_battery_add(struct acpi_device *device)
@@ -892,33 +740,27 @@
 	int result = 0;
 	acpi_status status = 0;
 	struct acpi_battery *battery = NULL;
-
 	if (!device)
 		return -EINVAL;
-
 	battery = kzalloc(sizeof(struct acpi_battery), GFP_KERNEL);
 	if (!battery)
 		return -ENOMEM;
-
-	mutex_init(&battery->mutex);
-
-	mutex_lock(&battery->mutex);
-
 	battery->device = device;
 	strcpy(acpi_device_name(device), ACPI_BATTERY_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS);
 	acpi_driver_data(device) = battery;
-
-	result = acpi_battery_get_status(battery);
-	if (result)
-		goto end;
-
-	battery->flags.init_update = 1;
-
+	mutex_init(&battery->lock);
+	acpi_battery_update(battery);
+#ifdef CONFIG_ACPI_PROCFS
 	result = acpi_battery_add_fs(device);
 	if (result)
 		goto end;
-
+#endif
+	battery->bat.name = acpi_device_bid(device);
+	battery->bat.type = POWER_SUPPLY_TYPE_BATTERY;
+	battery->bat.get_property = acpi_battery_get_property;
+	result = power_supply_register(&battery->device->dev, &battery->bat);
+	result = device_create_file(battery->bat.dev, &alarm_attr);
 	status = acpi_install_notify_handler(device->handle,
 					     ACPI_ALL_NOTIFY,
 					     acpi_battery_notify, battery);
@@ -927,20 +769,16 @@
 		result = -ENODEV;
 		goto end;
 	}
-
 	printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
 	       ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
 	       device->status.battery_present ? "present" : "absent");
-
       end:
-
 	if (result) {
+#ifdef CONFIG_ACPI_PROCFS
 		acpi_battery_remove_fs(device);
+#endif
 		kfree(battery);
 	}
-
-	mutex_unlock(&battery->mutex);
-
 	return result;
 }
 
@@ -951,27 +789,19 @@
 
 	if (!device || !acpi_driver_data(device))
 		return -EINVAL;
-
 	battery = acpi_driver_data(device);
-
-	mutex_lock(&battery->mutex);
-
 	status = acpi_remove_notify_handler(device->handle,
 					    ACPI_ALL_NOTIFY,
 					    acpi_battery_notify);
-
+#ifdef CONFIG_ACPI_PROCFS
 	acpi_battery_remove_fs(device);
-
-	kfree(battery->bif_data.pointer);
-
-	kfree(battery->bst_data.pointer);
-
-	mutex_unlock(&battery->mutex);
-
-	mutex_destroy(&battery->mutex);
-
+#endif
+	if (battery->bat.dev) {
+		device_remove_file(battery->bat.dev, &alarm_attr);
+		power_supply_unregister(&battery->bat);
+	}
+	mutex_destroy(&battery->lock);
 	kfree(battery);
-
 	return 0;
 }
 
@@ -979,44 +809,48 @@
 static int acpi_battery_resume(struct acpi_device *device)
 {
 	struct acpi_battery *battery;
-
 	if (!device)
 		return -EINVAL;
-
-	battery = device->driver_data;
-
-	battery->flags.init_update = 1;
-
+	battery = acpi_driver_data(device);
+	battery->update_time = 0;
 	return 0;
 }
 
+static struct acpi_driver acpi_battery_driver = {
+	.name = "battery",
+	.class = ACPI_BATTERY_CLASS,
+	.ids = battery_device_ids,
+	.ops = {
+		.add = acpi_battery_add,
+		.resume = acpi_battery_resume,
+		.remove = acpi_battery_remove,
+		},
+};
+
 static int __init acpi_battery_init(void)
 {
-	int result;
-
 	if (acpi_disabled)
 		return -ENODEV;
-
+#ifdef CONFIG_ACPI_PROCFS
 	acpi_battery_dir = acpi_lock_battery_dir();
 	if (!acpi_battery_dir)
 		return -ENODEV;
-
-	result = acpi_bus_register_driver(&acpi_battery_driver);
-	if (result < 0) {
+#endif
+	if (acpi_bus_register_driver(&acpi_battery_driver) < 0) {
+#ifdef CONFIG_ACPI_PROCFS
 		acpi_unlock_battery_dir(acpi_battery_dir);
+#endif
 		return -ENODEV;
 	}
-
 	return 0;
 }
 
 static void __exit acpi_battery_exit(void)
 {
 	acpi_bus_unregister_driver(&acpi_battery_driver);
-
+#ifdef CONFIG_ACPI_PROCFS
 	acpi_unlock_battery_dir(acpi_battery_dir);
-
-	return;
+#endif
 }
 
 module_init(acpi_battery_init);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index cbfc815..fb2cff9 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -286,15 +286,11 @@
 
 extern int event_is_open;
 
-int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data)
+int acpi_bus_generate_proc_event4(const char *device_class, const char *bus_id, u8 type, int data)
 {
-	struct acpi_bus_event *event = NULL;
+	struct acpi_bus_event *event;
 	unsigned long flags = 0;
 
-
-	if (!device)
-		return -EINVAL;
-
 	/* drop event on the floor if no one's listening */
 	if (!event_is_open)
 		return 0;
@@ -303,8 +299,8 @@
 	if (!event)
 		return -ENOMEM;
 
-	strcpy(event->device_class, device->pnp.device_class);
-	strcpy(event->bus_id, device->pnp.bus_id);
+	strcpy(event->device_class, device_class);
+	strcpy(event->bus_id, bus_id);
 	event->type = type;
 	event->data = data;
 
@@ -315,6 +311,17 @@
 	wake_up_interruptible(&acpi_bus_event_queue);
 
 	return 0;
+
+}
+
+EXPORT_SYMBOL_GPL(acpi_bus_generate_proc_event4);
+
+int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data)
+{
+	if (!device)
+		return -EINVAL;
+	return acpi_bus_generate_proc_event4(device->pnp.device_class,
+					     device->pnp.bus_id, type, data);
 }
 
 EXPORT_SYMBOL(acpi_bus_generate_proc_event);
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 2e79a33..301e832 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -434,18 +434,18 @@
 	switch (button->type) {
 	case ACPI_BUTTON_TYPE_POWER:
 	case ACPI_BUTTON_TYPE_POWERF:
-		input->evbit[0] = BIT(EV_KEY);
+		input->evbit[0] = BIT_MASK(EV_KEY);
 		set_bit(KEY_POWER, input->keybit);
 		break;
 
 	case ACPI_BUTTON_TYPE_SLEEP:
 	case ACPI_BUTTON_TYPE_SLEEPF:
-		input->evbit[0] = BIT(EV_KEY);
+		input->evbit[0] = BIT_MASK(EV_KEY);
 		set_bit(KEY_SLEEP, input->keybit);
 		break;
 
 	case ACPI_BUTTON_TYPE_LID:
-		input->evbit[0] = BIT(EV_SW);
+		input->evbit[0] = BIT_MASK(EV_SW);
 		set_bit(SW_LID, input->swbit);
 		break;
 	}
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 3f7935a..7b41783 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -121,6 +121,7 @@
 	atomic_t event_count;
 	wait_queue_head_t wait;
 	struct list_head list;
+	u8 handlers_installed;
 } *boot_ec, *first_ec;
 
 /* --------------------------------------------------------------------------
@@ -425,7 +426,7 @@
 	handler->func = func;
 	handler->data = data;
 	mutex_lock(&ec->lock);
-	list_add_tail(&handler->node, &ec->list);
+	list_add(&handler->node, &ec->list);
 	mutex_unlock(&ec->lock);
 	return 0;
 }
@@ -440,7 +441,6 @@
 		if (query_bit == handler->query_bit) {
 			list_del(&handler->node);
 			kfree(handler);
-			break;
 		}
 	}
 	mutex_unlock(&ec->lock);
@@ -680,32 +680,50 @@
 	status = acpi_evaluate_integer(handle, "_GPE", NULL, &ec->gpe);
 	if (ACPI_FAILURE(status))
 		return status;
-
 	/* Find and register all query methods */
 	acpi_walk_namespace(ACPI_TYPE_METHOD, handle, 1,
 			    acpi_ec_register_query_methods, ec, NULL);
-
 	/* Use the global lock for all EC transactions? */
 	acpi_evaluate_integer(handle, "_GLK", NULL, &ec->global_lock);
-
 	ec->handle = handle;
-
-	printk(KERN_INFO PREFIX "GPE = 0x%lx, I/O: command/status = 0x%lx, data = 0x%lx\n",
-			  ec->gpe, ec->command_addr, ec->data_addr);
-
 	return AE_CTRL_TERMINATE;
 }
 
+static void ec_remove_handlers(struct acpi_ec *ec)
+{
+	if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
+				ACPI_ADR_SPACE_EC, &acpi_ec_space_handler)))
+		printk(KERN_ERR PREFIX "failed to remove space handler\n");
+	if (ACPI_FAILURE(acpi_remove_gpe_handler(NULL, ec->gpe,
+				&acpi_ec_gpe_handler)))
+		printk(KERN_ERR PREFIX "failed to remove gpe handler\n");
+	ec->handlers_installed = 0;
+}
+
 static int acpi_ec_add(struct acpi_device *device)
 {
 	struct acpi_ec *ec = NULL;
 
 	if (!device)
 		return -EINVAL;
-
 	strcpy(acpi_device_name(device), ACPI_EC_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_EC_CLASS);
 
+	/* Check for boot EC */
+	if (boot_ec) {
+		if (boot_ec->handle == device->handle) {
+			/* Pre-loaded EC from DSDT, just move pointer */
+			ec = boot_ec;
+			boot_ec = NULL;
+			goto end;
+		} else if (boot_ec->handle == ACPI_ROOT_OBJECT) {
+			/* ECDT-based EC, time to shut it down */
+			ec_remove_handlers(boot_ec);
+			kfree(boot_ec);
+			first_ec = boot_ec = NULL;
+		}
+	}
+
 	ec = make_acpi_ec();
 	if (!ec)
 		return -ENOMEM;
@@ -715,25 +733,14 @@
 		kfree(ec);
 		return -EINVAL;
 	}
-
-	/* Check if we found the boot EC */
-	if (boot_ec) {
-		if (boot_ec->gpe == ec->gpe) {
-			/* We might have incorrect info for GL at boot time */
-			mutex_lock(&boot_ec->lock);
-			boot_ec->global_lock = ec->global_lock;
-			/* Copy handlers from new ec into boot ec */
-			list_splice(&ec->list, &boot_ec->list);
-			mutex_unlock(&boot_ec->lock);
-			kfree(ec);
-			ec = boot_ec;
-		}
-	} else
-		first_ec = ec;
 	ec->handle = device->handle;
+      end:
+	if (!first_ec)
+		first_ec = ec;
 	acpi_driver_data(device) = ec;
-
 	acpi_ec_add_fs(device);
+	printk(KERN_INFO PREFIX "GPE = 0x%lx, I/O: command/status = 0x%lx, data = 0x%lx\n",
+			  ec->gpe, ec->command_addr, ec->data_addr);
 	return 0;
 }
 
@@ -756,10 +763,7 @@
 	acpi_driver_data(device) = NULL;
 	if (ec == first_ec)
 		first_ec = NULL;
-
-	/* Don't touch boot EC */
-	if (boot_ec != ec)
-		kfree(ec);
+	kfree(ec);
 	return 0;
 }
 
@@ -789,6 +793,8 @@
 static int ec_install_handlers(struct acpi_ec *ec)
 {
 	acpi_status status;
+	if (ec->handlers_installed)
+		return 0;
 	status = acpi_install_gpe_handler(NULL, ec->gpe,
 					  ACPI_GPE_EDGE_TRIGGERED,
 					  &acpi_ec_gpe_handler, ec);
@@ -807,6 +813,7 @@
 		return -ENODEV;
 	}
 
+	ec->handlers_installed = 1;
 	return 0;
 }
 
@@ -823,41 +830,22 @@
 	if (!ec)
 		return -EINVAL;
 
-	/* Boot EC is already working */
-	if (ec != boot_ec)
-		ret = ec_install_handlers(ec);
+	ret = ec_install_handlers(ec);
 
 	/* EC is fully operational, allow queries */
 	atomic_set(&ec->query_pending, 0);
-
 	return ret;
 }
 
 static int acpi_ec_stop(struct acpi_device *device, int type)
 {
-	acpi_status status;
 	struct acpi_ec *ec;
-
 	if (!device)
 		return -EINVAL;
-
 	ec = acpi_driver_data(device);
 	if (!ec)
 		return -EINVAL;
-
-	/* Don't touch boot EC */
-	if (ec == boot_ec)
-		return 0;
-
-	status = acpi_remove_address_space_handler(ec->handle,
-						   ACPI_ADR_SPACE_EC,
-						   &acpi_ec_space_handler);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	status = acpi_remove_gpe_handler(NULL, ec->gpe, &acpi_ec_gpe_handler);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
+	ec_remove_handlers(ec);
 
 	return 0;
 }
@@ -877,7 +865,7 @@
 	status = acpi_get_table(ACPI_SIG_ECDT, 1,
 				(struct acpi_table_header **)&ecdt_ptr);
 	if (ACPI_SUCCESS(status)) {
-		printk(KERN_INFO PREFIX "EC description table is found, configuring boot EC\n\n");
+		printk(KERN_INFO PREFIX "EC description table is found, configuring boot EC\n");
 		boot_ec->command_addr = ecdt_ptr->control.address;
 		boot_ec->data_addr = ecdt_ptr->data.address;
 		boot_ec->gpe = ecdt_ptr->gpe;
@@ -899,7 +887,6 @@
       error:
 	kfree(boot_ec);
 	boot_ec = NULL;
-
 	return -ENODEV;
 }
 
diff --git a/drivers/acpi/events/evevent.c b/drivers/acpi/events/evevent.c
index a1f87b5..e412878 100644
--- a/drivers/acpi/events/evevent.c
+++ b/drivers/acpi/events/evevent.c
@@ -239,10 +239,8 @@
 	 * Read the fixed feature status and enable registers, as all the cases
 	 * depend on their values.  Ignore errors here.
 	 */
-	(void)acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-				    ACPI_REGISTER_PM1_STATUS, &fixed_status);
-	(void)acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-				    ACPI_REGISTER_PM1_ENABLE, &fixed_enable);
+	(void)acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &fixed_status);
+	(void)acpi_hw_register_read(ACPI_REGISTER_PM1_ENABLE, &fixed_enable);
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INTERRUPTS,
 			  "Fixed Event Block: Enable %08X Status %08X\n",
diff --git a/drivers/acpi/hardware/hwregs.c b/drivers/acpi/hardware/hwregs.c
index 1d371fa..73f9c5f 100644
--- a/drivers/acpi/hardware/hwregs.c
+++ b/drivers/acpi/hardware/hwregs.c
@@ -75,8 +75,7 @@
 
 	lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
 
-	status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-					ACPI_REGISTER_PM1_STATUS,
+	status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
 					ACPI_BITMASK_ALL_FIXED_STATUS);
 	if (ACPI_FAILURE(status)) {
 		goto unlock_and_exit;
@@ -259,7 +258,7 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_get_register(u32 register_id, u32 * return_value)
+acpi_status acpi_get_register_unlocked(u32 register_id, u32 * return_value)
 {
 	u32 register_value = 0;
 	struct acpi_bit_register_info *bit_reg_info;
@@ -276,8 +275,7 @@
 
 	/* Read from the register */
 
-	status = acpi_hw_register_read(ACPI_MTX_LOCK,
-				       bit_reg_info->parent_register,
+	status = acpi_hw_register_read(bit_reg_info->parent_register,
 				       &register_value);
 
 	if (ACPI_SUCCESS(status)) {
@@ -298,6 +296,16 @@
 	return_ACPI_STATUS(status);
 }
 
+acpi_status acpi_get_register(u32 register_id, u32 * return_value)
+{
+	acpi_status status;
+	acpi_cpu_flags flags;
+	flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
+	status = acpi_get_register_unlocked(register_id, return_value);
+	acpi_os_release_lock(acpi_gbl_hardware_lock, flags);
+	return status;
+}
+
 ACPI_EXPORT_SYMBOL(acpi_get_register)
 
 /*******************************************************************************
@@ -335,8 +343,7 @@
 
 	/* Always do a register read first so we can insert the new bits  */
 
-	status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-				       bit_reg_info->parent_register,
+	status = acpi_hw_register_read(bit_reg_info->parent_register,
 				       &register_value);
 	if (ACPI_FAILURE(status)) {
 		goto unlock_and_exit;
@@ -363,8 +370,7 @@
 						   bit_reg_info->
 						   access_bit_mask);
 		if (value) {
-			status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-							ACPI_REGISTER_PM1_STATUS,
+			status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
 							(u16) value);
 			register_value = 0;
 		}
@@ -377,8 +383,7 @@
 					   bit_reg_info->access_bit_mask,
 					   value);
 
-		status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-						ACPI_REGISTER_PM1_ENABLE,
+		status = acpi_hw_register_write(ACPI_REGISTER_PM1_ENABLE,
 						(u16) register_value);
 		break;
 
@@ -397,15 +402,13 @@
 					   bit_reg_info->access_bit_mask,
 					   value);
 
-		status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-						ACPI_REGISTER_PM1_CONTROL,
+		status = acpi_hw_register_write(ACPI_REGISTER_PM1_CONTROL,
 						(u16) register_value);
 		break;
 
 	case ACPI_REGISTER_PM2_CONTROL:
 
-		status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-					       ACPI_REGISTER_PM2_CONTROL,
+		status = acpi_hw_register_read(ACPI_REGISTER_PM2_CONTROL,
 					       &register_value);
 		if (ACPI_FAILURE(status)) {
 			goto unlock_and_exit;
@@ -430,8 +433,7 @@
 						     xpm2_control_block.
 						     address)));
 
-		status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-						ACPI_REGISTER_PM2_CONTROL,
+		status = acpi_hw_register_write(ACPI_REGISTER_PM2_CONTROL,
 						(u8) (register_value));
 		break;
 
@@ -461,8 +463,7 @@
  *
  * FUNCTION:    acpi_hw_register_read
  *
- * PARAMETERS:  use_lock            - Lock hardware? True/False
- *              register_id         - ACPI Register ID
+ * PARAMETERS:  register_id         - ACPI Register ID
  *              return_value        - Where the register value is returned
  *
  * RETURN:      Status and the value read.
@@ -471,19 +472,14 @@
  *
  ******************************************************************************/
 acpi_status
-acpi_hw_register_read(u8 use_lock, u32 register_id, u32 * return_value)
+acpi_hw_register_read(u32 register_id, u32 * return_value)
 {
 	u32 value1 = 0;
 	u32 value2 = 0;
 	acpi_status status;
-	acpi_cpu_flags lock_flags = 0;
 
 	ACPI_FUNCTION_TRACE(hw_register_read);
 
-	if (ACPI_MTX_LOCK == use_lock) {
-		lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
-	}
-
 	switch (register_id) {
 	case ACPI_REGISTER_PM1_STATUS:	/* 16-bit access */
 
@@ -491,7 +487,7 @@
 		    acpi_hw_low_level_read(16, &value1,
 					   &acpi_gbl_FADT.xpm1a_event_block);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		/* PM1B is optional */
@@ -507,7 +503,7 @@
 		status =
 		    acpi_hw_low_level_read(16, &value1, &acpi_gbl_xpm1a_enable);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		/* PM1B is optional */
@@ -523,7 +519,7 @@
 		    acpi_hw_low_level_read(16, &value1,
 					   &acpi_gbl_FADT.xpm1a_control_block);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		status =
@@ -558,10 +554,7 @@
 		break;
 	}
 
-      unlock_and_exit:
-	if (ACPI_MTX_LOCK == use_lock) {
-		acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
-	}
+      exit:
 
 	if (ACPI_SUCCESS(status)) {
 		*return_value = value1;
@@ -574,8 +567,7 @@
  *
  * FUNCTION:    acpi_hw_register_write
  *
- * PARAMETERS:  use_lock            - Lock hardware? True/False
- *              register_id         - ACPI Register ID
+ * PARAMETERS:  register_id         - ACPI Register ID
  *              Value               - The value to write
  *
  * RETURN:      Status
@@ -597,28 +589,22 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_hw_register_write(u8 use_lock, u32 register_id, u32 value)
+acpi_status acpi_hw_register_write(u32 register_id, u32 value)
 {
 	acpi_status status;
-	acpi_cpu_flags lock_flags = 0;
 	u32 read_value;
 
 	ACPI_FUNCTION_TRACE(hw_register_write);
 
-	if (ACPI_MTX_LOCK == use_lock) {
-		lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
-	}
-
 	switch (register_id) {
 	case ACPI_REGISTER_PM1_STATUS:	/* 16-bit access */
 
 		/* Perform a read first to preserve certain bits (per ACPI spec) */
 
-		status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-					       ACPI_REGISTER_PM1_STATUS,
+		status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS,
 					       &read_value);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		/* Insert the bits to be preserved */
@@ -632,7 +618,7 @@
 		    acpi_hw_low_level_write(16, value,
 					    &acpi_gbl_FADT.xpm1a_event_block);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		/* PM1B is optional */
@@ -647,7 +633,7 @@
 		status =
 		    acpi_hw_low_level_write(16, value, &acpi_gbl_xpm1a_enable);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		/* PM1B is optional */
@@ -661,11 +647,10 @@
 		/*
 		 * Perform a read first to preserve certain bits (per ACPI spec)
 		 */
-		status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-					       ACPI_REGISTER_PM1_CONTROL,
+		status = acpi_hw_register_read(ACPI_REGISTER_PM1_CONTROL,
 					       &read_value);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		/* Insert the bits to be preserved */
@@ -679,7 +664,7 @@
 		    acpi_hw_low_level_write(16, value,
 					    &acpi_gbl_FADT.xpm1a_control_block);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		status =
@@ -728,11 +713,7 @@
 		break;
 	}
 
-      unlock_and_exit:
-	if (ACPI_MTX_LOCK == use_lock) {
-		acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
-	}
-
+      exit:
 	return_ACPI_STATUS(status);
 }
 
diff --git a/drivers/acpi/hardware/hwsleep.c b/drivers/acpi/hardware/hwsleep.c
index cf69c00..81b2484 100644
--- a/drivers/acpi/hardware/hwsleep.c
+++ b/drivers/acpi/hardware/hwsleep.c
@@ -234,15 +234,11 @@
 				"While executing method _SST"));
 	}
 
-	/*
-	 * 1) Disable/Clear all GPEs
-	 */
-	status = acpi_hw_disable_all_gpes();
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
+	/* Disable/Clear all GPEs */
 
-	return_ACPI_STATUS(AE_OK);
+	status = acpi_hw_disable_all_gpes();
+
+	return_ACPI_STATUS(status);
 }
 
 ACPI_EXPORT_SYMBOL(acpi_enter_sleep_state_prep)
@@ -313,8 +309,7 @@
 
 	/* Get current value of PM1A control */
 
-	status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-				       ACPI_REGISTER_PM1_CONTROL, &PM1Acontrol);
+	status = acpi_hw_register_read(ACPI_REGISTER_PM1_CONTROL, &PM1Acontrol);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
@@ -341,15 +336,13 @@
 
 	/* Write #1: fill in SLP_TYP data */
 
-	status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-					ACPI_REGISTER_PM1A_CONTROL,
+	status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL,
 					PM1Acontrol);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
 
-	status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-					ACPI_REGISTER_PM1B_CONTROL,
+	status = acpi_hw_register_write(ACPI_REGISTER_PM1B_CONTROL,
 					PM1Bcontrol);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
@@ -364,15 +357,13 @@
 
 	ACPI_FLUSH_CPU_CACHE();
 
-	status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-					ACPI_REGISTER_PM1A_CONTROL,
+	status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL,
 					PM1Acontrol);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
 
-	status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-					ACPI_REGISTER_PM1B_CONTROL,
+	status = acpi_hw_register_write(ACPI_REGISTER_PM1B_CONTROL,
 					PM1Bcontrol);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
@@ -392,8 +383,7 @@
 		 */
 		acpi_os_stall(10000000);
 
-		status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-						ACPI_REGISTER_PM1_CONTROL,
+		status = acpi_hw_register_write(ACPI_REGISTER_PM1_CONTROL,
 						sleep_enable_reg_info->
 						access_bit_mask);
 		if (ACPI_FAILURE(status)) {
@@ -404,7 +394,8 @@
 	/* Wait until we enter sleep state */
 
 	do {
-		status = acpi_get_register(ACPI_BITREG_WAKE_STATUS, &in_value);
+		status = acpi_get_register_unlocked(ACPI_BITREG_WAKE_STATUS,
+						    &in_value);
 		if (ACPI_FAILURE(status)) {
 			return_ACPI_STATUS(status);
 		}
@@ -520,8 +511,7 @@
 
 		/* Get current value of PM1A control */
 
-		status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-					       ACPI_REGISTER_PM1_CONTROL,
+		status = acpi_hw_register_read(ACPI_REGISTER_PM1_CONTROL,
 					       &PM1Acontrol);
 		if (ACPI_SUCCESS(status)) {
 
@@ -543,11 +533,9 @@
 
 			/* Just ignore any errors */
 
-			(void)acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-						     ACPI_REGISTER_PM1A_CONTROL,
+			(void)acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL,
 						     PM1Acontrol);
-			(void)acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-						     ACPI_REGISTER_PM1B_CONTROL,
+			(void)acpi_hw_register_write(ACPI_REGISTER_PM1B_CONTROL,
 						     PM1Bcontrol);
 		}
 	}
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 352cf81..aabc6ca 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1043,14 +1043,6 @@
 __setup("acpi_wake_gpes_always_on", acpi_wake_gpes_always_on_setup);
 
 /*
- * max_cstate is defined in the base kernel so modules can
- * change it w/o depending on the state of the processor module.
- */
-unsigned int max_cstate = ACPI_PROCESSOR_MAX_POWER;
-
-EXPORT_SYMBOL(max_cstate);
-
-/*
  * Acquire a spinlock.
  *
  * handle is a pointer to the spinlock_t.
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 9f11dc2..235a51e 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -44,6 +44,7 @@
 #include <linux/seq_file.h>
 #include <linux/dmi.h>
 #include <linux/moduleparam.h>
+#include <linux/cpuidle.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
@@ -421,12 +422,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_IA64
-#define arch_cpu_to_apicid 	ia64_cpu_to_sapicid
-#else
-#define arch_cpu_to_apicid 	x86_cpu_to_apicid
-#endif
-
 static int map_madt_entry(u32 acpi_id)
 {
 	unsigned long madt_end, entry;
@@ -500,7 +495,7 @@
 		return apic_id;
 
 	for (i = 0; i < NR_CPUS; ++i) {
-		if (arch_cpu_to_apicid[i] == apic_id)
+		if (cpu_physical_id(i) == apic_id)
 			return i;
 	}
 	return -1;
@@ -1049,11 +1044,13 @@
 		return -ENOMEM;
 	acpi_processor_dir->owner = THIS_MODULE;
 
+	result = cpuidle_register_driver(&acpi_idle_driver);
+	if (result < 0)
+		goto out_proc;
+
 	result = acpi_bus_register_driver(&acpi_processor_driver);
-	if (result < 0) {
-		remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir);
-		return result;
-	}
+	if (result < 0)
+		goto out_cpuidle;
 
 	acpi_processor_install_hotplug_notify();
 
@@ -1062,11 +1059,18 @@
 	acpi_processor_ppc_init();
 
 	return 0;
+
+out_cpuidle:
+	cpuidle_unregister_driver(&acpi_idle_driver);
+
+out_proc:
+	remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir);
+
+	return result;
 }
 
 static void __exit acpi_processor_exit(void)
 {
-
 	acpi_processor_ppc_exit();
 
 	acpi_thermal_cpufreq_exit();
@@ -1075,6 +1079,8 @@
 
 	acpi_bus_unregister_driver(&acpi_processor_driver);
 
+	cpuidle_unregister_driver(&acpi_idle_driver);
+
 	remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir);
 
 	return;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 1f6fb38d..f996d0e 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -40,6 +40,7 @@
 #include <linux/sched.h>	/* need_resched() */
 #include <linux/latency.h>
 #include <linux/clockchips.h>
+#include <linux/cpuidle.h>
 
 /*
  * Include the apic definitions for x86 to have the APIC timer related defines
@@ -64,14 +65,22 @@
 #define ACPI_PROCESSOR_FILE_POWER	"power"
 #define US_TO_PM_TIMER_TICKS(t)		((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
 #define PM_TIMER_TICK_NS		(1000000000ULL/PM_TIMER_FREQUENCY)
+#ifndef CONFIG_CPU_IDLE
 #define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
 #define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */
 static void (*pm_idle_save) (void) __read_mostly;
-module_param(max_cstate, uint, 0644);
+#else
+#define C2_OVERHEAD			1	/* 1us */
+#define C3_OVERHEAD			1	/* 1us */
+#endif
+#define PM_TIMER_TICKS_TO_US(p)		(((p) * 1000)/(PM_TIMER_FREQUENCY/1000))
 
+static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
+module_param(max_cstate, uint, 0000);
 static unsigned int nocst __read_mostly;
 module_param(nocst, uint, 0000);
 
+#ifndef CONFIG_CPU_IDLE
 /*
  * bm_history -- bit-mask with a bit per jiffy of bus-master activity
  * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
@@ -82,9 +91,10 @@
 static unsigned int bm_history __read_mostly =
     (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
 module_param(bm_history, uint, 0644);
-/* --------------------------------------------------------------------------
-                                Power Management
-   -------------------------------------------------------------------------- */
+
+static int acpi_processor_set_power_policy(struct acpi_processor *pr);
+
+#endif
 
 /*
  * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
@@ -177,6 +187,18 @@
 		return ((0xFFFFFFFF - t1) + t2);
 }
 
+static inline u32 ticks_elapsed_in_us(u32 t1, u32 t2)
+{
+	if (t2 >= t1)
+		return PM_TIMER_TICKS_TO_US(t2 - t1);
+	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
+		return PM_TIMER_TICKS_TO_US(((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
+	else
+		return PM_TIMER_TICKS_TO_US((0xFFFFFFFF - t1) + t2);
+}
+
+#ifndef CONFIG_CPU_IDLE
+
 static void
 acpi_processor_power_activate(struct acpi_processor *pr,
 			      struct acpi_processor_cx *new)
@@ -248,6 +270,7 @@
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
 }
+#endif /* !CONFIG_CPU_IDLE */
 
 #ifdef ARCH_APICTIMER_STOPS_ON_C3
 
@@ -330,6 +353,7 @@
 	return 0;
 }
 
+#ifndef CONFIG_CPU_IDLE
 static void acpi_processor_idle(void)
 {
 	struct acpi_processor *pr = NULL;
@@ -427,7 +451,7 @@
 	 * an SMP system. We do it here instead of doing it at _CST/P_LVL
 	 * detection phase, to work cleanly with logical CPU hotplug.
 	 */
-	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) && 
+	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
 	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
 		cx = &pr->power.states[ACPI_STATE_C1];
 #endif
@@ -727,6 +751,7 @@
 
 	return 0;
 }
+#endif /* !CONFIG_CPU_IDLE */
 
 static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
 {
@@ -744,7 +769,7 @@
 #ifndef CONFIG_HOTPLUG_CPU
 	/*
 	 * Check for P_LVL2_UP flag before entering C2 and above on
-	 * an SMP system. 
+	 * an SMP system.
 	 */
 	if ((num_online_cpus() > 1) &&
 	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
@@ -945,7 +970,12 @@
 	 * Normalize the C2 latency to expidite policy
 	 */
 	cx->valid = 1;
+
+#ifndef CONFIG_CPU_IDLE
 	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+#else
+	cx->latency_ticks = cx->latency;
+#endif
 
 	return;
 }
@@ -1025,7 +1055,12 @@
 	 * use this in our C3 policy
 	 */
 	cx->valid = 1;
+
+#ifndef CONFIG_CPU_IDLE
 	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+#else
+	cx->latency_ticks = cx->latency;
+#endif
 
 	return;
 }
@@ -1090,6 +1125,7 @@
 
 	pr->power.count = acpi_processor_power_verify(pr);
 
+#ifndef CONFIG_CPU_IDLE
 	/*
 	 * Set Default Policy
 	 * ------------------
@@ -1101,6 +1137,7 @@
 	result = acpi_processor_set_power_policy(pr);
 	if (result)
 		return result;
+#endif
 
 	/*
 	 * if one state of type C2 or C3 is available, mark this
@@ -1117,35 +1154,6 @@
 	return 0;
 }
 
-int acpi_processor_cst_has_changed(struct acpi_processor *pr)
-{
-	int result = 0;
-
-
-	if (!pr)
-		return -EINVAL;
-
-	if (nocst) {
-		return -ENODEV;
-	}
-
-	if (!pr->flags.power_setup_done)
-		return -ENODEV;
-
-	/* Fall back to the default idle loop */
-	pm_idle = pm_idle_save;
-	synchronize_sched();	/* Relies on interrupts forcing exit from idle. */
-
-	pr->flags.power = 0;
-	result = acpi_processor_get_power_info(pr);
-	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
-		pm_idle = acpi_processor_idle;
-
-	return result;
-}
-
-/* proc interface */
-
 static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
 {
 	struct acpi_processor *pr = seq->private;
@@ -1227,6 +1235,35 @@
 	.release = single_release,
 };
 
+#ifndef CONFIG_CPU_IDLE
+
+int acpi_processor_cst_has_changed(struct acpi_processor *pr)
+{
+	int result = 0;
+
+
+	if (!pr)
+		return -EINVAL;
+
+	if (nocst) {
+		return -ENODEV;
+	}
+
+	if (!pr->flags.power_setup_done)
+		return -ENODEV;
+
+	/* Fall back to the default idle loop */
+	pm_idle = pm_idle_save;
+	synchronize_sched();	/* Relies on interrupts forcing exit from idle. */
+
+	pr->flags.power = 0;
+	result = acpi_processor_get_power_info(pr);
+	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
+		pm_idle = acpi_processor_idle;
+
+	return result;
+}
+
 #ifdef CONFIG_SMP
 static void smp_callback(void *v)
 {
@@ -1249,8 +1286,367 @@
 static struct notifier_block acpi_processor_latency_notifier = {
 	.notifier_call = acpi_processor_latency_notify,
 };
+
 #endif
 
+#else /* CONFIG_CPU_IDLE */
+
+/**
+ * acpi_idle_bm_check - checks if bus master activity was detected
+ */
+static int acpi_idle_bm_check(void)
+{
+	u32 bm_status = 0;
+
+	acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
+	if (bm_status)
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
+	/*
+	 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
+	 * the true state of bus mastering activity; forcing us to
+	 * manually check the BMIDEA bit of each IDE channel.
+	 */
+	else if (errata.piix4.bmisx) {
+		if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
+		    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
+			bm_status = 1;
+	}
+	return bm_status;
+}
+
+/**
+ * acpi_idle_update_bm_rld - updates the BM_RLD bit depending on target state
+ * @pr: the processor
+ * @target: the new target state
+ */
+static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr,
+					   struct acpi_processor_cx *target)
+{
+	if (pr->flags.bm_rld_set && target->type != ACPI_STATE_C3) {
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+		pr->flags.bm_rld_set = 0;
+	}
+
+	if (!pr->flags.bm_rld_set && target->type == ACPI_STATE_C3) {
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
+		pr->flags.bm_rld_set = 1;
+	}
+}
+
+/**
+ * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
+ * @cx: cstate data
+ */
+static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
+{
+	if (cx->space_id == ACPI_CSTATE_FFH) {
+		/* Call into architectural FFH based C-state */
+		acpi_processor_ffh_cstate_enter(cx);
+	} else {
+		int unused;
+		/* IO port based C-state */
+		inb(cx->address);
+		/* Dummy wait op - must do something useless after P_LVL2 read
+		   because chipsets cannot guarantee that STPCLK# signal
+		   gets asserted in time to freeze execution properly. */
+		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
+	}
+}
+
+/**
+ * acpi_idle_enter_c1 - enters an ACPI C1 state-type
+ * @dev: the target CPU
+ * @state: the state data
+ *
+ * This is equivalent to the HALT instruction.
+ */
+static int acpi_idle_enter_c1(struct cpuidle_device *dev,
+			      struct cpuidle_state *state)
+{
+	struct acpi_processor *pr;
+	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
+	pr = processors[smp_processor_id()];
+
+	if (unlikely(!pr))
+		return 0;
+
+	if (pr->flags.bm_check)
+		acpi_idle_update_bm_rld(pr, cx);
+
+	current_thread_info()->status &= ~TS_POLLING;
+	/*
+	 * TS_POLLING-cleared state must be visible before we test
+	 * NEED_RESCHED:
+	 */
+	smp_mb();
+	if (!need_resched())
+		safe_halt();
+	current_thread_info()->status |= TS_POLLING;
+
+	cx->usage++;
+
+	return 0;
+}
+
+/**
+ * acpi_idle_enter_simple - enters an ACPI state without BM handling
+ * @dev: the target CPU
+ * @state: the state data
+ */
+static int acpi_idle_enter_simple(struct cpuidle_device *dev,
+				  struct cpuidle_state *state)
+{
+	struct acpi_processor *pr;
+	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
+	u32 t1, t2;
+	pr = processors[smp_processor_id()];
+
+	if (unlikely(!pr))
+		return 0;
+
+	if (acpi_idle_suspend)
+		return(acpi_idle_enter_c1(dev, state));
+
+	if (pr->flags.bm_check)
+		acpi_idle_update_bm_rld(pr, cx);
+
+	local_irq_disable();
+	current_thread_info()->status &= ~TS_POLLING;
+	/*
+	 * TS_POLLING-cleared state must be visible before we test
+	 * NEED_RESCHED:
+	 */
+	smp_mb();
+
+	if (unlikely(need_resched())) {
+		current_thread_info()->status |= TS_POLLING;
+		local_irq_enable();
+		return 0;
+	}
+
+	if (cx->type == ACPI_STATE_C3)
+		ACPI_FLUSH_CPU_CACHE();
+
+	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+	acpi_state_timer_broadcast(pr, cx, 1);
+	acpi_idle_do_entry(cx);
+	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+
+#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
+	/* TSC could halt in idle, so notify users */
+	mark_tsc_unstable("TSC halts in idle");;
+#endif
+
+	local_irq_enable();
+	current_thread_info()->status |= TS_POLLING;
+
+	cx->usage++;
+
+	acpi_state_timer_broadcast(pr, cx, 0);
+	cx->time += ticks_elapsed(t1, t2);
+	return ticks_elapsed_in_us(t1, t2);
+}
+
+static int c3_cpu_count;
+static DEFINE_SPINLOCK(c3_lock);
+
+/**
+ * acpi_idle_enter_bm - enters C3 with proper BM handling
+ * @dev: the target CPU
+ * @state: the state data
+ *
+ * If BM is detected, the deepest non-C3 idle state is entered instead.
+ */
+static int acpi_idle_enter_bm(struct cpuidle_device *dev,
+			      struct cpuidle_state *state)
+{
+	struct acpi_processor *pr;
+	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
+	u32 t1, t2;
+	pr = processors[smp_processor_id()];
+
+	if (unlikely(!pr))
+		return 0;
+
+	if (acpi_idle_suspend)
+		return(acpi_idle_enter_c1(dev, state));
+
+	local_irq_disable();
+	current_thread_info()->status &= ~TS_POLLING;
+	/*
+	 * TS_POLLING-cleared state must be visible before we test
+	 * NEED_RESCHED:
+	 */
+	smp_mb();
+
+	if (unlikely(need_resched())) {
+		current_thread_info()->status |= TS_POLLING;
+		local_irq_enable();
+		return 0;
+	}
+
+	/*
+	 * Must be done before busmaster disable as we might need to
+	 * access HPET !
+	 */
+	acpi_state_timer_broadcast(pr, cx, 1);
+
+	if (acpi_idle_bm_check()) {
+		cx = pr->power.bm_state;
+
+		acpi_idle_update_bm_rld(pr, cx);
+
+		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+		acpi_idle_do_entry(cx);
+		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+	} else {
+		acpi_idle_update_bm_rld(pr, cx);
+
+		spin_lock(&c3_lock);
+		c3_cpu_count++;
+		/* Disable bus master arbitration when all CPUs are in C3 */
+		if (c3_cpu_count == num_online_cpus())
+			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
+		spin_unlock(&c3_lock);
+
+		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+		acpi_idle_do_entry(cx);
+		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+
+		spin_lock(&c3_lock);
+		/* Re-enable bus master arbitration */
+		if (c3_cpu_count == num_online_cpus())
+			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
+		c3_cpu_count--;
+		spin_unlock(&c3_lock);
+	}
+
+#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
+	/* TSC could halt in idle, so notify users */
+	mark_tsc_unstable("TSC halts in idle");
+#endif
+
+	local_irq_enable();
+	current_thread_info()->status |= TS_POLLING;
+
+	cx->usage++;
+
+	acpi_state_timer_broadcast(pr, cx, 0);
+	cx->time += ticks_elapsed(t1, t2);
+	return ticks_elapsed_in_us(t1, t2);
+}
+
+struct cpuidle_driver acpi_idle_driver = {
+	.name =		"acpi_idle",
+	.owner =	THIS_MODULE,
+};
+
+/**
+ * acpi_processor_setup_cpuidle - prepares and configures CPUIDLE
+ * @pr: the ACPI processor
+ */
+static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
+{
+	int i, count = 0;
+	struct acpi_processor_cx *cx;
+	struct cpuidle_state *state;
+	struct cpuidle_device *dev = &pr->power.dev;
+
+	if (!pr->flags.power_setup_done)
+		return -EINVAL;
+
+	if (pr->flags.power == 0) {
+		return -EINVAL;
+	}
+
+	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
+		cx = &pr->power.states[i];
+		state = &dev->states[count];
+
+		if (!cx->valid)
+			continue;
+
+#ifdef CONFIG_HOTPLUG_CPU
+		if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
+		    !pr->flags.has_cst &&
+		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
+			continue;
+#endif
+		cpuidle_set_statedata(state, cx);
+
+		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i);
+		state->exit_latency = cx->latency;
+		state->target_residency = cx->latency * 6;
+		state->power_usage = cx->power;
+
+		state->flags = 0;
+		switch (cx->type) {
+			case ACPI_STATE_C1:
+			state->flags |= CPUIDLE_FLAG_SHALLOW;
+			state->enter = acpi_idle_enter_c1;
+			break;
+
+			case ACPI_STATE_C2:
+			state->flags |= CPUIDLE_FLAG_BALANCED;
+			state->flags |= CPUIDLE_FLAG_TIME_VALID;
+			state->enter = acpi_idle_enter_simple;
+			break;
+
+			case ACPI_STATE_C3:
+			state->flags |= CPUIDLE_FLAG_DEEP;
+			state->flags |= CPUIDLE_FLAG_TIME_VALID;
+			state->flags |= CPUIDLE_FLAG_CHECK_BM;
+			state->enter = pr->flags.bm_check ?
+					acpi_idle_enter_bm :
+					acpi_idle_enter_simple;
+			break;
+		}
+
+		count++;
+	}
+
+	dev->state_count = count;
+
+	if (!count)
+		return -EINVAL;
+
+	/* find the deepest state that can handle active BM */
+	if (pr->flags.bm_check) {
+		for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++)
+			if (pr->power.states[i].type == ACPI_STATE_C3)
+				break;
+		pr->power.bm_state = &pr->power.states[i-1];
+	}
+
+	return 0;
+}
+
+int acpi_processor_cst_has_changed(struct acpi_processor *pr)
+{
+	int ret;
+
+	if (!pr)
+		return -EINVAL;
+
+	if (nocst) {
+		return -ENODEV;
+	}
+
+	if (!pr->flags.power_setup_done)
+		return -ENODEV;
+
+	cpuidle_pause_and_lock();
+	cpuidle_disable_device(&pr->power.dev);
+	acpi_processor_get_power_info(pr);
+	acpi_processor_setup_cpuidle(pr);
+	ret = cpuidle_enable_device(&pr->power.dev);
+	cpuidle_resume_and_unlock();
+
+	return ret;
+}
+
+#endif /* CONFIG_CPU_IDLE */
+
 int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 			      struct acpi_device *device)
 {
@@ -1267,7 +1663,7 @@
 			       "ACPI: processor limited to max C-state %d\n",
 			       max_cstate);
 		first_run++;
-#ifdef CONFIG_SMP
+#if !defined (CONFIG_CPU_IDLE) && defined (CONFIG_SMP)
 		register_latency_notifier(&acpi_processor_latency_notifier);
 #endif
 	}
@@ -1285,6 +1681,7 @@
 	}
 
 	acpi_processor_get_power_info(pr);
+	pr->flags.power_setup_done = 1;
 
 	/*
 	 * Install the idle handler if processor power management is supported.
@@ -1292,6 +1689,13 @@
 	 * platforms that only support C1.
 	 */
 	if ((pr->flags.power) && (!boot_option_idle_override)) {
+#ifdef CONFIG_CPU_IDLE
+		acpi_processor_setup_cpuidle(pr);
+		pr->power.dev.cpu = pr->id;
+		if (cpuidle_register_device(&pr->power.dev))
+			return -EIO;
+#endif
+
 		printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
 		for (i = 1; i <= pr->power.count; i++)
 			if (pr->power.states[i].valid)
@@ -1299,10 +1703,12 @@
 				       pr->power.states[i].type);
 		printk(")\n");
 
+#ifndef CONFIG_CPU_IDLE
 		if (pr->id == 0) {
 			pm_idle_save = pm_idle;
 			pm_idle = acpi_processor_idle;
 		}
+#endif
 	}
 
 	/* 'power' [R] */
@@ -1316,21 +1722,24 @@
 		entry->owner = THIS_MODULE;
 	}
 
-	pr->flags.power_setup_done = 1;
-
 	return 0;
 }
 
 int acpi_processor_power_exit(struct acpi_processor *pr,
 			      struct acpi_device *device)
 {
-
+#ifdef CONFIG_CPU_IDLE
+	if ((pr->flags.power) && (!boot_option_idle_override))
+		cpuidle_unregister_device(&pr->power.dev);
+#endif
 	pr->flags.power_setup_done = 0;
 
 	if (acpi_device_dir(device))
 		remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
 				  acpi_device_dir(device));
 
+#ifndef CONFIG_CPU_IDLE
+
 	/* Unregister the idle handler when processor #0 is removed. */
 	if (pr->id == 0) {
 		pm_idle = pm_idle_save;
@@ -1345,6 +1754,7 @@
 		unregister_latency_notifier(&acpi_processor_latency_notifier);
 #endif
 	}
+#endif
 
 	return 0;
 }
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index a578986..90fd09c 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -1,6 +1,8 @@
 /*
- *  acpi_sbs.c - ACPI Smart Battery System Driver ($Revision: 1.16 $)
+ *  sbs.c - ACPI Smart Battery System Driver ($Revision: 2.0 $)
  *
+ *  Copyright (c) 2007 Alexey Starikovskiy <astarikovskiy@suse.de>
+ *  Copyright (c) 2005-2007 Vladimir Lebedev <vladimir.p.lebedev@intel.com>
  *  Copyright (c) 2005 Rich Townsend <rhdt@bartol.udel.edu>
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -26,15 +28,22 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/kernel.h>
+
+#ifdef CONFIG_ACPI_PROCFS
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <asm/uaccess.h>
+#endif
+
 #include <linux/acpi.h>
 #include <linux/timer.h>
 #include <linux/jiffies.h>
 #include <linux/delay.h>
 
-#define ACPI_SBS_COMPONENT		0x00080000
+#include <linux/power_supply.h>
+
+#include "sbshc.h"
+
 #define ACPI_SBS_CLASS			"sbs"
 #define ACPI_AC_CLASS			"ac_adapter"
 #define ACPI_BATTERY_CLASS		"battery"
@@ -44,836 +53,436 @@
 #define ACPI_SBS_FILE_ALARM		"alarm"
 #define ACPI_BATTERY_DIR_NAME		"BAT%i"
 #define ACPI_AC_DIR_NAME		"AC0"
-#define ACPI_SBC_SMBUS_ADDR		0x9
-#define ACPI_SBSM_SMBUS_ADDR		0xa
-#define ACPI_SB_SMBUS_ADDR		0xb
-#define ACPI_SBS_AC_NOTIFY_STATUS	0x80
-#define ACPI_SBS_BATTERY_NOTIFY_STATUS	0x80
-#define ACPI_SBS_BATTERY_NOTIFY_INFO	0x81
 
-#define _COMPONENT			ACPI_SBS_COMPONENT
+enum acpi_sbs_device_addr {
+	ACPI_SBS_CHARGER = 0x9,
+	ACPI_SBS_MANAGER = 0xa,
+	ACPI_SBS_BATTERY = 0xb,
+};
 
-ACPI_MODULE_NAME("sbs");
+#define ACPI_SBS_NOTIFY_STATUS		0x80
+#define ACPI_SBS_NOTIFY_INFO		0x81
 
-MODULE_AUTHOR("Rich Townsend");
+MODULE_AUTHOR("Alexey Starikovskiy <astarikovskiy@suse.de>");
 MODULE_DESCRIPTION("Smart Battery System ACPI interface driver");
 MODULE_LICENSE("GPL");
 
-#define	xmsleep(t)	msleep(t)
-
-#define ACPI_EC_SMB_PRTCL	0x00	/* protocol, PEC */
-
-#define ACPI_EC_SMB_STS		0x01	/* status */
-#define ACPI_EC_SMB_ADDR	0x02	/* address */
-#define ACPI_EC_SMB_CMD		0x03	/* command */
-#define ACPI_EC_SMB_DATA	0x04	/* 32 data registers */
-#define ACPI_EC_SMB_BCNT	0x24	/* number of data bytes */
-
-#define ACPI_EC_SMB_STS_DONE	0x80
-#define ACPI_EC_SMB_STS_STATUS	0x1f
-
-#define ACPI_EC_SMB_PRTCL_WRITE		0x00
-#define ACPI_EC_SMB_PRTCL_READ		0x01
-#define ACPI_EC_SMB_PRTCL_WORD_DATA	0x08
-#define ACPI_EC_SMB_PRTCL_BLOCK_DATA	0x0a
-
-#define ACPI_EC_SMB_TRANSACTION_SLEEP	1
-#define ACPI_EC_SMB_ACCESS_SLEEP1	1
-#define ACPI_EC_SMB_ACCESS_SLEEP2	10
-
-#define	DEF_CAPACITY_UNIT	3
-#define	MAH_CAPACITY_UNIT	1
-#define	MWH_CAPACITY_UNIT	2
-#define	CAPACITY_UNIT		DEF_CAPACITY_UNIT
-
-#define	REQUEST_UPDATE_MODE	1
-#define	QUEUE_UPDATE_MODE	2
-
-#define	DATA_TYPE_COMMON	0
-#define	DATA_TYPE_INFO		1
-#define	DATA_TYPE_STATE		2
-#define	DATA_TYPE_ALARM		3
-#define	DATA_TYPE_AC_STATE	4
+static unsigned int cache_time = 1000;
+module_param(cache_time, uint, 0644);
+MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
 
 extern struct proc_dir_entry *acpi_lock_ac_dir(void);
 extern struct proc_dir_entry *acpi_lock_battery_dir(void);
 extern void acpi_unlock_ac_dir(struct proc_dir_entry *acpi_ac_dir);
 extern void acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir);
 
-#define	MAX_SBS_BAT			4
+#define MAX_SBS_BAT			4
 #define ACPI_SBS_BLOCK_MAX		32
 
-#define ACPI_SBS_SMBUS_READ		1
-#define ACPI_SBS_SMBUS_WRITE		2
-
-#define ACPI_SBS_WORD_DATA		1
-#define ACPI_SBS_BLOCK_DATA		2
-
-#define	UPDATE_DELAY	10
-
-/* 0 - every time, > 0 - by update_time */
-static unsigned int update_time = 120;
-
-static unsigned int capacity_mode = CAPACITY_UNIT;
-
-module_param(update_time, uint, 0644);
-module_param(capacity_mode, uint, 0444);
-
-static int acpi_sbs_add(struct acpi_device *device);
-static int acpi_sbs_remove(struct acpi_device *device, int type);
-static int acpi_sbs_resume(struct acpi_device *device);
-
 static const struct acpi_device_id sbs_device_ids[] = {
-	{"ACPI0001", 0},
-	{"ACPI0005", 0},
+	{"ACPI0002", 0},
 	{"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, sbs_device_ids);
 
-static struct acpi_driver acpi_sbs_driver = {
-	.name = "sbs",
-	.class = ACPI_SBS_CLASS,
-	.ids = sbs_device_ids,
-	.ops = {
-		.add = acpi_sbs_add,
-		.remove = acpi_sbs_remove,
-		.resume = acpi_sbs_resume,
-		},
-};
-
-struct acpi_ac {
-	int ac_present;
-};
-
-struct acpi_battery_info {
-	int capacity_mode;
-	s16 full_charge_capacity;
-	s16 design_capacity;
-	s16 design_voltage;
-	int vscale;
-	int ipscale;
-	s16 serial_number;
-	char manufacturer_name[ACPI_SBS_BLOCK_MAX + 3];
-	char device_name[ACPI_SBS_BLOCK_MAX + 3];
-	char device_chemistry[ACPI_SBS_BLOCK_MAX + 3];
-};
-
-struct acpi_battery_state {
-	s16 voltage;
-	s16 amperage;
-	s16 remaining_capacity;
-	s16 battery_state;
-};
-
-struct acpi_battery_alarm {
-	s16 remaining_capacity;
-};
-
 struct acpi_battery {
-	int alive;
-	int id;
-	int init_state;
-	int battery_present;
+	struct power_supply bat;
 	struct acpi_sbs *sbs;
-	struct acpi_battery_info info;
-	struct acpi_battery_state state;
-	struct acpi_battery_alarm alarm;
-	struct proc_dir_entry *battery_entry;
+#ifdef CONFIG_ACPI_PROCFS
+	struct proc_dir_entry *proc_entry;
+#endif
+	unsigned long update_time;
+	char name[8];
+	char manufacturer_name[ACPI_SBS_BLOCK_MAX];
+	char device_name[ACPI_SBS_BLOCK_MAX];
+	char device_chemistry[ACPI_SBS_BLOCK_MAX];
+	u16 alarm_capacity;
+	u16 full_charge_capacity;
+	u16 design_capacity;
+	u16 design_voltage;
+	u16 serial_number;
+	u16 cycle_count;
+	u16 temp_now;
+	u16 voltage_now;
+	s16 current_now;
+	s16 current_avg;
+	u16 capacity_now;
+	u16 state_of_charge;
+	u16 state;
+	u16 mode;
+	u16 spec;
+	u8 id;
+	u8 present:1;
 };
 
+#define to_acpi_battery(x) container_of(x, struct acpi_battery, bat);
+
 struct acpi_sbs {
-	int base;
+	struct power_supply charger;
 	struct acpi_device *device;
-	struct mutex mutex;
-	int sbsm_present;
-	int sbsm_batteries_supported;
-	struct proc_dir_entry *ac_entry;
-	struct acpi_ac ac;
+	struct acpi_smb_hc *hc;
+	struct mutex lock;
+#ifdef CONFIG_ACPI_PROCFS
+	struct proc_dir_entry *charger_entry;
+#endif
 	struct acpi_battery battery[MAX_SBS_BAT];
-	int zombie;
-	struct timer_list update_timer;
-	int run_cnt;
-	int update_proc_flg;
+	u8 batteries_supported:4;
+	u8 manager_present:1;
+	u8 charger_present:1;
 };
 
-static int acpi_sbs_update_run(struct acpi_sbs *sbs, int id, int data_type);
-static void acpi_sbs_update_time(void *data);
+#define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger)
 
-union sbs_rw_data {
-	u16 word;
-	u8 block[ACPI_SBS_BLOCK_MAX + 2];
-};
-
-static int acpi_ec_sbs_access(struct acpi_sbs *sbs, u16 addr,
-			      char read_write, u8 command, int size,
-			      union sbs_rw_data *data);
-
-/* --------------------------------------------------------------------------
-                               SMBus Communication
-   -------------------------------------------------------------------------- */
-
-static int acpi_ec_sbs_read(struct acpi_sbs *sbs, u8 address, u8 * data)
+static inline int battery_scale(int log)
 {
-	u8 val;
-	int err;
-
-	err = ec_read(sbs->base + address, &val);
-	if (!err) {
-		*data = val;
-	}
-	xmsleep(ACPI_EC_SMB_TRANSACTION_SLEEP);
-	return (err);
+	int scale = 1;
+	while (log--)
+		scale *= 10;
+	return scale;
 }
 
-static int acpi_ec_sbs_write(struct acpi_sbs *sbs, u8 address, u8 data)
+static inline int acpi_battery_vscale(struct acpi_battery *battery)
 {
-	int err;
-
-	err = ec_write(sbs->base + address, data);
-	return (err);
+	return battery_scale((battery->spec & 0x0f00) >> 8);
 }
 
-static int
-acpi_ec_sbs_access(struct acpi_sbs *sbs, u16 addr,
-		   char read_write, u8 command, int size,
-		   union sbs_rw_data *data)
+static inline int acpi_battery_ipscale(struct acpi_battery *battery)
 {
-	unsigned char protocol, len = 0, temp[2] = { 0, 0 };
-	int i;
+	return battery_scale((battery->spec & 0xf000) >> 12);
+}
 
-	if (read_write == ACPI_SBS_SMBUS_READ) {
-		protocol = ACPI_EC_SMB_PRTCL_READ;
-	} else {
-		protocol = ACPI_EC_SMB_PRTCL_WRITE;
-	}
+static inline int acpi_battery_mode(struct acpi_battery *battery)
+{
+	return (battery->mode & 0x8000);
+}
 
-	switch (size) {
+static inline int acpi_battery_scale(struct acpi_battery *battery)
+{
+	return (acpi_battery_mode(battery) ? 10 : 1) *
+	    acpi_battery_ipscale(battery);
+}
 
-	case ACPI_SBS_WORD_DATA:
-		acpi_ec_sbs_write(sbs, ACPI_EC_SMB_CMD, command);
-		if (read_write == ACPI_SBS_SMBUS_WRITE) {
-			acpi_ec_sbs_write(sbs, ACPI_EC_SMB_DATA, data->word);
-			acpi_ec_sbs_write(sbs, ACPI_EC_SMB_DATA + 1,
-					  data->word >> 8);
-		}
-		protocol |= ACPI_EC_SMB_PRTCL_WORD_DATA;
-		break;
-	case ACPI_SBS_BLOCK_DATA:
-		acpi_ec_sbs_write(sbs, ACPI_EC_SMB_CMD, command);
-		if (read_write == ACPI_SBS_SMBUS_WRITE) {
-			len = min_t(u8, data->block[0], 32);
-			acpi_ec_sbs_write(sbs, ACPI_EC_SMB_BCNT, len);
-			for (i = 0; i < len; i++)
-				acpi_ec_sbs_write(sbs, ACPI_EC_SMB_DATA + i,
-						  data->block[i + 1]);
-		}
-		protocol |= ACPI_EC_SMB_PRTCL_BLOCK_DATA;
+static int sbs_get_ac_property(struct power_supply *psy,
+			       enum power_supply_property psp,
+			       union power_supply_propval *val)
+{
+	struct acpi_sbs *sbs = to_acpi_sbs(psy);
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = sbs->charger_present;
 		break;
 	default:
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"unsupported transaction %d", size));
-		return (-1);
+		return -EINVAL;
 	}
-
-	acpi_ec_sbs_write(sbs, ACPI_EC_SMB_ADDR, addr << 1);
-	acpi_ec_sbs_write(sbs, ACPI_EC_SMB_PRTCL, protocol);
-
-	acpi_ec_sbs_read(sbs, ACPI_EC_SMB_STS, temp);
-
-	if (~temp[0] & ACPI_EC_SMB_STS_DONE) {
-		xmsleep(ACPI_EC_SMB_ACCESS_SLEEP1);
-		acpi_ec_sbs_read(sbs, ACPI_EC_SMB_STS, temp);
-	}
-	if (~temp[0] & ACPI_EC_SMB_STS_DONE) {
-		xmsleep(ACPI_EC_SMB_ACCESS_SLEEP2);
-		acpi_ec_sbs_read(sbs, ACPI_EC_SMB_STS, temp);
-	}
-	if ((~temp[0] & ACPI_EC_SMB_STS_DONE)
-	    || (temp[0] & ACPI_EC_SMB_STS_STATUS)) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"transaction %d error", size));
-		return (-1);
-	}
-
-	if (read_write == ACPI_SBS_SMBUS_WRITE) {
-		return (0);
-	}
-
-	switch (size) {
-
-	case ACPI_SBS_WORD_DATA:
-		acpi_ec_sbs_read(sbs, ACPI_EC_SMB_DATA, temp);
-		acpi_ec_sbs_read(sbs, ACPI_EC_SMB_DATA + 1, temp + 1);
-		data->word = (temp[1] << 8) | temp[0];
-		break;
-
-	case ACPI_SBS_BLOCK_DATA:
-		len = 0;
-		acpi_ec_sbs_read(sbs, ACPI_EC_SMB_BCNT, &len);
-		len = min_t(u8, len, 32);
-		for (i = 0; i < len; i++)
-			acpi_ec_sbs_read(sbs, ACPI_EC_SMB_DATA + i,
-					 data->block + i + 1);
-		data->block[0] = len;
-		break;
-	default:
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"unsupported transaction %d", size));
-		return (-1);
-	}
-
-	return (0);
-}
-
-static int
-acpi_sbs_read_word(struct acpi_sbs *sbs, int addr, int func, u16 * word)
-{
-	union sbs_rw_data data;
-	int result = 0;
-
-	result = acpi_ec_sbs_access(sbs, addr,
-				    ACPI_SBS_SMBUS_READ, func,
-				    ACPI_SBS_WORD_DATA, &data);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_ec_sbs_access() failed"));
-	} else {
-		*word = data.word;
-	}
-
-	return result;
-}
-
-static int
-acpi_sbs_read_str(struct acpi_sbs *sbs, int addr, int func, char *str)
-{
-	union sbs_rw_data data;
-	int result = 0;
-
-	result = acpi_ec_sbs_access(sbs, addr,
-				    ACPI_SBS_SMBUS_READ, func,
-				    ACPI_SBS_BLOCK_DATA, &data);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_ec_sbs_access() failed"));
-	} else {
-		strncpy(str, (const char *)data.block + 1, data.block[0]);
-		str[data.block[0]] = 0;
-	}
-
-	return result;
-}
-
-static int
-acpi_sbs_write_word(struct acpi_sbs *sbs, int addr, int func, int word)
-{
-	union sbs_rw_data data;
-	int result = 0;
-
-	data.word = word;
-
-	result = acpi_ec_sbs_access(sbs, addr,
-				    ACPI_SBS_SMBUS_WRITE, func,
-				    ACPI_SBS_WORD_DATA, &data);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_ec_sbs_access() failed"));
-	}
-
-	return result;
-}
-
-static int sbs_zombie(struct acpi_sbs *sbs)
-{
-	return (sbs->zombie);
-}
-
-static int sbs_mutex_lock(struct acpi_sbs *sbs)
-{
-	if (sbs_zombie(sbs)) {
-		return -ENODEV;
-	}
-	mutex_lock(&sbs->mutex);
 	return 0;
 }
 
-static void sbs_mutex_unlock(struct acpi_sbs *sbs)
+static int acpi_battery_technology(struct acpi_battery *battery)
 {
-	mutex_unlock(&sbs->mutex);
+	if (!strcasecmp("NiCd", battery->device_chemistry))
+		return POWER_SUPPLY_TECHNOLOGY_NiCd;
+	if (!strcasecmp("NiMH", battery->device_chemistry))
+		return POWER_SUPPLY_TECHNOLOGY_NiMH;
+	if (!strcasecmp("LION", battery->device_chemistry))
+		return POWER_SUPPLY_TECHNOLOGY_LION;
+	if (!strcasecmp("LiP", battery->device_chemistry))
+		return POWER_SUPPLY_TECHNOLOGY_LIPO;
+	return POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
 }
 
+static int acpi_sbs_battery_get_property(struct power_supply *psy,
+					 enum power_supply_property psp,
+					 union power_supply_propval *val)
+{
+	struct acpi_battery *battery = to_acpi_battery(psy);
+
+	if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT)
+		return -ENODEV;
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		if (battery->current_now < 0)
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		else if (battery->current_now > 0)
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		else
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		break;
+	case POWER_SUPPLY_PROP_PRESENT:
+		val->intval = battery->present;
+		break;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = acpi_battery_technology(battery);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		val->intval = battery->design_voltage *
+			acpi_battery_vscale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = battery->voltage_now *
+				acpi_battery_vscale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		val->intval = abs(battery->current_now) *
+				acpi_battery_ipscale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_AVG:
+		val->intval = abs(battery->current_avg) *
+				acpi_battery_ipscale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY:
+		val->intval = battery->state_of_charge;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+		val->intval = battery->design_capacity *
+			acpi_battery_scale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+	case POWER_SUPPLY_PROP_ENERGY_FULL:
+		val->intval = battery->full_charge_capacity *
+			acpi_battery_scale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+	case POWER_SUPPLY_PROP_ENERGY_NOW:
+		val->intval = battery->capacity_now *
+				acpi_battery_scale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_TEMP:
+		val->intval = battery->temp_now - 2730;	// dK -> dC
+		break;
+	case POWER_SUPPLY_PROP_MODEL_NAME:
+		val->strval = battery->device_name;
+		break;
+	case POWER_SUPPLY_PROP_MANUFACTURER:
+		val->strval = battery->manufacturer_name;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static enum power_supply_property sbs_ac_props[] = {
+	POWER_SUPPLY_PROP_ONLINE,
+};
+
+static enum power_supply_property sbs_charge_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CURRENT_AVG,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
+	POWER_SUPPLY_PROP_TEMP,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+	POWER_SUPPLY_PROP_MANUFACTURER,
+};
+
+static enum power_supply_property sbs_energy_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CURRENT_AVG,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
+	POWER_SUPPLY_PROP_ENERGY_FULL,
+	POWER_SUPPLY_PROP_ENERGY_NOW,
+	POWER_SUPPLY_PROP_TEMP,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+	POWER_SUPPLY_PROP_MANUFACTURER,
+};
+
 /* --------------------------------------------------------------------------
                             Smart Battery System Management
    -------------------------------------------------------------------------- */
 
-static int acpi_check_update_proc(struct acpi_sbs *sbs)
-{
-	acpi_status status = AE_OK;
+struct acpi_battery_reader {
+	u8 command;		/* command for battery */
+	u8 mode;		/* word or block? */
+	size_t offset;		/* offset inside struct acpi_sbs_battery */
+};
 
-	if (update_time == 0) {
-		sbs->update_proc_flg = 0;
-		return 0;
-	}
-	if (sbs->update_proc_flg == 0) {
-		status = acpi_os_execute(OSL_GPE_HANDLER,
-					 acpi_sbs_update_time, sbs);
-		if (status != AE_OK) {
-			ACPI_EXCEPTION((AE_INFO, status,
-					"acpi_os_execute() failed"));
-			return 1;
-		}
-		sbs->update_proc_flg = 1;
-	}
-	return 0;
-}
+static struct acpi_battery_reader info_readers[] = {
+	{0x01, SMBUS_READ_WORD, offsetof(struct acpi_battery, alarm_capacity)},
+	{0x03, SMBUS_READ_WORD, offsetof(struct acpi_battery, mode)},
+	{0x10, SMBUS_READ_WORD, offsetof(struct acpi_battery, full_charge_capacity)},
+	{0x17, SMBUS_READ_WORD, offsetof(struct acpi_battery, cycle_count)},
+	{0x18, SMBUS_READ_WORD, offsetof(struct acpi_battery, design_capacity)},
+	{0x19, SMBUS_READ_WORD, offsetof(struct acpi_battery, design_voltage)},
+	{0x1a, SMBUS_READ_WORD, offsetof(struct acpi_battery, spec)},
+	{0x1c, SMBUS_READ_WORD, offsetof(struct acpi_battery, serial_number)},
+	{0x20, SMBUS_READ_BLOCK, offsetof(struct acpi_battery, manufacturer_name)},
+	{0x21, SMBUS_READ_BLOCK, offsetof(struct acpi_battery, device_name)},
+	{0x22, SMBUS_READ_BLOCK, offsetof(struct acpi_battery, device_chemistry)},
+};
 
-static int acpi_sbs_generate_event(struct acpi_device *device,
-				   int event, int state, char *bid, char *class)
-{
-	char bid_saved[5];
-	char class_saved[20];
-	int result = 0;
+static struct acpi_battery_reader state_readers[] = {
+	{0x08, SMBUS_READ_WORD, offsetof(struct acpi_battery, temp_now)},
+	{0x09, SMBUS_READ_WORD, offsetof(struct acpi_battery, voltage_now)},
+	{0x0a, SMBUS_READ_WORD, offsetof(struct acpi_battery, current_now)},
+	{0x0b, SMBUS_READ_WORD, offsetof(struct acpi_battery, current_avg)},
+	{0x0f, SMBUS_READ_WORD, offsetof(struct acpi_battery, capacity_now)},
+	{0x0e, SMBUS_READ_WORD, offsetof(struct acpi_battery, state_of_charge)},
+	{0x16, SMBUS_READ_WORD, offsetof(struct acpi_battery, state)},
+};
 
-	strcpy(bid_saved, acpi_device_bid(device));
-	strcpy(class_saved, acpi_device_class(device));
-
-	strcpy(acpi_device_bid(device), bid);
-	strcpy(acpi_device_class(device), class);
-
-	result = acpi_bus_generate_proc_event(device, event, state);
-
-	strcpy(acpi_device_bid(device), bid_saved);
-	strcpy(acpi_device_class(device), class_saved);
-
-	acpi_bus_generate_netlink_event(class, bid, event, state);
-	return result;
-}
-
-static int acpi_battery_get_present(struct acpi_battery *battery)
-{
-	s16 state;
-	int result = 0;
-	int is_present = 0;
-
-	result = acpi_sbs_read_word(battery->sbs,
-				    ACPI_SBSM_SMBUS_ADDR, 0x01, &state);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-	}
-	if (!result) {
-		is_present = (state & 0x000f) & (1 << battery->id);
-	}
-	battery->battery_present = is_present;
-
-	return result;
-}
-
-static int acpi_battery_select(struct acpi_battery *battery)
-{
-	struct acpi_sbs *sbs = battery->sbs;
-	int result = 0;
-	s16 state;
-	int foo;
-
-	if (sbs->sbsm_present) {
-
-		/* Take special care not to knobble other nibbles of
-		 * state (aka selector_state), since
-		 * it causes charging to halt on SBSELs */
-
-		result =
-		    acpi_sbs_read_word(sbs, ACPI_SBSM_SMBUS_ADDR, 0x01, &state);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_read_word() failed"));
-			goto end;
-		}
-
-		foo = (state & 0x0fff) | (1 << (battery->id + 12));
-		result =
-		    acpi_sbs_write_word(sbs, ACPI_SBSM_SMBUS_ADDR, 0x01, foo);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_write_word() failed"));
-			goto end;
-		}
-	}
-
-      end:
-	return result;
-}
-
-static int acpi_sbsm_get_info(struct acpi_sbs *sbs)
+static int acpi_manager_get_info(struct acpi_sbs *sbs)
 {
 	int result = 0;
-	s16 battery_system_info;
+	u16 battery_system_info;
 
-	result = acpi_sbs_read_word(sbs, ACPI_SBSM_SMBUS_ADDR, 0x04,
-				    &battery_system_info);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-	sbs->sbsm_present = 1;
-	sbs->sbsm_batteries_supported = battery_system_info & 0x000f;
-
-      end:
-
+	result = acpi_smbus_read(sbs->hc, SMBUS_READ_WORD, ACPI_SBS_MANAGER,
+				 0x04, (u8 *)&battery_system_info);
+	if (!result)
+		sbs->batteries_supported = battery_system_info & 0x000f;
 	return result;
 }
 
 static int acpi_battery_get_info(struct acpi_battery *battery)
 {
-	struct acpi_sbs *sbs = battery->sbs;
-	int result = 0;
-	s16 battery_mode;
-	s16 specification_info;
+	int i, result = 0;
 
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x03,
-				    &battery_mode);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
+	for (i = 0; i < ARRAY_SIZE(info_readers); ++i) {
+		result = acpi_smbus_read(battery->sbs->hc,
+					 info_readers[i].mode,
+					 ACPI_SBS_BATTERY,
+					 info_readers[i].command,
+					 (u8 *) battery +
+						info_readers[i].offset);
+		if (result)
+			break;
 	}
-	battery->info.capacity_mode = (battery_mode & 0x8000) >> 15;
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x10,
-				    &battery->info.full_charge_capacity);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x18,
-				    &battery->info.design_capacity);
-
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x19,
-				    &battery->info.design_voltage);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x1a,
-				    &specification_info);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	switch ((specification_info & 0x0f00) >> 8) {
-	case 1:
-		battery->info.vscale = 10;
-		break;
-	case 2:
-		battery->info.vscale = 100;
-		break;
-	case 3:
-		battery->info.vscale = 1000;
-		break;
-	default:
-		battery->info.vscale = 1;
-	}
-
-	switch ((specification_info & 0xf000) >> 12) {
-	case 1:
-		battery->info.ipscale = 10;
-		break;
-	case 2:
-		battery->info.ipscale = 100;
-		break;
-	case 3:
-		battery->info.ipscale = 1000;
-		break;
-	default:
-		battery->info.ipscale = 1;
-	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x1c,
-				    &battery->info.serial_number);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_str(sbs, ACPI_SB_SMBUS_ADDR, 0x20,
-				   battery->info.manufacturer_name);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_str() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_str(sbs, ACPI_SB_SMBUS_ADDR, 0x21,
-				   battery->info.device_name);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_str() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_str(sbs, ACPI_SB_SMBUS_ADDR, 0x22,
-				   battery->info.device_chemistry);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_str() failed"));
-		goto end;
-	}
-
-      end:
 	return result;
 }
 
 static int acpi_battery_get_state(struct acpi_battery *battery)
 {
-	struct acpi_sbs *sbs = battery->sbs;
-	int result = 0;
+	int i, result = 0;
 
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x09,
-				    &battery->state.voltage);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
+	if (battery->update_time &&
+	    time_before(jiffies, battery->update_time +
+				msecs_to_jiffies(cache_time)))
+		return 0;
+	for (i = 0; i < ARRAY_SIZE(state_readers); ++i) {
+		result = acpi_smbus_read(battery->sbs->hc,
+					 state_readers[i].mode,
+					 ACPI_SBS_BATTERY,
+					 state_readers[i].command,
+				         (u8 *)battery +
+						state_readers[i].offset);
+		if (result)
+			goto end;
 	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x0a,
-				    &battery->state.amperage);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x0f,
-				    &battery->state.remaining_capacity);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x16,
-				    &battery->state.battery_state);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
       end:
+	battery->update_time = jiffies;
 	return result;
 }
 
 static int acpi_battery_get_alarm(struct acpi_battery *battery)
 {
-	struct acpi_sbs *sbs = battery->sbs;
-	int result = 0;
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x01,
-				    &battery->alarm.remaining_capacity);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-      end:
-
-	return result;
+	return acpi_smbus_read(battery->sbs->hc, SMBUS_READ_WORD,
+				 ACPI_SBS_BATTERY, 0x01,
+				 (u8 *)&battery->alarm_capacity);
 }
 
-static int acpi_battery_set_alarm(struct acpi_battery *battery,
-				  unsigned long alarm)
+static int acpi_battery_set_alarm(struct acpi_battery *battery)
 {
 	struct acpi_sbs *sbs = battery->sbs;
-	int result = 0;
-	s16 battery_mode;
-	int foo;
+	u16 value, sel = 1 << (battery->id + 12);
 
-	result = acpi_battery_select(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_select() failed"));
-		goto end;
-	}
+	int ret;
 
-	/* If necessary, enable the alarm */
 
-	if (alarm > 0) {
-		result =
-		    acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x03,
-				       &battery_mode);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_read_word() failed"));
+	if (sbs->manager_present) {
+		ret = acpi_smbus_read(sbs->hc, SMBUS_READ_WORD, ACPI_SBS_MANAGER,
+				0x01, (u8 *)&value);
+		if (ret)
 			goto end;
-		}
-
-		result =
-		    acpi_sbs_write_word(sbs, ACPI_SB_SMBUS_ADDR, 0x01,
-					battery_mode & 0xbfff);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_write_word() failed"));
+		if ((value & 0xf000) != sel) {
+			value &= 0x0fff;
+			value |= sel;
+		ret = acpi_smbus_write(sbs->hc, SMBUS_WRITE_WORD,
+					 ACPI_SBS_MANAGER,
+					 0x01, (u8 *)&value, 2);
+		if (ret)
 			goto end;
 		}
 	}
-
-	foo = alarm / (battery->info.capacity_mode ? 10 : 1);
-	result = acpi_sbs_write_word(sbs, ACPI_SB_SMBUS_ADDR, 0x01, foo);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_write_word() failed"));
-		goto end;
-	}
-
+	ret = acpi_smbus_write(sbs->hc, SMBUS_WRITE_WORD, ACPI_SBS_BATTERY,
+				0x01, (u8 *)&battery->alarm_capacity, 2);
       end:
-
-	return result;
-}
-
-static int acpi_battery_set_mode(struct acpi_battery *battery)
-{
-	struct acpi_sbs *sbs = battery->sbs;
-	int result = 0;
-	s16 battery_mode;
-
-	if (capacity_mode == DEF_CAPACITY_UNIT) {
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs,
-				    ACPI_SB_SMBUS_ADDR, 0x03, &battery_mode);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	if (capacity_mode == MAH_CAPACITY_UNIT) {
-		battery_mode &= 0x7fff;
-	} else {
-		battery_mode |= 0x8000;
-	}
-	result = acpi_sbs_write_word(sbs,
-				     ACPI_SB_SMBUS_ADDR, 0x03, battery_mode);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_write_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs,
-				    ACPI_SB_SMBUS_ADDR, 0x03, &battery_mode);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-      end:
-	return result;
-}
-
-static int acpi_battery_init(struct acpi_battery *battery)
-{
-	int result = 0;
-
-	result = acpi_battery_select(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_select() failed"));
-		goto end;
-	}
-
-	result = acpi_battery_set_mode(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_set_mode() failed"));
-		goto end;
-	}
-
-	result = acpi_battery_get_info(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_get_info() failed"));
-		goto end;
-	}
-
-	result = acpi_battery_get_state(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_get_state() failed"));
-		goto end;
-	}
-
-	result = acpi_battery_get_alarm(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_get_alarm() failed"));
-		goto end;
-	}
-
-      end:
-	return result;
+	return ret;
 }
 
 static int acpi_ac_get_present(struct acpi_sbs *sbs)
 {
-	int result = 0;
-	s16 charger_status;
+	int result;
+	u16 status;
 
-	result = acpi_sbs_read_word(sbs, ACPI_SBC_SMBUS_ADDR, 0x13,
-				    &charger_status);
-
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	sbs->ac.ac_present = (charger_status & 0x8000) >> 15;
-
-      end:
-
+	result = acpi_smbus_read(sbs->hc, SMBUS_READ_WORD, ACPI_SBS_CHARGER,
+				 0x13, (u8 *) & status);
+	if (!result)
+		sbs->charger_present = (status >> 15) & 0x1;
 	return result;
 }
 
+static ssize_t acpi_battery_alarm_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct acpi_battery *battery = to_acpi_battery(dev_get_drvdata(dev));
+	acpi_battery_get_alarm(battery);
+	return sprintf(buf, "%d\n", battery->alarm_capacity *
+				acpi_battery_scale(battery) * 1000);
+}
+
+static ssize_t acpi_battery_alarm_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	unsigned long x;
+	struct acpi_battery *battery = to_acpi_battery(dev_get_drvdata(dev));
+	if (sscanf(buf, "%ld\n", &x) == 1)
+		battery->alarm_capacity = x /
+			(1000 * acpi_battery_scale(battery));
+	if (battery->present)
+		acpi_battery_set_alarm(battery);
+	return count;
+}
+
+static struct device_attribute alarm_attr = {
+	.attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE},
+	.show = acpi_battery_alarm_show,
+	.store = acpi_battery_alarm_store,
+};
+
 /* --------------------------------------------------------------------------
                               FS Interface (/proc/acpi)
    -------------------------------------------------------------------------- */
 
+#ifdef CONFIG_ACPI_PROCFS
 /* Generic Routines */
-
 static int
-acpi_sbs_generic_add_fs(struct proc_dir_entry **dir,
-			struct proc_dir_entry *parent_dir,
-			char *dir_name,
-			struct file_operations *info_fops,
-			struct file_operations *state_fops,
-			struct file_operations *alarm_fops, void *data)
+acpi_sbs_add_fs(struct proc_dir_entry **dir,
+		struct proc_dir_entry *parent_dir,
+		char *dir_name,
+		struct file_operations *info_fops,
+		struct file_operations *state_fops,
+		struct file_operations *alarm_fops, void *data)
 {
 	struct proc_dir_entry *entry = NULL;
 
 	if (!*dir) {
 		*dir = proc_mkdir(dir_name, parent_dir);
 		if (!*dir) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"proc_mkdir() failed"));
 			return -ENODEV;
 		}
 		(*dir)->owner = THIS_MODULE;
@@ -882,10 +491,7 @@
 	/* 'info' [R] */
 	if (info_fops) {
 		entry = create_proc_entry(ACPI_SBS_FILE_INFO, S_IRUGO, *dir);
-		if (!entry) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"create_proc_entry() failed"));
-		} else {
+		if (entry) {
 			entry->proc_fops = info_fops;
 			entry->data = data;
 			entry->owner = THIS_MODULE;
@@ -895,10 +501,7 @@
 	/* 'state' [R] */
 	if (state_fops) {
 		entry = create_proc_entry(ACPI_SBS_FILE_STATE, S_IRUGO, *dir);
-		if (!entry) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"create_proc_entry() failed"));
-		} else {
+		if (entry) {
 			entry->proc_fops = state_fops;
 			entry->data = data;
 			entry->owner = THIS_MODULE;
@@ -908,24 +511,19 @@
 	/* 'alarm' [R/W] */
 	if (alarm_fops) {
 		entry = create_proc_entry(ACPI_SBS_FILE_ALARM, S_IRUGO, *dir);
-		if (!entry) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"create_proc_entry() failed"));
-		} else {
+		if (entry) {
 			entry->proc_fops = alarm_fops;
 			entry->data = data;
 			entry->owner = THIS_MODULE;
 		}
 	}
-
 	return 0;
 }
 
 static void
-acpi_sbs_generic_remove_fs(struct proc_dir_entry **dir,
+acpi_sbs_remove_fs(struct proc_dir_entry **dir,
 			   struct proc_dir_entry *parent_dir)
 {
-
 	if (*dir) {
 		remove_proc_entry(ACPI_SBS_FILE_INFO, *dir);
 		remove_proc_entry(ACPI_SBS_FILE_STATE, *dir);
@@ -933,82 +531,52 @@
 		remove_proc_entry((*dir)->name, parent_dir);
 		*dir = NULL;
 	}
-
 }
 
 /* Smart Battery Interface */
-
 static struct proc_dir_entry *acpi_battery_dir = NULL;
 
+static inline char *acpi_battery_units(struct acpi_battery *battery)
+{
+	return acpi_battery_mode(battery) ? " mWh" : " mAh";
+}
+
+
 static int acpi_battery_read_info(struct seq_file *seq, void *offset)
 {
 	struct acpi_battery *battery = seq->private;
 	struct acpi_sbs *sbs = battery->sbs;
-	int cscale;
 	int result = 0;
 
-	if (sbs_mutex_lock(sbs)) {
-		return -ENODEV;
-	}
+	mutex_lock(&sbs->lock);
 
-	result = acpi_check_update_proc(sbs);
-	if (result)
+	seq_printf(seq, "present:                 %s\n",
+		   (battery->present) ? "yes" : "no");
+	if (!battery->present)
 		goto end;
 
-	if (update_time == 0) {
-		result = acpi_sbs_update_run(sbs, battery->id, DATA_TYPE_INFO);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_update_run() failed"));
-		}
-	}
-
-	if (battery->battery_present) {
-		seq_printf(seq, "present:                 yes\n");
-	} else {
-		seq_printf(seq, "present:                 no\n");
-		goto end;
-	}
-
-	if (battery->info.capacity_mode) {
-		cscale = battery->info.vscale * battery->info.ipscale;
-	} else {
-		cscale = battery->info.ipscale;
-	}
 	seq_printf(seq, "design capacity:         %i%s\n",
-		   battery->info.design_capacity * cscale,
-		   battery->info.capacity_mode ? "0 mWh" : " mAh");
-
+		   battery->design_capacity * acpi_battery_scale(battery),
+		   acpi_battery_units(battery));
 	seq_printf(seq, "last full capacity:      %i%s\n",
-		   battery->info.full_charge_capacity * cscale,
-		   battery->info.capacity_mode ? "0 mWh" : " mAh");
-
+		   battery->full_charge_capacity * acpi_battery_scale(battery),
+		   acpi_battery_units(battery));
 	seq_printf(seq, "battery technology:      rechargeable\n");
-
 	seq_printf(seq, "design voltage:          %i mV\n",
-		   battery->info.design_voltage * battery->info.vscale);
-
+		   battery->design_voltage * acpi_battery_vscale(battery));
 	seq_printf(seq, "design capacity warning: unknown\n");
 	seq_printf(seq, "design capacity low:     unknown\n");
 	seq_printf(seq, "capacity granularity 1:  unknown\n");
 	seq_printf(seq, "capacity granularity 2:  unknown\n");
-
-	seq_printf(seq, "model number:            %s\n",
-		   battery->info.device_name);
-
+	seq_printf(seq, "model number:            %s\n", battery->device_name);
 	seq_printf(seq, "serial number:           %i\n",
-		   battery->info.serial_number);
-
+		   battery->serial_number);
 	seq_printf(seq, "battery type:            %s\n",
-		   battery->info.device_chemistry);
-
+		   battery->device_chemistry);
 	seq_printf(seq, "OEM info:                %s\n",
-		   battery->info.manufacturer_name);
-
+		   battery->manufacturer_name);
       end:
-
-	sbs_mutex_unlock(sbs);
-
+	mutex_unlock(&sbs->lock);
 	return result;
 }
 
@@ -1022,73 +590,29 @@
 	struct acpi_battery *battery = seq->private;
 	struct acpi_sbs *sbs = battery->sbs;
 	int result = 0;
-	int cscale;
-	int foo;
 
-	if (sbs_mutex_lock(sbs)) {
-		return -ENODEV;
-	}
-
-	result = acpi_check_update_proc(sbs);
-	if (result)
+	mutex_lock(&sbs->lock);
+	seq_printf(seq, "present:                 %s\n",
+		   (battery->present) ? "yes" : "no");
+	if (!battery->present)
 		goto end;
 
-	if (update_time == 0) {
-		result = acpi_sbs_update_run(sbs, battery->id, DATA_TYPE_STATE);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_update_run() failed"));
-		}
-	}
-
-	if (battery->battery_present) {
-		seq_printf(seq, "present:                 yes\n");
-	} else {
-		seq_printf(seq, "present:                 no\n");
-		goto end;
-	}
-
-	if (battery->info.capacity_mode) {
-		cscale = battery->info.vscale * battery->info.ipscale;
-	} else {
-		cscale = battery->info.ipscale;
-	}
-
-	if (battery->state.battery_state & 0x0010) {
-		seq_printf(seq, "capacity state:          critical\n");
-	} else {
-		seq_printf(seq, "capacity state:          ok\n");
-	}
-
-	foo = (s16) battery->state.amperage * battery->info.ipscale;
-	if (battery->info.capacity_mode) {
-		foo = foo * battery->info.design_voltage / 1000;
-	}
-	if (battery->state.amperage < 0) {
-		seq_printf(seq, "charging state:          discharging\n");
-		seq_printf(seq, "present rate:            %d %s\n",
-			   -foo, battery->info.capacity_mode ? "mW" : "mA");
-	} else if (battery->state.amperage > 0) {
-		seq_printf(seq, "charging state:          charging\n");
-		seq_printf(seq, "present rate:            %d %s\n",
-			   foo, battery->info.capacity_mode ? "mW" : "mA");
-	} else {
-		seq_printf(seq, "charging state:          charged\n");
-		seq_printf(seq, "present rate:            0 %s\n",
-			   battery->info.capacity_mode ? "mW" : "mA");
-	}
-
+	acpi_battery_get_state(battery);
+	seq_printf(seq, "capacity state:          %s\n",
+		   (battery->state & 0x0010) ? "critical" : "ok");
+	seq_printf(seq, "charging state:          %s\n",
+		   (battery->current_now < 0) ? "discharging" :
+		   ((battery->current_now > 0) ? "charging" : "charged"));
+	seq_printf(seq, "present rate:            %d mA\n",
+		   abs(battery->current_now) * acpi_battery_ipscale(battery));
 	seq_printf(seq, "remaining capacity:      %i%s\n",
-		   battery->state.remaining_capacity * cscale,
-		   battery->info.capacity_mode ? "0 mWh" : " mAh");
-
+		   battery->capacity_now * acpi_battery_scale(battery),
+		   acpi_battery_units(battery));
 	seq_printf(seq, "present voltage:         %i mV\n",
-		   battery->state.voltage * battery->info.vscale);
+		   battery->voltage_now * acpi_battery_vscale(battery));
 
       end:
-
-	sbs_mutex_unlock(sbs);
-
+	mutex_unlock(&sbs->lock);
 	return result;
 }
 
@@ -1102,48 +626,25 @@
 	struct acpi_battery *battery = seq->private;
 	struct acpi_sbs *sbs = battery->sbs;
 	int result = 0;
-	int cscale;
 
-	if (sbs_mutex_lock(sbs)) {
-		return -ENODEV;
-	}
+	mutex_lock(&sbs->lock);
 
-	result = acpi_check_update_proc(sbs);
-	if (result)
-		goto end;
-
-	if (update_time == 0) {
-		result = acpi_sbs_update_run(sbs, battery->id, DATA_TYPE_ALARM);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_update_run() failed"));
-		}
-	}
-
-	if (!battery->battery_present) {
+	if (!battery->present) {
 		seq_printf(seq, "present:                 no\n");
 		goto end;
 	}
 
-	if (battery->info.capacity_mode) {
-		cscale = battery->info.vscale * battery->info.ipscale;
-	} else {
-		cscale = battery->info.ipscale;
-	}
-
+	acpi_battery_get_alarm(battery);
 	seq_printf(seq, "alarm:                   ");
-	if (battery->alarm.remaining_capacity) {
+	if (battery->alarm_capacity)
 		seq_printf(seq, "%i%s\n",
-			   battery->alarm.remaining_capacity * cscale,
-			   battery->info.capacity_mode ? "0 mWh" : " mAh");
-	} else {
+			   battery->alarm_capacity *
+			   acpi_battery_scale(battery),
+			   acpi_battery_units(battery));
+	else
 		seq_printf(seq, "disabled\n");
-	}
-
       end:
-
-	sbs_mutex_unlock(sbs);
-
+	mutex_unlock(&sbs->lock);
 	return result;
 }
 
@@ -1155,59 +656,29 @@
 	struct acpi_battery *battery = seq->private;
 	struct acpi_sbs *sbs = battery->sbs;
 	char alarm_string[12] = { '\0' };
-	int result, old_alarm, new_alarm;
-
-	if (sbs_mutex_lock(sbs)) {
-		return -ENODEV;
-	}
-
-	result = acpi_check_update_proc(sbs);
-	if (result)
-		goto end;
-
-	if (!battery->battery_present) {
+	int result = 0;
+	mutex_lock(&sbs->lock);
+	if (!battery->present) {
 		result = -ENODEV;
 		goto end;
 	}
-
 	if (count > sizeof(alarm_string) - 1) {
 		result = -EINVAL;
 		goto end;
 	}
-
 	if (copy_from_user(alarm_string, buffer, count)) {
 		result = -EFAULT;
 		goto end;
 	}
-
 	alarm_string[count] = 0;
-
-	old_alarm = battery->alarm.remaining_capacity;
-	new_alarm = simple_strtoul(alarm_string, NULL, 0);
-
-	result = acpi_battery_set_alarm(battery, new_alarm);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_set_alarm() failed"));
-		acpi_battery_set_alarm(battery, old_alarm);
-		goto end;
-	}
-	result = acpi_battery_get_alarm(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_get_alarm() failed"));
-		acpi_battery_set_alarm(battery, old_alarm);
-		goto end;
-	}
-
+	battery->alarm_capacity = simple_strtoul(alarm_string, NULL, 0) /
+					acpi_battery_scale(battery);
+	acpi_battery_set_alarm(battery);
       end:
-	sbs_mutex_unlock(sbs);
-
-	if (result) {
+	mutex_unlock(&sbs->lock);
+	if (result)
 		return result;
-	} else {
-		return count;
-	}
+	return count;
 }
 
 static int acpi_battery_alarm_open_fs(struct inode *inode, struct file *file)
@@ -1246,26 +717,15 @@
 
 static int acpi_ac_read_state(struct seq_file *seq, void *offset)
 {
+
 	struct acpi_sbs *sbs = seq->private;
-	int result;
 
-	if (sbs_mutex_lock(sbs)) {
-		return -ENODEV;
-	}
-
-	if (update_time == 0) {
-		result = acpi_sbs_update_run(sbs, -1, DATA_TYPE_AC_STATE);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_update_run() failed"));
-		}
-	}
+	mutex_lock(&sbs->lock);
 
 	seq_printf(seq, "state:                   %s\n",
-		   sbs->ac.ac_present ? "on-line" : "off-line");
+		   sbs->charger_present ? "on-line" : "off-line");
 
-	sbs_mutex_unlock(sbs);
-
+	mutex_unlock(&sbs->lock);
 	return 0;
 }
 
@@ -1282,429 +742,203 @@
 	.owner = THIS_MODULE,
 };
 
+#endif
+
 /* --------------------------------------------------------------------------
                                  Driver Interface
    -------------------------------------------------------------------------- */
+static int acpi_battery_read(struct acpi_battery *battery)
+{
+	int result = 0, saved_present = battery->present;
+	u16 state;
+
+	if (battery->sbs->manager_present) {
+		result = acpi_smbus_read(battery->sbs->hc, SMBUS_READ_WORD,
+				ACPI_SBS_MANAGER, 0x01, (u8 *)&state);
+		if (!result)
+			battery->present = state & (1 << battery->id);
+		state &= 0x0fff;
+		state |= 1 << (battery->id + 12);
+		acpi_smbus_write(battery->sbs->hc, SMBUS_WRITE_WORD,
+				  ACPI_SBS_MANAGER, 0x01, (u8 *)&state, 2);
+	} else if (battery->id == 0)
+		battery->present = 1;
+	if (result || !battery->present)
+		return result;
+
+	if (saved_present != battery->present) {
+		battery->update_time = 0;
+		result = acpi_battery_get_info(battery);
+		if (result)
+			return result;
+	}
+	result = acpi_battery_get_state(battery);
+	return result;
+}
 
 /* Smart Battery */
-
 static int acpi_battery_add(struct acpi_sbs *sbs, int id)
 {
-	int is_present;
+	struct acpi_battery *battery = &sbs->battery[id];
 	int result;
-	char dir_name[32];
-	struct acpi_battery *battery;
 
-	battery = &sbs->battery[id];
-
-	battery->alive = 0;
-
-	battery->init_state = 0;
 	battery->id = id;
 	battery->sbs = sbs;
+	result = acpi_battery_read(battery);
+	if (result)
+		return result;
 
-	result = acpi_battery_select(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_select() failed"));
-		goto end;
+	sprintf(battery->name, ACPI_BATTERY_DIR_NAME, id);
+#ifdef CONFIG_ACPI_PROCFS
+	acpi_sbs_add_fs(&battery->proc_entry, acpi_battery_dir,
+			battery->name, &acpi_battery_info_fops,
+			&acpi_battery_state_fops, &acpi_battery_alarm_fops,
+			battery);
+#endif
+	battery->bat.name = battery->name;
+	battery->bat.type = POWER_SUPPLY_TYPE_BATTERY;
+	if (!acpi_battery_mode(battery)) {
+		battery->bat.properties = sbs_charge_battery_props;
+		battery->bat.num_properties =
+		    ARRAY_SIZE(sbs_charge_battery_props);
+	} else {
+		battery->bat.properties = sbs_energy_battery_props;
+		battery->bat.num_properties =
+		    ARRAY_SIZE(sbs_energy_battery_props);
 	}
-
-	result = acpi_battery_get_present(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_get_present() failed"));
-		goto end;
-	}
-
-	is_present = battery->battery_present;
-
-	if (is_present) {
-		result = acpi_battery_init(battery);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_battery_init() failed"));
-			goto end;
-		}
-		battery->init_state = 1;
-	}
-
-	sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
-
-	result = acpi_sbs_generic_add_fs(&battery->battery_entry,
-					 acpi_battery_dir,
-					 dir_name,
-					 &acpi_battery_info_fops,
-					 &acpi_battery_state_fops,
-					 &acpi_battery_alarm_fops, battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_generic_add_fs() failed"));
-		goto end;
-	}
-	battery->alive = 1;
-
+	battery->bat.get_property = acpi_sbs_battery_get_property;
+	result = power_supply_register(&sbs->device->dev, &battery->bat);
+	device_create_file(battery->bat.dev, &alarm_attr);
 	printk(KERN_INFO PREFIX "%s [%s]: Battery Slot [%s] (battery %s)\n",
-	       ACPI_SBS_DEVICE_NAME, acpi_device_bid(sbs->device), dir_name,
-	       sbs->battery->battery_present ? "present" : "absent");
-
-      end:
+	       ACPI_SBS_DEVICE_NAME, acpi_device_bid(sbs->device),
+	       battery->name, sbs->battery->present ? "present" : "absent");
 	return result;
 }
 
 static void acpi_battery_remove(struct acpi_sbs *sbs, int id)
 {
-
-	if (sbs->battery[id].battery_entry) {
-		acpi_sbs_generic_remove_fs(&(sbs->battery[id].battery_entry),
-					   acpi_battery_dir);
+	if (sbs->battery[id].bat.dev)
+		device_remove_file(sbs->battery[id].bat.dev, &alarm_attr);
+		power_supply_unregister(&sbs->battery[id].bat);
+#ifdef CONFIG_ACPI_PROCFS
+	if (sbs->battery[id].proc_entry) {
+		acpi_sbs_remove_fs(&(sbs->battery[id].proc_entry),
+				   acpi_battery_dir);
 	}
+#endif
 }
 
-static int acpi_ac_add(struct acpi_sbs *sbs)
+static int acpi_charger_add(struct acpi_sbs *sbs)
 {
 	int result;
 
 	result = acpi_ac_get_present(sbs);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_ac_get_present() failed"));
+	if (result)
 		goto end;
-	}
-
-	result = acpi_sbs_generic_add_fs(&sbs->ac_entry,
-					 acpi_ac_dir,
-					 ACPI_AC_DIR_NAME,
-					 NULL, &acpi_ac_state_fops, NULL, sbs);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_generic_add_fs() failed"));
+#ifdef CONFIG_ACPI_PROCFS
+	result = acpi_sbs_add_fs(&sbs->charger_entry, acpi_ac_dir,
+				 ACPI_AC_DIR_NAME, NULL,
+				 &acpi_ac_state_fops, NULL, sbs);
+	if (result)
 		goto end;
-	}
-
+#endif
+	sbs->charger.name = "sbs-charger";
+	sbs->charger.type = POWER_SUPPLY_TYPE_MAINS;
+	sbs->charger.properties = sbs_ac_props;
+	sbs->charger.num_properties = ARRAY_SIZE(sbs_ac_props);
+	sbs->charger.get_property = sbs_get_ac_property;
+	power_supply_register(&sbs->device->dev, &sbs->charger);
 	printk(KERN_INFO PREFIX "%s [%s]: AC Adapter [%s] (%s)\n",
 	       ACPI_SBS_DEVICE_NAME, acpi_device_bid(sbs->device),
-	       ACPI_AC_DIR_NAME, sbs->ac.ac_present ? "on-line" : "off-line");
-
+	       ACPI_AC_DIR_NAME, sbs->charger_present ? "on-line" : "off-line");
       end:
-
 	return result;
 }
 
-static void acpi_ac_remove(struct acpi_sbs *sbs)
+static void acpi_charger_remove(struct acpi_sbs *sbs)
 {
+	if (sbs->charger.dev)
+		power_supply_unregister(&sbs->charger);
+#ifdef CONFIG_ACPI_PROCFS
+	if (sbs->charger_entry)
+		acpi_sbs_remove_fs(&sbs->charger_entry, acpi_ac_dir);
+#endif
+}
 
-	if (sbs->ac_entry) {
-		acpi_sbs_generic_remove_fs(&sbs->ac_entry, acpi_ac_dir);
+void acpi_sbs_callback(void *context)
+{
+	int id;
+	struct acpi_sbs *sbs = context;
+	struct acpi_battery *bat;
+	u8 saved_charger_state = sbs->charger_present;
+	u8 saved_battery_state;
+	acpi_ac_get_present(sbs);
+	if (sbs->charger_present != saved_charger_state) {
+#ifdef CONFIG_ACPI_PROC_EVENT
+		acpi_bus_generate_proc_event4(ACPI_AC_CLASS, ACPI_AC_DIR_NAME,
+					      ACPI_SBS_NOTIFY_STATUS,
+					      sbs->charger_present);
+#endif
+		kobject_uevent(&sbs->charger.dev->kobj, KOBJ_CHANGE);
+	}
+	if (sbs->manager_present) {
+		for (id = 0; id < MAX_SBS_BAT; ++id) {
+			if (!(sbs->batteries_supported & (1 << id)))
+				continue;
+			bat = &sbs->battery[id];
+			saved_battery_state = bat->present;
+			acpi_battery_read(bat);
+			if (saved_battery_state == bat->present)
+				continue;
+#ifdef CONFIG_ACPI_PROC_EVENT
+			acpi_bus_generate_proc_event4(ACPI_BATTERY_CLASS,
+						      bat->name,
+						      ACPI_SBS_NOTIFY_STATUS,
+						      bat->present);
+#endif
+			kobject_uevent(&bat->bat.dev->kobj, KOBJ_CHANGE);
+		}
 	}
 }
 
-static void acpi_sbs_update_time_run(unsigned long data)
-{
-	acpi_os_execute(OSL_GPE_HANDLER, acpi_sbs_update_time, (void *)data);
-}
-
-static int acpi_sbs_update_run(struct acpi_sbs *sbs, int id, int data_type)
-{
-	struct acpi_battery *battery;
-	int result = 0, cnt;
-	int old_ac_present = -1;
-	int old_battery_present = -1;
-	int new_ac_present = -1;
-	int new_battery_present = -1;
-	int id_min = 0, id_max = MAX_SBS_BAT - 1;
-	char dir_name[32];
-	int do_battery_init = 0, do_ac_init = 0;
-	int old_remaining_capacity = 0;
-	int update_battery = 1;
-	int up_tm = update_time;
-
-	if (sbs_zombie(sbs)) {
-		goto end;
-	}
-
-	if (id >= 0) {
-		id_min = id_max = id;
-	}
-
-	if (data_type == DATA_TYPE_COMMON && up_tm > 0) {
-		cnt = up_tm / (up_tm > UPDATE_DELAY ? UPDATE_DELAY : up_tm);
-		if (sbs->run_cnt % cnt != 0) {
-			update_battery = 0;
-		}
-	}
-
-	sbs->run_cnt++;
-
-	old_ac_present = sbs->ac.ac_present;
-
-	result = acpi_ac_get_present(sbs);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_ac_get_present() failed"));
-	}
-
-	new_ac_present = sbs->ac.ac_present;
-
-	do_ac_init = (old_ac_present != new_ac_present);
-	if (sbs->run_cnt == 1 && data_type == DATA_TYPE_COMMON) {
-		do_ac_init = 1;
-	}
-
-	if (do_ac_init) {
-		result = acpi_sbs_generate_event(sbs->device,
-						 ACPI_SBS_AC_NOTIFY_STATUS,
-						 new_ac_present,
-						 ACPI_AC_DIR_NAME,
-						 ACPI_AC_CLASS);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_generate_event() failed"));
-		}
-	}
-
-	if (data_type == DATA_TYPE_COMMON) {
-		if (!do_ac_init && !update_battery) {
-			goto end;
-		}
-	}
-
-	if (data_type == DATA_TYPE_AC_STATE && !do_ac_init) {
-		goto end;
-	}
-
-	for (id = id_min; id <= id_max; id++) {
-		battery = &sbs->battery[id];
-		if (battery->alive == 0) {
-			continue;
-		}
-
-		old_remaining_capacity = battery->state.remaining_capacity;
-
-		old_battery_present = battery->battery_present;
-
-		result = acpi_battery_select(battery);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_battery_select() failed"));
-		}
-
-		result = acpi_battery_get_present(battery);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_battery_get_present() failed"));
-		}
-
-		new_battery_present = battery->battery_present;
-
-		do_battery_init = ((old_battery_present != new_battery_present)
-				   && new_battery_present);
-		if (!new_battery_present)
-			goto event;
-		if (do_ac_init || do_battery_init) {
-			result = acpi_battery_init(battery);
-			if (result) {
-				ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-						"acpi_battery_init() "
-						"failed"));
-			}
-		}
-		if (sbs_zombie(sbs)) {
-			goto end;
-		}
-
-		if ((data_type == DATA_TYPE_COMMON
-		     || data_type == DATA_TYPE_INFO)
-		    && new_battery_present) {
-			result = acpi_battery_get_info(battery);
-			if (result) {
-				ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-						"acpi_battery_get_info() failed"));
-			}
-		}
-		if (data_type == DATA_TYPE_INFO) {
-			continue;
-		}
-		if (sbs_zombie(sbs)) {
-			goto end;
-		}
-
-		if ((data_type == DATA_TYPE_COMMON
-		     || data_type == DATA_TYPE_STATE)
-		    && new_battery_present) {
-			result = acpi_battery_get_state(battery);
-			if (result) {
-				ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-						"acpi_battery_get_state() failed"));
-			}
-		}
-		if (data_type == DATA_TYPE_STATE) {
-			goto event;
-		}
-		if (sbs_zombie(sbs)) {
-			goto end;
-		}
-
-		if ((data_type == DATA_TYPE_COMMON
-		     || data_type == DATA_TYPE_ALARM)
-		    && new_battery_present) {
-			result = acpi_battery_get_alarm(battery);
-			if (result) {
-				ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-						"acpi_battery_get_alarm() "
-						"failed"));
-			}
-		}
-		if (data_type == DATA_TYPE_ALARM) {
-			continue;
-		}
-		if (sbs_zombie(sbs)) {
-			goto end;
-		}
-
-	      event:
-
-		if (old_battery_present != new_battery_present || do_ac_init ||
-		    old_remaining_capacity !=
-		    battery->state.remaining_capacity) {
-			sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
-			result = acpi_sbs_generate_event(sbs->device,
-							 ACPI_SBS_BATTERY_NOTIFY_STATUS,
-							 new_battery_present,
-							 dir_name,
-							 ACPI_BATTERY_CLASS);
-			if (result) {
-				ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-						"acpi_sbs_generate_event() "
-						"failed"));
-			}
-		}
-	}
-
-      end:
-
-	return result;
-}
-
-static void acpi_sbs_update_time(void *data)
-{
-	struct acpi_sbs *sbs = data;
-	unsigned long delay = -1;
-	int result;
-	unsigned int up_tm = update_time;
-
-	if (sbs_mutex_lock(sbs))
-		return;
-
-	result = acpi_sbs_update_run(sbs, -1, DATA_TYPE_COMMON);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_update_run() failed"));
-	}
-
-	if (sbs_zombie(sbs)) {
-		goto end;
-	}
-
-	if (!up_tm) {
-		if (timer_pending(&sbs->update_timer))
-			del_timer(&sbs->update_timer);
-	} else {
-		delay = (up_tm > UPDATE_DELAY ? UPDATE_DELAY : up_tm);
-		delay = jiffies + HZ * delay;
-		if (timer_pending(&sbs->update_timer)) {
-			mod_timer(&sbs->update_timer, delay);
-		} else {
-			sbs->update_timer.data = (unsigned long)data;
-			sbs->update_timer.function = acpi_sbs_update_time_run;
-			sbs->update_timer.expires = delay;
-			add_timer(&sbs->update_timer);
-		}
-	}
-
-      end:
-
-	sbs_mutex_unlock(sbs);
-}
+static int acpi_sbs_remove(struct acpi_device *device, int type);
 
 static int acpi_sbs_add(struct acpi_device *device)
 {
-	struct acpi_sbs *sbs = NULL;
-	int result = 0, remove_result = 0;
+	struct acpi_sbs *sbs;
+	int result = 0;
 	int id;
-	acpi_status status = AE_OK;
-	unsigned long val;
-
-	status =
-	    acpi_evaluate_integer(device->handle, "_EC", NULL, &val);
-	if (ACPI_FAILURE(status)) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "Error obtaining _EC"));
-		return -EIO;
-	}
 
 	sbs = kzalloc(sizeof(struct acpi_sbs), GFP_KERNEL);
 	if (!sbs) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "kzalloc() failed"));
 		result = -ENOMEM;
 		goto end;
 	}
 
-	mutex_init(&sbs->mutex);
+	mutex_init(&sbs->lock);
 
-	sbs_mutex_lock(sbs);
-
-	sbs->base = 0xff & (val >> 8);
+	sbs->hc = acpi_driver_data(device->parent);
 	sbs->device = device;
-
 	strcpy(acpi_device_name(device), ACPI_SBS_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_SBS_CLASS);
 	acpi_driver_data(device) = sbs;
 
-	result = acpi_ac_add(sbs);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "acpi_ac_add() failed"));
-		goto end;
-	}
-
-	acpi_sbsm_get_info(sbs);
-
-	if (!sbs->sbsm_present) {
-		result = acpi_battery_add(sbs, 0);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_battery_add() failed"));
-			goto end;
-		}
-	} else {
-		for (id = 0; id < MAX_SBS_BAT; id++) {
-			if ((sbs->sbsm_batteries_supported & (1 << id))) {
-				result = acpi_battery_add(sbs, id);
-				if (result) {
-					ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-							"acpi_battery_add() failed"));
-					goto end;
-				}
-			}
-		}
-	}
-
-	init_timer(&sbs->update_timer);
-	result = acpi_check_update_proc(sbs);
+	result = acpi_charger_add(sbs);
 	if (result)
 		goto end;
 
+	result = acpi_manager_get_info(sbs);
+	if (!result) {
+		sbs->manager_present = 1;
+		for (id = 0; id < MAX_SBS_BAT; ++id)
+			if ((sbs->batteries_supported & (1 << id)))
+				acpi_battery_add(sbs, id);
+	} else
+		acpi_battery_add(sbs, 0);
+	acpi_smbus_register_callback(sbs->hc, acpi_sbs_callback, sbs);
       end:
-
-	sbs_mutex_unlock(sbs);
-
-	if (result) {
-		remove_result = acpi_sbs_remove(device, 0);
-		if (remove_result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_remove() failed"));
-		}
-	}
-
+	if (result)
+		acpi_sbs_remove(device, 0);
 	return result;
 }
 
@@ -1713,39 +947,25 @@
 	struct acpi_sbs *sbs;
 	int id;
 
-	if (!device) {
+	if (!device)
 		return -EINVAL;
-	}
-
 	sbs = acpi_driver_data(device);
-	if (!sbs) {
+	if (!sbs)
 		return -EINVAL;
-	}
-
-	sbs_mutex_lock(sbs);
-
-	sbs->zombie = 1;
-	del_timer_sync(&sbs->update_timer);
-	acpi_os_wait_events_complete(NULL);
-	del_timer_sync(&sbs->update_timer);
-
-	for (id = 0; id < MAX_SBS_BAT; id++) {
+	mutex_lock(&sbs->lock);
+	acpi_smbus_unregister_callback(sbs->hc);
+	for (id = 0; id < MAX_SBS_BAT; ++id)
 		acpi_battery_remove(sbs, id);
-	}
-
-	acpi_ac_remove(sbs);
-
-	sbs_mutex_unlock(sbs);
-
-	mutex_destroy(&sbs->mutex);
-
+	acpi_charger_remove(sbs);
+	mutex_unlock(&sbs->lock);
+	mutex_destroy(&sbs->lock);
 	kfree(sbs);
-
 	return 0;
 }
 
 static void acpi_sbs_rmdirs(void)
 {
+#ifdef CONFIG_ACPI_PROCFS
 	if (acpi_ac_dir) {
 		acpi_unlock_ac_dir(acpi_ac_dir);
 		acpi_ac_dir = NULL;
@@ -1754,69 +974,58 @@
 		acpi_unlock_battery_dir(acpi_battery_dir);
 		acpi_battery_dir = NULL;
 	}
+#endif
 }
 
 static int acpi_sbs_resume(struct acpi_device *device)
 {
 	struct acpi_sbs *sbs;
-
 	if (!device)
 		return -EINVAL;
-
 	sbs = device->driver_data;
-
-	sbs->run_cnt = 0;
-
+	acpi_sbs_callback(sbs);
 	return 0;
 }
 
+static struct acpi_driver acpi_sbs_driver = {
+	.name = "sbs",
+	.class = ACPI_SBS_CLASS,
+	.ids = sbs_device_ids,
+	.ops = {
+		.add = acpi_sbs_add,
+		.remove = acpi_sbs_remove,
+		.resume = acpi_sbs_resume,
+		},
+};
+
 static int __init acpi_sbs_init(void)
 {
 	int result = 0;
 
 	if (acpi_disabled)
 		return -ENODEV;
-
-	if (capacity_mode != DEF_CAPACITY_UNIT
-	    && capacity_mode != MAH_CAPACITY_UNIT
-	    && capacity_mode != MWH_CAPACITY_UNIT) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"invalid capacity_mode = %d", capacity_mode));
-		return -EINVAL;
-	}
-
+#ifdef CONFIG_ACPI_PROCFS
 	acpi_ac_dir = acpi_lock_ac_dir();
-	if (!acpi_ac_dir) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_lock_ac_dir() failed"));
+	if (!acpi_ac_dir)
 		return -ENODEV;
-	}
-
 	acpi_battery_dir = acpi_lock_battery_dir();
 	if (!acpi_battery_dir) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_lock_battery_dir() failed"));
 		acpi_sbs_rmdirs();
 		return -ENODEV;
 	}
-
+#endif
 	result = acpi_bus_register_driver(&acpi_sbs_driver);
 	if (result < 0) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_bus_register_driver() failed"));
 		acpi_sbs_rmdirs();
 		return -ENODEV;
 	}
-
 	return 0;
 }
 
 static void __exit acpi_sbs_exit(void)
 {
 	acpi_bus_unregister_driver(&acpi_sbs_driver);
-
 	acpi_sbs_rmdirs();
-
 	return;
 }
 
diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c
new file mode 100644
index 0000000..046d7c3
--- /dev/null
+++ b/drivers/acpi/sbshc.c
@@ -0,0 +1,309 @@
+/*
+ * SMBus driver for ACPI Embedded Controller (v0.1)
+ *
+ * Copyright (c) 2007 Alexey Starikovskiy
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation version 2.
+ */
+
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+#include <acpi/actypes.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include "sbshc.h"
+
+#define ACPI_SMB_HC_CLASS	"smbus_host_controller"
+#define ACPI_SMB_HC_DEVICE_NAME	"ACPI SMBus HC"
+
+struct acpi_smb_hc {
+	struct acpi_ec *ec;
+	struct mutex lock;
+	wait_queue_head_t wait;
+	u8 offset;
+	u8 query_bit;
+	smbus_alarm_callback callback;
+	void *context;
+};
+
+static int acpi_smbus_hc_add(struct acpi_device *device);
+static int acpi_smbus_hc_remove(struct acpi_device *device, int type);
+
+static const struct acpi_device_id sbs_device_ids[] = {
+	{"ACPI0001", 0},
+	{"ACPI0005", 0},
+	{"", 0},
+};
+
+MODULE_DEVICE_TABLE(acpi, sbs_device_ids);
+
+static struct acpi_driver acpi_smb_hc_driver = {
+	.name = "smbus_hc",
+	.class = ACPI_SMB_HC_CLASS,
+	.ids = sbs_device_ids,
+	.ops = {
+		.add = acpi_smbus_hc_add,
+		.remove = acpi_smbus_hc_remove,
+		},
+};
+
+union acpi_smb_status {
+	u8 raw;
+	struct {
+		u8 status:5;
+		u8 reserved:1;
+		u8 alarm:1;
+		u8 done:1;
+	} fields;
+};
+
+enum acpi_smb_status_codes {
+	SMBUS_OK = 0,
+	SMBUS_UNKNOWN_FAILURE = 0x07,
+	SMBUS_DEVICE_ADDRESS_NACK = 0x10,
+	SMBUS_DEVICE_ERROR = 0x11,
+	SMBUS_DEVICE_COMMAND_ACCESS_DENIED = 0x12,
+	SMBUS_UNKNOWN_ERROR = 0x13,
+	SMBUS_DEVICE_ACCESS_DENIED = 0x17,
+	SMBUS_TIMEOUT = 0x18,
+	SMBUS_HOST_UNSUPPORTED_PROTOCOL = 0x19,
+	SMBUS_BUSY = 0x1a,
+	SMBUS_PEC_ERROR = 0x1f,
+};
+
+enum acpi_smb_offset {
+	ACPI_SMB_PROTOCOL = 0,	/* protocol, PEC */
+	ACPI_SMB_STATUS = 1,	/* status */
+	ACPI_SMB_ADDRESS = 2,	/* address */
+	ACPI_SMB_COMMAND = 3,	/* command */
+	ACPI_SMB_DATA = 4,	/* 32 data registers */
+	ACPI_SMB_BLOCK_COUNT = 0x24,	/* number of data bytes */
+	ACPI_SMB_ALARM_ADDRESS = 0x25,	/* alarm address */
+	ACPI_SMB_ALARM_DATA = 0x26,	/* 2 bytes alarm data */
+};
+
+static inline int smb_hc_read(struct acpi_smb_hc *hc, u8 address, u8 *data)
+{
+	return ec_read(hc->offset + address, data);
+}
+
+static inline int smb_hc_write(struct acpi_smb_hc *hc, u8 address, u8 data)
+{
+	return ec_write(hc->offset + address, data);
+}
+
+static inline int smb_check_done(struct acpi_smb_hc *hc)
+{
+	union acpi_smb_status status = {.raw = 0};
+	smb_hc_read(hc, ACPI_SMB_STATUS, &status.raw);
+	return status.fields.done && (status.fields.status == SMBUS_OK);
+}
+
+static int wait_transaction_complete(struct acpi_smb_hc *hc, int timeout)
+{
+	if (wait_event_timeout(hc->wait, smb_check_done(hc),
+			       msecs_to_jiffies(timeout)))
+		return 0;
+	else
+		return -ETIME;
+}
+
+int acpi_smbus_transaction(struct acpi_smb_hc *hc, u8 protocol, u8 address,
+		    u8 command, u8 *data, u8 length)
+{
+	int ret = -EFAULT, i;
+	u8 temp, sz = 0;
+
+	mutex_lock(&hc->lock);
+	if (smb_hc_read(hc, ACPI_SMB_PROTOCOL, &temp))
+		goto end;
+	if (temp) {
+		ret = -EBUSY;
+		goto end;
+	}
+	smb_hc_write(hc, ACPI_SMB_COMMAND, command);
+	smb_hc_write(hc, ACPI_SMB_COMMAND, command);
+	if (!(protocol & 0x01)) {
+		smb_hc_write(hc, ACPI_SMB_BLOCK_COUNT, length);
+		for (i = 0; i < length; ++i)
+			smb_hc_write(hc, ACPI_SMB_DATA + i, data[i]);
+	}
+	smb_hc_write(hc, ACPI_SMB_ADDRESS, address << 1);
+	smb_hc_write(hc, ACPI_SMB_PROTOCOL, protocol);
+	/*
+	 * Wait for completion. Save the status code, data size,
+	 * and data into the return package (if required by the protocol).
+	 */
+	ret = wait_transaction_complete(hc, 1000);
+	if (ret || !(protocol & 0x01))
+		goto end;
+	switch (protocol) {
+	case SMBUS_RECEIVE_BYTE:
+	case SMBUS_READ_BYTE:
+		sz = 1;
+		break;
+	case SMBUS_READ_WORD:
+		sz = 2;
+		break;
+	case SMBUS_READ_BLOCK:
+		if (smb_hc_read(hc, ACPI_SMB_BLOCK_COUNT, &sz)) {
+			ret = -EFAULT;
+			goto end;
+		}
+		sz &= 0x1f;
+		break;
+	}
+	for (i = 0; i < sz; ++i)
+		smb_hc_read(hc, ACPI_SMB_DATA + i, &data[i]);
+      end:
+	mutex_unlock(&hc->lock);
+	return ret;
+}
+
+int acpi_smbus_read(struct acpi_smb_hc *hc, u8 protocol, u8 address,
+		    u8 command, u8 *data)
+{
+	return acpi_smbus_transaction(hc, protocol, address, command, data, 0);
+}
+
+EXPORT_SYMBOL_GPL(acpi_smbus_read);
+
+int acpi_smbus_write(struct acpi_smb_hc *hc, u8 protocol, u8 address,
+		     u8 command, u8 *data, u8 length)
+{
+	return acpi_smbus_transaction(hc, protocol, address, command, data, length);
+}
+
+EXPORT_SYMBOL_GPL(acpi_smbus_write);
+
+int acpi_smbus_register_callback(struct acpi_smb_hc *hc,
+			         smbus_alarm_callback callback, void *context)
+{
+	mutex_lock(&hc->lock);
+	hc->callback = callback;
+	hc->context = context;
+	mutex_unlock(&hc->lock);
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(acpi_smbus_register_callback);
+
+int acpi_smbus_unregister_callback(struct acpi_smb_hc *hc)
+{
+	mutex_lock(&hc->lock);
+	hc->callback = NULL;
+	hc->context = NULL;
+	mutex_unlock(&hc->lock);
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(acpi_smbus_unregister_callback);
+
+static void acpi_smbus_callback(void *context)
+{
+	struct acpi_smb_hc *hc = context;
+
+	if (hc->callback)
+		hc->callback(hc->context);
+}
+
+static int smbus_alarm(void *context)
+{
+	struct acpi_smb_hc *hc = context;
+	union acpi_smb_status status;
+	if (smb_hc_read(hc, ACPI_SMB_STATUS, &status.raw))
+		return 0;
+	/* Check if it is only a completion notify */
+	if (status.fields.done)
+		wake_up(&hc->wait);
+	if (!status.fields.alarm)
+		return 0;
+	mutex_lock(&hc->lock);
+	smb_hc_write(hc, ACPI_SMB_STATUS, status.raw);
+	if (hc->callback)
+		acpi_os_execute(OSL_GPE_HANDLER, acpi_smbus_callback, hc);
+	mutex_unlock(&hc->lock);
+	return 0;
+}
+
+typedef int (*acpi_ec_query_func) (void *data);
+
+extern int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
+			      acpi_handle handle, acpi_ec_query_func func,
+			      void *data);
+
+static int acpi_smbus_hc_add(struct acpi_device *device)
+{
+	int status;
+	unsigned long val;
+	struct acpi_smb_hc *hc;
+
+	if (!device)
+		return -EINVAL;
+
+	status = acpi_evaluate_integer(device->handle, "_EC", NULL, &val);
+	if (ACPI_FAILURE(status)) {
+		printk(KERN_ERR PREFIX "error obtaining _EC.\n");
+		return -EIO;
+	}
+
+	strcpy(acpi_device_name(device), ACPI_SMB_HC_DEVICE_NAME);
+	strcpy(acpi_device_class(device), ACPI_SMB_HC_CLASS);
+
+	hc = kzalloc(sizeof(struct acpi_smb_hc), GFP_KERNEL);
+	if (!hc)
+		return -ENOMEM;
+	mutex_init(&hc->lock);
+	init_waitqueue_head(&hc->wait);
+
+	hc->ec = acpi_driver_data(device->parent);
+	hc->offset = (val >> 8) & 0xff;
+	hc->query_bit = val & 0xff;
+	acpi_driver_data(device) = hc;
+
+	acpi_ec_add_query_handler(hc->ec, hc->query_bit, NULL, smbus_alarm, hc);
+	printk(KERN_INFO PREFIX "SBS HC: EC = 0x%p, offset = 0x%0x, query_bit = 0x%0x\n",
+		hc->ec, hc->offset, hc->query_bit);
+
+	return 0;
+}
+
+extern void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit);
+
+static int acpi_smbus_hc_remove(struct acpi_device *device, int type)
+{
+	struct acpi_smb_hc *hc;
+
+	if (!device)
+		return -EINVAL;
+
+	hc = acpi_driver_data(device);
+	acpi_ec_remove_query_handler(hc->ec, hc->query_bit);
+	kfree(hc);
+	return 0;
+}
+
+static int __init acpi_smb_hc_init(void)
+{
+	int result;
+
+	result = acpi_bus_register_driver(&acpi_smb_hc_driver);
+	if (result < 0)
+		return -ENODEV;
+	return 0;
+}
+
+static void __exit acpi_smb_hc_exit(void)
+{
+	acpi_bus_unregister_driver(&acpi_smb_hc_driver);
+}
+
+module_init(acpi_smb_hc_init);
+module_exit(acpi_smb_hc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alexey Starikovskiy");
+MODULE_DESCRIPTION("ACPI SMBus HC driver");
diff --git a/drivers/acpi/sbshc.h b/drivers/acpi/sbshc.h
new file mode 100644
index 0000000..3bda349
--- /dev/null
+++ b/drivers/acpi/sbshc.h
@@ -0,0 +1,27 @@
+struct acpi_smb_hc;
+enum acpi_smb_protocol {
+	SMBUS_WRITE_QUICK = 2,
+	SMBUS_READ_QUICK = 3,
+	SMBUS_SEND_BYTE = 4,
+	SMBUS_RECEIVE_BYTE = 5,
+	SMBUS_WRITE_BYTE = 6,
+	SMBUS_READ_BYTE = 7,
+	SMBUS_WRITE_WORD  = 8,
+	SMBUS_READ_WORD  = 9,
+	SMBUS_WRITE_BLOCK = 0xa,
+	SMBUS_READ_BLOCK = 0xb,
+	SMBUS_PROCESS_CALL = 0xc,
+	SMBUS_BLOCK_PROCESS_CALL = 0xd,
+};
+
+static const u8 SMBUS_PEC = 0x80;
+
+typedef void (*smbus_alarm_callback)(void *context);
+
+extern int acpi_smbus_read(struct acpi_smb_hc *hc, u8 protocol, u8 address,
+	       u8 command, u8 * data);
+extern int acpi_smbus_write(struct acpi_smb_hc *hc, u8 protocol, u8 slave_address,
+		u8 command, u8 * data, u8 length);
+extern int acpi_smbus_register_callback(struct acpi_smb_hc *hc,
+			         smbus_alarm_callback callback, void *context);
+extern int acpi_smbus_unregister_callback(struct acpi_smb_hc *hc);
diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c
index 048295e..f3d3867 100644
--- a/drivers/acpi/sleep/main.c
+++ b/drivers/acpi/sleep/main.c
@@ -44,7 +44,6 @@
 	ACPI_FLUSH_CPU_CACHE();
 	acpi_enable_wakeup_device_prep(acpi_state);
 #endif
-	acpi_gpe_sleep_prepare(acpi_state);
 	acpi_enter_sleep_state_prep(acpi_state);
 	return 0;
 }
@@ -268,6 +267,11 @@
 
 static void acpi_hibernation_finish(void)
 {
+	/*
+	 * If ACPI is not enabled by the BIOS and the boot kernel, we need to
+	 * enable it here.
+	 */
+	acpi_enable();
 	acpi_leave_sleep_state(ACPI_STATE_S4);
 	acpi_disable_wakeup_device(ACPI_STATE_S4);
 
diff --git a/drivers/acpi/sleep/sleep.h b/drivers/acpi/sleep/sleep.h
index ff1f850..a2ea125 100644
--- a/drivers/acpi/sleep/sleep.h
+++ b/drivers/acpi/sleep/sleep.h
@@ -5,6 +5,5 @@
 extern void acpi_enable_wakeup_device_prep(u8 sleep_state);
 extern void acpi_enable_wakeup_device(u8 sleep_state);
 extern void acpi_disable_wakeup_device(u8 sleep_state);
-extern void acpi_gpe_sleep_prepare(u32 sleep_state);
 
 extern int acpi_sleep_prepare(u32 acpi_state);
diff --git a/drivers/acpi/sleep/wakeup.c b/drivers/acpi/sleep/wakeup.c
index 97c27dd..ed8e41b 100644
--- a/drivers/acpi/sleep/wakeup.c
+++ b/drivers/acpi/sleep/wakeup.c
@@ -64,36 +64,29 @@
 	ACPI_FUNCTION_TRACE("acpi_enable_wakeup_device");
 	spin_lock(&acpi_device_lock);
 	list_for_each_safe(node, next, &acpi_wakeup_device_list) {
-		struct acpi_device *dev = container_of(node,
-						       struct acpi_device,
-						       wakeup_list);
-
+		struct acpi_device *dev =
+			container_of(node, struct acpi_device, wakeup_list);
+		if (!dev->wakeup.flags.valid)
+			continue;
 		/* If users want to disable run-wake GPE,
 		 * we only disable it for wake and leave it for runtime
 		 */
-		if (dev->wakeup.flags.run_wake && !dev->wakeup.state.enabled) {
-			spin_unlock(&acpi_device_lock);
-			acpi_set_gpe_type(dev->wakeup.gpe_device,
-					  dev->wakeup.gpe_number,
-					  ACPI_GPE_TYPE_RUNTIME);
-			/* Re-enable it, since set_gpe_type will disable it */
-			acpi_enable_gpe(dev->wakeup.gpe_device,
-					dev->wakeup.gpe_number, ACPI_ISR);
-			spin_lock(&acpi_device_lock);
+		if (!dev->wakeup.state.enabled ||
+		    sleep_state > (u32) dev->wakeup.sleep_state) {
+			if (dev->wakeup.flags.run_wake) {
+				spin_unlock(&acpi_device_lock);
+				/* set_gpe_type will disable GPE, leave it like that */
+				acpi_set_gpe_type(dev->wakeup.gpe_device,
+						  dev->wakeup.gpe_number,
+						  ACPI_GPE_TYPE_RUNTIME);
+				spin_lock(&acpi_device_lock);
+			}
 			continue;
 		}
-
-		if (!dev->wakeup.flags.valid ||
-		    !dev->wakeup.state.enabled ||
-		    (sleep_state > (u32) dev->wakeup.sleep_state))
-			continue;
-
 		spin_unlock(&acpi_device_lock);
-		/* run-wake GPE has been enabled */
 		if (!dev->wakeup.flags.run_wake)
 			acpi_enable_gpe(dev->wakeup.gpe_device,
 					dev->wakeup.gpe_number, ACPI_ISR);
-		dev->wakeup.state.active = 1;
 		spin_lock(&acpi_device_lock);
 	}
 	spin_unlock(&acpi_device_lock);
@@ -112,27 +105,26 @@
 
 	spin_lock(&acpi_device_lock);
 	list_for_each_safe(node, next, &acpi_wakeup_device_list) {
-		struct acpi_device *dev = container_of(node,
-						       struct acpi_device,
-						       wakeup_list);
+		struct acpi_device *dev =
+			container_of(node, struct acpi_device, wakeup_list);
 
-		if (dev->wakeup.flags.run_wake && !dev->wakeup.state.enabled) {
-			spin_unlock(&acpi_device_lock);
-			acpi_set_gpe_type(dev->wakeup.gpe_device,
-					  dev->wakeup.gpe_number,
-					  ACPI_GPE_TYPE_WAKE_RUN);
-			/* Re-enable it, since set_gpe_type will disable it */
-			acpi_enable_gpe(dev->wakeup.gpe_device,
-					dev->wakeup.gpe_number, ACPI_NOT_ISR);
-			spin_lock(&acpi_device_lock);
+		if (!dev->wakeup.flags.valid)
+			continue;
+		if (!dev->wakeup.state.enabled ||
+		    sleep_state > (u32) dev->wakeup.sleep_state) {
+			if (dev->wakeup.flags.run_wake) {
+				spin_unlock(&acpi_device_lock);
+				acpi_set_gpe_type(dev->wakeup.gpe_device,
+						  dev->wakeup.gpe_number,
+						  ACPI_GPE_TYPE_WAKE_RUN);
+				/* Re-enable it, since set_gpe_type will disable it */
+				acpi_enable_gpe(dev->wakeup.gpe_device,
+						dev->wakeup.gpe_number, ACPI_NOT_ISR);
+				spin_lock(&acpi_device_lock);
+			}
 			continue;
 		}
 
-		if (!dev->wakeup.flags.valid ||
-		    !dev->wakeup.state.active ||
-		    (sleep_state > (u32) dev->wakeup.sleep_state))
-			continue;
-
 		spin_unlock(&acpi_device_lock);
 		acpi_disable_wakeup_device_power(dev);
 		/* Never disable run-wake GPE */
@@ -142,7 +134,6 @@
 			acpi_clear_gpe(dev->wakeup.gpe_device,
 				       dev->wakeup.gpe_number, ACPI_NOT_ISR);
 		}
-		dev->wakeup.state.active = 0;
 		spin_lock(&acpi_device_lock);
 	}
 	spin_unlock(&acpi_device_lock);
@@ -160,48 +151,20 @@
 		struct acpi_device *dev = container_of(node,
 						       struct acpi_device,
 						       wakeup_list);
-
 		/* In case user doesn't load button driver */
-		if (dev->wakeup.flags.run_wake && !dev->wakeup.state.enabled) {
-			spin_unlock(&acpi_device_lock);
-			acpi_set_gpe_type(dev->wakeup.gpe_device,
-					  dev->wakeup.gpe_number,
-					  ACPI_GPE_TYPE_WAKE_RUN);
-			acpi_enable_gpe(dev->wakeup.gpe_device,
-					dev->wakeup.gpe_number, ACPI_NOT_ISR);
-			dev->wakeup.state.enabled = 1;
-			spin_lock(&acpi_device_lock);
-		}
+		if (!dev->wakeup.flags.run_wake || dev->wakeup.state.enabled)
+			continue;
+		spin_unlock(&acpi_device_lock);
+		acpi_set_gpe_type(dev->wakeup.gpe_device,
+				  dev->wakeup.gpe_number,
+				  ACPI_GPE_TYPE_WAKE_RUN);
+		acpi_enable_gpe(dev->wakeup.gpe_device,
+				dev->wakeup.gpe_number, ACPI_NOT_ISR);
+		dev->wakeup.state.enabled = 1;
+		spin_lock(&acpi_device_lock);
 	}
 	spin_unlock(&acpi_device_lock);
-
 	return 0;
 }
 
 late_initcall(acpi_wakeup_device_init);
-
-/*
- * Disable all wakeup GPEs before entering requested sleep state.
- *	@sleep_state:	ACPI state
- * Since acpi_enter_sleep_state() will disable all
- * RUNTIME GPEs, we simply mark all GPES that
- * are not enabled for wakeup from requested state as RUNTIME.
- */
-void acpi_gpe_sleep_prepare(u32 sleep_state)
-{
-	struct list_head *node, *next;
-
-	list_for_each_safe(node, next, &acpi_wakeup_device_list) {
-		struct acpi_device *dev = container_of(node,
-						       struct acpi_device,
-						       wakeup_list);
-
-		/* The GPE can wakeup system from this state, don't touch it */
-		if ((u32) dev->wakeup.sleep_state >= sleep_state)
-			continue;
-		/* acpi_set_gpe_type will automatically disable GPE */
-		acpi_set_gpe_type(dev->wakeup.gpe_device,
-				  dev->wakeup.gpe_number,
-				  ACPI_GPE_TYPE_RUNTIME);
-	}
-}
diff --git a/drivers/acpi/tables/tbutils.c b/drivers/acpi/tables/tbutils.c
index 8cc9492..5f1d85f 100644
--- a/drivers/acpi/tables/tbutils.c
+++ b/drivers/acpi/tables/tbutils.c
@@ -400,7 +400,7 @@
 	u32 table_count;
 	struct acpi_table_header *table;
 	acpi_physical_address address;
-	acpi_physical_address rsdt_address;
+	acpi_physical_address uninitialized_var(rsdt_address);
 	u32 length;
 	u8 *table_entry;
 	acpi_status status;
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index ad898e1..5f79b44 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -195,6 +195,7 @@
 	struct acpi_thermal_trips trips;
 	struct acpi_handle_list devices;
 	struct timer_list timer;
+	struct mutex lock;
 };
 
 static const struct file_operations acpi_thermal_state_fops = {
@@ -711,6 +712,7 @@
 	int result = 0;
 	struct acpi_thermal *tz = data;
 	unsigned long sleep_time = 0;
+	unsigned long timeout_jiffies = 0;
 	int i = 0;
 	struct acpi_thermal_state state;
 
@@ -720,11 +722,15 @@
 		return;
 	}
 
+	/* Check if someone else is already running */
+	if (!mutex_trylock(&tz->lock))
+		return;
+
 	state = tz->state;
 
 	result = acpi_thermal_get_temperature(tz);
 	if (result)
-		return;
+		goto unlock;
 
 	memset(&tz->state, 0, sizeof(tz->state));
 
@@ -787,10 +793,13 @@
 	 * a thermal event occurs).  Note that _TSP and _TZD values are
 	 * given in 1/10th seconds (we must covert to milliseconds).
 	 */
-	if (tz->state.passive)
+	if (tz->state.passive) {
 		sleep_time = tz->trips.passive.tsp * 100;
-	else if (tz->polling_frequency > 0)
+		timeout_jiffies =  jiffies + (HZ * sleep_time) / 1000;
+	} else if (tz->polling_frequency > 0) {
 		sleep_time = tz->polling_frequency * 100;
+		timeout_jiffies =  round_jiffies(jiffies + (HZ * sleep_time) / 1000);
+	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "%s: temperature[%lu] sleep[%lu]\n",
 			  tz->name, tz->temperature, sleep_time));
@@ -804,17 +813,16 @@
 			del_timer(&(tz->timer));
 	} else {
 		if (timer_pending(&(tz->timer)))
-			mod_timer(&(tz->timer),
-					jiffies + (HZ * sleep_time) / 1000);
+			mod_timer(&(tz->timer), timeout_jiffies);
 		else {
 			tz->timer.data = (unsigned long)tz;
 			tz->timer.function = acpi_thermal_run;
-			tz->timer.expires = jiffies + (HZ * sleep_time) / 1000;
+			tz->timer.expires = timeout_jiffies;
 			add_timer(&(tz->timer));
 		}
 	}
-
-	return;
+      unlock:
+	mutex_unlock(&tz->lock);
 }
 
 /* --------------------------------------------------------------------------
@@ -1251,7 +1259,7 @@
 	strcpy(acpi_device_name(device), ACPI_THERMAL_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_THERMAL_CLASS);
 	acpi_driver_data(device) = tz;
-
+	mutex_init(&tz->lock);
 	result = acpi_thermal_get_info(tz);
 	if (result)
 		goto end;
@@ -1321,7 +1329,7 @@
 	}
 
 	acpi_thermal_remove_fs(device);
-
+	mutex_destroy(&tz->lock);
 	kfree(tz);
 	return 0;
 }
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index b8a2095..bac956b 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -409,14 +409,17 @@
 static int
 acpi_video_device_lcd_set_level(struct acpi_video_device *device, int level)
 {
-	int status;
+	int status = AE_OK;
 	union acpi_object arg0 = { ACPI_TYPE_INTEGER };
 	struct acpi_object_list args = { 1, &arg0 };
 
 
 	arg0.integer.value = level;
-	status = acpi_evaluate_object(device->dev->handle, "_BCM", &args, NULL);
 
+	if (device->cap._BCM)
+		status = acpi_evaluate_object(device->dev->handle, "_BCM",
+					      &args, NULL);
+	device->brightness->curr = level;
 	return status;
 }
 
@@ -424,11 +427,11 @@
 acpi_video_device_lcd_get_level_current(struct acpi_video_device *device,
 					unsigned long *level)
 {
-	int status;
-
-	status = acpi_evaluate_integer(device->dev->handle, "_BQC", NULL, level);
-
-	return status;
+	if (device->cap._BQC)
+		return acpi_evaluate_integer(device->dev->handle, "_BQC", NULL,
+					     level);
+	*level = device->brightness->curr;
+	return AE_OK;
 }
 
 static int
@@ -1633,9 +1636,20 @@
 acpi_video_get_next_level(struct acpi_video_device *device,
 			  u32 level_current, u32 event)
 {
-	int min, max, min_above, max_below, i, l;
+	int min, max, min_above, max_below, i, l, delta = 255;
 	max = max_below = 0;
 	min = min_above = 255;
+	/* Find closest level to level_current */
+	for (i = 0; i < device->brightness->count; i++) {
+		l = device->brightness->levels[i];
+		if (abs(l - level_current) < abs(delta)) {
+			delta = l - level_current;
+			if (!delta)
+				break;
+		}
+	}
+	/* Ajust level_current to closest available level */
+	level_current += delta;
 	for (i = 0; i < device->brightness->count; i++) {
 		l = device->brightness->levels[i];
 		if (l < min)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index bbaa545..629eadb 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1392,7 +1392,7 @@
  *	@tf: Taskfile registers for the command and the result
  *	@cdb: CDB for packet command
  *	@dma_dir: Data tranfer direction of the command
- *	@sg: sg list for the data buffer of the command
+ *	@sgl: sg list for the data buffer of the command
  *	@n_elem: Number of sg entries
  *	@timeout: Timeout in msecs (0 for default)
  *
diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c
index 53070f6..d753e56 100644
--- a/drivers/ata/pata_cs5536.c
+++ b/drivers/ata/pata_cs5536.c
@@ -40,7 +40,7 @@
 #include <asm/msr.h>
 
 #define DRV_NAME	"pata_cs5536"
-#define DRV_VERSION	"0.0.5"
+#define DRV_VERSION	"0.0.6"
 
 enum {
 	CFG			= 0,
@@ -214,7 +214,7 @@
 		cs5536_read(pdev, DTC, &dtc);
 
 		dtc &= ~(IDE_DRV_MASK << dshift);
-		dtc |= mwdma_timings[mode] << dshift;
+		dtc |= mwdma_timings[mode - XFER_MW_DMA_0] << dshift;
 
 		cs5536_write(pdev, DTC, dtc);
 	}
diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c
index 8d98a9f..f147dc7 100644
--- a/drivers/ata/sata_sis.c
+++ b/drivers/ata/sata_sis.c
@@ -92,7 +92,7 @@
 	.queuecommand		= ata_scsi_queuecmd,
 	.can_queue		= ATA_DEF_QUEUE,
 	.this_id		= ATA_SHT_THIS_ID,
-	.sg_tablesize		= ATA_MAX_PRD,
+	.sg_tablesize		= LIBATA_MAX_PRD,
 	.cmd_per_lun		= ATA_SHT_CMD_PER_LUN,
 	.emulated		= ATA_SHT_EMULATED,
 	.use_clustering		= ATA_SHT_USE_CLUSTERING,
@@ -166,11 +166,11 @@
 	return addr;
 }
 
-static u32 sis_scr_cfg_read (struct ata_port *ap, unsigned int sc_reg)
+static u32 sis_scr_cfg_read (struct ata_port *ap, unsigned int sc_reg, u32 *val)
 {
 	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
 	unsigned int cfg_addr = get_scr_cfg_addr(ap, sc_reg);
-	u32 val, val2 = 0;
+	u32 val2 = 0;
 	u8 pmr;
 
 	if (sc_reg == SCR_ERROR) /* doesn't exist in PCI cfg space */
@@ -178,13 +178,16 @@
 
 	pci_read_config_byte(pdev, SIS_PMR, &pmr);
 
-	pci_read_config_dword(pdev, cfg_addr, &val);
+	pci_read_config_dword(pdev, cfg_addr, val);
 
 	if ((pdev->device == 0x0182) || (pdev->device == 0x0183) ||
 	    (pdev->device == 0x1182) || (pmr & SIS_PMR_COMBINED))
 		pci_read_config_dword(pdev, cfg_addr+0x10, &val2);
 
-	return (val|val2) &  0xfffffffb; /* avoid problems with powerdowned ports */
+	*val |= val2;
+	*val &= 0xfffffffb;	/* avoid problems with powerdowned ports */
+
+	return 0;
 }
 
 static void sis_scr_cfg_write (struct ata_port *ap, unsigned int sc_reg, u32 val)
@@ -214,7 +217,7 @@
 		return -EINVAL;
 
 	if (ap->flags & SIS_FLAG_CFGSCR)
-		return sis_scr_cfg_read(ap, sc_reg);
+		return sis_scr_cfg_read(ap, sc_reg, val);
 
 	pci_read_config_byte(pdev, SIS_PMR, &pmr);
 
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index cb136a9..ac4a0cb 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -188,7 +188,7 @@
 		if (signal_pending(current)) {
 			siginfo_t info;
 			printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
-				current->pid, current->comm,
+				task_pid_nr(current), current->comm,
 				dequeue_signal_lock(current, &current->blocked, &info));
 			result = -EINTR;
 			sock_shutdown(lo, !send);
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index d70745c..af05610 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1107,7 +1107,7 @@
 		       is the default case! */
 		    cdinfo(CD_OPEN, "bummer. wrong media type.\n"); 
 		    cdinfo(CD_WARNING, "pid %d must open device O_NONBLOCK!\n",
-					(unsigned int)current->pid); 
+					(unsigned int)task_pid_nr(current));
 		    ret=-EMEDIUMTYPE;
 		    goto clean_up_and_return;
 		}
diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c
index 856774f..d24a6c2 100644
--- a/drivers/char/drm/drm_bufs.c
+++ b/drivers/char/drm/drm_bufs.c
@@ -1456,7 +1456,7 @@
 		buf = dma->buflist[idx];
 		if (buf->file_priv != file_priv) {
 			DRM_ERROR("Process %d freeing buffer not owned\n",
-				  current->pid);
+				  task_pid_nr(current));
 			return -EINVAL;
 		}
 		drm_free_buffer(dev, buf);
diff --git a/drivers/char/drm/drm_drv.c b/drivers/char/drm/drm_drv.c
index 72668b1..44a4626 100644
--- a/drivers/char/drm/drm_drv.c
+++ b/drivers/char/drm/drm_drv.c
@@ -463,7 +463,7 @@
 	++file_priv->ioctl_count;
 
 	DRM_DEBUG("pid=%d, cmd=0x%02x, nr=0x%02x, dev 0x%lx, auth=%d\n",
-		  current->pid, cmd, nr,
+		  task_pid_nr(current), cmd, nr,
 		  (long)old_encode_dev(file_priv->head->device),
 		  file_priv->authenticated);
 
diff --git a/drivers/char/drm/drm_fops.c b/drivers/char/drm/drm_fops.c
index f383fc3..3992f732 100644
--- a/drivers/char/drm/drm_fops.c
+++ b/drivers/char/drm/drm_fops.c
@@ -234,7 +234,7 @@
 	if (!drm_cpu_valid())
 		return -EINVAL;
 
-	DRM_DEBUG("pid = %d, minor = %d\n", current->pid, minor);
+	DRM_DEBUG("pid = %d, minor = %d\n", task_pid_nr(current), minor);
 
 	priv = drm_alloc(sizeof(*priv), DRM_MEM_FILES);
 	if (!priv)
@@ -244,7 +244,7 @@
 	filp->private_data = priv;
 	priv->filp = filp;
 	priv->uid = current->euid;
-	priv->pid = current->pid;
+	priv->pid = task_pid_nr(current);
 	priv->minor = minor;
 	priv->head = drm_heads[minor];
 	priv->ioctl_count = 0;
@@ -339,7 +339,8 @@
 	 */
 
 	DRM_DEBUG("pid = %d, device = 0x%lx, open_count = %d\n",
-		  current->pid, (long)old_encode_dev(file_priv->head->device),
+		  task_pid_nr(current),
+		  (long)old_encode_dev(file_priv->head->device),
 		  dev->open_count);
 
 	if (dev->driver->reclaim_buffers_locked && dev->lock.hw_lock) {
diff --git a/drivers/char/drm/drm_lock.c b/drivers/char/drm/drm_lock.c
index c6b73e7..bea2a7d 100644
--- a/drivers/char/drm/drm_lock.c
+++ b/drivers/char/drm/drm_lock.c
@@ -58,12 +58,12 @@
 
 	if (lock->context == DRM_KERNEL_CONTEXT) {
 		DRM_ERROR("Process %d using kernel context %d\n",
-			  current->pid, lock->context);
+			  task_pid_nr(current), lock->context);
 		return -EINVAL;
 	}
 
 	DRM_DEBUG("%d (pid %d) requests lock (0x%08x), flags = 0x%08x\n",
-		  lock->context, current->pid,
+		  lock->context, task_pid_nr(current),
 		  dev->lock.hw_lock->lock, lock->flags);
 
 	if (drm_core_check_feature(dev, DRIVER_DMA_QUEUE))
@@ -153,7 +153,7 @@
 
 	if (lock->context == DRM_KERNEL_CONTEXT) {
 		DRM_ERROR("Process %d using kernel context %d\n",
-			  current->pid, lock->context);
+			  task_pid_nr(current), lock->context);
 		return -EINVAL;
 	}
 
diff --git a/drivers/char/drm/drm_os_linux.h b/drivers/char/drm/drm_os_linux.h
index 114e54e..76e44ac 100644
--- a/drivers/char/drm/drm_os_linux.h
+++ b/drivers/char/drm/drm_os_linux.h
@@ -7,7 +7,7 @@
 #include <linux/delay.h>
 
 /** Current process ID */
-#define DRM_CURRENTPID			current->pid
+#define DRM_CURRENTPID			task_pid_nr(current)
 #define DRM_SUSER(p)			capable(CAP_SYS_ADMIN)
 #define DRM_UDELAY(d)			udelay(d)
 /** Read a byte from a MMIO region */
diff --git a/drivers/char/drm/i810_dma.c b/drivers/char/drm/i810_dma.c
index 8e841bd..eb381a7 100644
--- a/drivers/char/drm/i810_dma.c
+++ b/drivers/char/drm/i810_dma.c
@@ -1024,7 +1024,7 @@
 	retcode = i810_dma_get_buffer(dev, d, file_priv);
 
 	DRM_DEBUG("i810_dma: %d returning %d, granted = %d\n",
-		  current->pid, retcode, d->granted);
+		  task_pid_nr(current), retcode, d->granted);
 
 	sarea_priv->last_dispatch = (int)hw_status[5];
 
diff --git a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c
index 43a1f78..69a363e 100644
--- a/drivers/char/drm/i830_dma.c
+++ b/drivers/char/drm/i830_dma.c
@@ -1409,7 +1409,7 @@
 	retcode = i830_dma_get_buffer(dev, d, file_priv);
 
 	DRM_DEBUG("i830_dma: %d returning %d, granted = %d\n",
-		  current->pid, retcode, d->granted);
+		  task_pid_nr(current), retcode, d->granted);
 
 	sarea_priv->last_dispatch = (int)hw_status[5];
 
diff --git a/drivers/char/esp.c b/drivers/char/esp.c
index 2e7ae42..0f8fb13 100644
--- a/drivers/char/esp.c
+++ b/drivers/char/esp.c
@@ -58,10 +58,10 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/delay.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
-#include <asm/bitops.h>
 
 #include <asm/dma.h>
 #include <linux/slab.h>
diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c
index 212276a..fc54d23 100644
--- a/drivers/char/keyboard.c
+++ b/drivers/char/keyboard.c
@@ -42,6 +42,7 @@
 #include <linux/sysrq.h>
 #include <linux/input.h>
 #include <linux/reboot.h>
+#include <linux/notifier.h>
 
 extern void ctrl_alt_del(void);
 
@@ -81,7 +82,8 @@
 typedef void (k_handler_fn)(struct vc_data *vc, unsigned char value,
 			    char up_flag);
 static k_handler_fn K_HANDLERS;
-static k_handler_fn *k_handler[16] = { K_HANDLERS };
+k_handler_fn *k_handler[16] = { K_HANDLERS };
+EXPORT_SYMBOL_GPL(k_handler);
 
 #define FN_HANDLERS\
 	fn_null,	fn_enter,	fn_show_ptregs,	fn_show_mem,\
@@ -127,7 +129,7 @@
  */
 
 static struct input_handler kbd_handler;
-static unsigned long key_down[NBITS(KEY_MAX)];		/* keyboard key bitmap */
+static unsigned long key_down[BITS_TO_LONGS(KEY_CNT)];	/* keyboard key bitmap */
 static unsigned char shift_down[NR_SHIFT];		/* shift state counters.. */
 static int dead_key_next;
 static int npadch = -1;					/* -1 or number assembled on pad */
@@ -160,6 +162,23 @@
 static int sysrq_alt;
 
 /*
+ * Notifier list for console keyboard events
+ */
+static ATOMIC_NOTIFIER_HEAD(keyboard_notifier_list);
+
+int register_keyboard_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&keyboard_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_keyboard_notifier);
+
+int unregister_keyboard_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&keyboard_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_keyboard_notifier);
+
+/*
  * Translation of scancodes to keycodes. We set them on only the first
  * keyboard in the list that accepts the scancode and keycode.
  * Explanation for not choosing the first attached keyboard anymore:
@@ -1130,6 +1149,7 @@
 	unsigned char type, raw_mode;
 	struct tty_struct *tty;
 	int shift_final;
+	struct keyboard_notifier_param param = { .vc = vc, .value = keycode, .down = down };
 
 	tty = vc->vc_tty;
 
@@ -1217,10 +1237,11 @@
 		return;
 	}
 
-	shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate;
+	param.shift = shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate;
 	key_map = key_maps[shift_final];
 
-	if (!key_map) {
+	if (atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYCODE, &param) == NOTIFY_STOP || !key_map) {
+		atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNBOUND_KEYCODE, &param);
 		compute_shiftstate();
 		kbd->slockstate = 0;
 		return;
@@ -1237,6 +1258,9 @@
 	type = KTYP(keysym);
 
 	if (type < 0xf0) {
+		param.value = keysym;
+		if (atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNICODE, &param) == NOTIFY_STOP)
+			return;
 		if (down && !raw_mode)
 			to_utf8(vc, keysym);
 		return;
@@ -1244,9 +1268,6 @@
 
 	type -= 0xf0;
 
-	if (raw_mode && type != KT_SPEC && type != KT_SHIFT)
-		return;
-
 	if (type == KT_LETTER) {
 		type = KT_LATIN;
 		if (vc_kbd_led(kbd, VC_CAPSLOCK)) {
@@ -1255,9 +1276,18 @@
 				keysym = key_map[keycode];
 		}
 	}
+	param.value = keysym;
+
+	if (atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYSYM, &param) == NOTIFY_STOP)
+		return;
+
+	if (raw_mode && type != KT_SPEC && type != KT_SHIFT)
+		return;
 
 	(*k_handler[type])(vc, keysym & 0xff, !down);
 
+	atomic_notifier_call_chain(&keyboard_notifier_list, KBD_POST_KEYSYM, &param);
+
 	if (type != KT_SLOCK)
 		kbd->slockstate = 0;
 }
@@ -1347,12 +1377,12 @@
 static const struct input_device_id kbd_ids[] = {
 	{
                 .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
-                .evbit = { BIT(EV_KEY) },
+                .evbit = { BIT_MASK(EV_KEY) },
         },
 
 	{
                 .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
-                .evbit = { BIT(EV_SND) },
+                .evbit = { BIT_MASK(EV_SND) },
         },
 
 	{ },    /* Terminating entry */
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 661aca0..fd0abef 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -56,11 +56,11 @@
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 
 #include "mxser.h"
diff --git a/drivers/char/mxser_new.c b/drivers/char/mxser_new.c
index 854dbf5..081c84c 100644
--- a/drivers/char/mxser_new.c
+++ b/drivers/char/mxser_new.c
@@ -39,11 +39,11 @@
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 
 #include "mxser_new.h"
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index 8598585..9782cb4 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -1178,9 +1178,9 @@
 	jog_dev->id.bustype = BUS_ISA;
 	jog_dev->id.vendor = PCI_VENDOR_ID_SONY;
 
-	jog_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	jog_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_MIDDLE);
-	jog_dev->relbit[0] = BIT(REL_WHEEL);
+	jog_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	jog_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_MIDDLE);
+	jog_dev->relbit[0] = BIT_MASK(REL_WHEEL);
 
 	sonypi_device.input_key_dev = key_dev = input_allocate_device();
 	if (!key_dev) {
@@ -1193,7 +1193,7 @@
 	key_dev->id.vendor = PCI_VENDOR_ID_SONY;
 
 	/* Initialize the Input Drivers: special keys */
-	key_dev->evbit[0] = BIT(EV_KEY);
+	key_dev->evbit[0] = BIT_MASK(EV_KEY);
 	for (i = 0; sonypi_inputkeys[i].sonypiev; i++)
 		if (sonypi_inputkeys[i].inputev)
 			set_bit(sonypi_inputkeys[i].inputev, key_dev->keybit);
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 85a2328..a6e1c9b 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -1467,7 +1467,7 @@
 
 	line = tty->index;
 	sx_dprintk(SX_DEBUG_OPEN, "%d: opening line %d. tty=%p ctty=%p, "
-			"np=%d)\n", current->pid, line, tty,
+			"np=%d)\n", task_pid_nr(current), line, tty,
 			current->signal->tty, sx_nports);
 
 	if ((line < 0) || (line >= SX_NPORTS) || (line >= sx_nports))
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 78d1493..de60e1e 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -251,7 +251,7 @@
 	struct task_struct *p;
 
 	for_each_process(p) {
-		if (p->mm && !is_init(p))
+		if (p->mm && !is_global_init(p))
 			/* Not swapper, init nor kernel thread */
 			force_sig(sig, p);
 	}
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 9c867cf..13a5357 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -103,6 +103,7 @@
 #include <linux/selection.h>
 
 #include <linux/kmod.h>
+#include <linux/nsproxy.h>
 
 #undef TTY_DEBUG_HANGUP
 
@@ -3107,7 +3108,7 @@
 	 */
 	if (tty == real_tty && current->signal->tty != real_tty)
 		return -ENOTTY;
-	return put_user(pid_nr(real_tty->pgrp), p);
+	return put_user(pid_vnr(real_tty->pgrp), p);
 }
 
 /**
@@ -3141,7 +3142,7 @@
 	if (pgrp_nr < 0)
 		return -EINVAL;
 	rcu_read_lock();
-	pgrp = find_pid(pgrp_nr);
+	pgrp = find_vpid(pgrp_nr);
 	retval = -ESRCH;
 	if (!pgrp)
 		goto out_unlock;
@@ -3178,7 +3179,7 @@
 		return -ENOTTY;
 	if (!real_tty->session)
 		return -ENOTTY;
-	return put_user(pid_nr(real_tty->session), p);
+	return put_user(pid_vnr(real_tty->session), p);
 }
 
 /**
@@ -3528,8 +3529,8 @@
 	/* Kill the entire session */
 	do_each_pid_task(session, PIDTYPE_SID, p) {
 		printk(KERN_NOTICE "SAK: killed process %d"
-			" (%s): process_session(p)==tty->session\n",
-			p->pid, p->comm);
+			" (%s): task_session_nr(p)==tty->session\n",
+			task_pid_nr(p), p->comm);
 		send_sig(SIGKILL, p, 1);
 	} while_each_pid_task(session, PIDTYPE_SID, p);
 	/* Now kill any processes that happen to have the
@@ -3538,8 +3539,8 @@
 	do_each_thread(g, p) {
 		if (p->signal->tty == tty) {
 			printk(KERN_NOTICE "SAK: killed process %d"
-			    " (%s): process_session(p)==tty->session\n",
-			    p->pid, p->comm);
+			    " (%s): task_session_nr(p)==tty->session\n",
+			    task_pid_nr(p), p->comm);
 			send_sig(SIGKILL, p, 1);
 			continue;
 		}
@@ -3559,7 +3560,7 @@
 				    filp->private_data == tty) {
 					printk(KERN_NOTICE "SAK: killed process %d"
 					    " (%s): fd#%d opened to the tty\n",
-					    p->pid, p->comm, i);
+					    task_pid_nr(p), p->comm, i);
 					force_sig(SIGKILL, p);
 					break;
 				}
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 1764c67..7a5badf 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -99,6 +99,7 @@
 #include <linux/pm.h>
 #include <linux/font.h>
 #include <linux/bitops.h>
+#include <linux/notifier.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
@@ -223,6 +224,35 @@
 };
 
 /*
+ * Notifier list for console events.
+ */
+static ATOMIC_NOTIFIER_HEAD(vt_notifier_list);
+
+int register_vt_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&vt_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_vt_notifier);
+
+int unregister_vt_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&vt_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_vt_notifier);
+
+static void notify_write(struct vc_data *vc, unsigned int unicode)
+{
+	struct vt_notifier_param param = { .vc = vc, unicode = unicode };
+	atomic_notifier_call_chain(&vt_notifier_list, VT_WRITE, &param);
+}
+
+static void notify_update(struct vc_data *vc)
+{
+	struct vt_notifier_param param = { .vc = vc };
+	atomic_notifier_call_chain(&vt_notifier_list, VT_UPDATE, &param);
+}
+
+/*
  *	Low-Level Functions
  */
 
@@ -718,6 +748,7 @@
 		return -ENXIO;
 	if (!vc_cons[currcons].d) {
 	    struct vc_data *vc;
+	    struct vt_notifier_param param;
 
 	    /* prevent users from taking too much memory */
 	    if (currcons >= MAX_NR_USER_CONSOLES && !capable(CAP_SYS_RESOURCE))
@@ -729,7 +760,7 @@
 	    /* although the numbers above are not valid since long ago, the
 	       point is still up-to-date and the comment still has its value
 	       even if only as a historical artifact.  --mj, July 1998 */
-	    vc = kzalloc(sizeof(struct vc_data), GFP_KERNEL);
+	    param.vc = vc = kzalloc(sizeof(struct vc_data), GFP_KERNEL);
 	    if (!vc)
 		return -ENOMEM;
 	    vc_cons[currcons].d = vc;
@@ -746,6 +777,7 @@
 	    }
 	    vc->vc_kmalloced = 1;
 	    vc_init(vc, vc->vc_rows, vc->vc_cols, 1);
+	    atomic_notifier_call_chain(&vt_notifier_list, VT_ALLOCATE, &param);
 	}
 	return 0;
 }
@@ -907,6 +939,8 @@
 
 	if (vc_cons_allocated(currcons)) {
 		struct vc_data *vc = vc_cons[currcons].d;
+		struct vt_notifier_param param = { .vc = vc };
+		atomic_notifier_call_chain(&vt_notifier_list, VT_DEALLOCATE, &param);
 		vc->vc_sw->con_deinit(vc);
 		put_pid(vc->vt_pid);
 		module_put(vc->vc_sw->owner);
@@ -1019,6 +1053,7 @@
 		vc->vc_pos += vc->vc_size_row;
 	}
 	vc->vc_need_wrap = 0;
+	notify_write(vc, '\n');
 }
 
 static void ri(struct vc_data *vc)
@@ -1039,6 +1074,7 @@
 {
 	vc->vc_pos -= vc->vc_x << 1;
 	vc->vc_need_wrap = vc->vc_x = 0;
+	notify_write(vc, '\r');
 }
 
 static inline void bs(struct vc_data *vc)
@@ -1047,6 +1083,7 @@
 		vc->vc_pos -= 2;
 		vc->vc_x--;
 		vc->vc_need_wrap = 0;
+		notify_write(vc, '\b');
 	}
 }
 
@@ -1593,6 +1630,7 @@
 				break;
 		}
 		vc->vc_pos += (vc->vc_x << 1);
+		notify_write(vc, '\t');
 		return;
 	case 10: case 11: case 12:
 		lf(vc);
@@ -2252,6 +2290,7 @@
 				tc = conv_uni_to_pc(vc, ' '); /* A space is printed in the second column */
 				if (tc < 0) tc = ' ';
 			}
+			notify_write(vc, c);
 
 			if (inverse) {
 				FLUSH
@@ -2274,6 +2313,7 @@
 	release_console_sem();
 
 out:
+	notify_update(vc);
 	return n;
 #undef FLUSH
 }
@@ -2317,6 +2357,7 @@
 		do_blank_screen(0);
 		blank_timer_expired = 0;
 	}
+	notify_update(vc_cons[fg_console].d);
 
 	release_console_sem();
 }
@@ -2418,6 +2459,7 @@
 				continue;
 		}
 		scr_writew((vc->vc_attr << 8) + c, (unsigned short *)vc->vc_pos);
+		notify_write(vc, c);
 		cnt++;
 		if (myx == vc->vc_cols - 1) {
 			vc->vc_need_wrap = 1;
@@ -2436,6 +2478,7 @@
 		}
 	}
 	set_cursor(vc);
+	notify_update(vc);
 
 quit:
 	clear_bit(0, &printing);
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
new file mode 100644
index 0000000..3bed412
--- /dev/null
+++ b/drivers/cpuidle/Kconfig
@@ -0,0 +1,20 @@
+
+config CPU_IDLE
+	bool "CPU idle PM support"
+	help
+	  CPU idle is a generic framework for supporting software-controlled
+	  idle processor power management.  It includes modular cross-platform
+	  governors that can be swapped during runtime.
+
+	  If you're using a mobile platform that supports CPU idle PM (e.g.
+	  an ACPI-capable notebook), you should say Y here.
+
+config CPU_IDLE_GOV_LADDER
+	bool
+	depends on CPU_IDLE
+	default y
+
+config CPU_IDLE_GOV_MENU
+	bool
+	depends on CPU_IDLE && NO_HZ
+	default y
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
new file mode 100644
index 0000000..5634f88
--- /dev/null
+++ b/drivers/cpuidle/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for cpuidle.
+#
+
+obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
new file mode 100644
index 0000000..fdf4106
--- /dev/null
+++ b/drivers/cpuidle/cpuidle.c
@@ -0,0 +1,295 @@
+/*
+ * cpuidle.c - core cpuidle infrastructure
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/latency.h>
+#include <linux/cpu.h>
+#include <linux/cpuidle.h>
+
+#include "cpuidle.h"
+
+DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+EXPORT_PER_CPU_SYMBOL_GPL(cpuidle_devices);
+
+DEFINE_MUTEX(cpuidle_lock);
+LIST_HEAD(cpuidle_detected_devices);
+static void (*pm_idle_old)(void);
+
+static int enabled_devices;
+
+/**
+ * cpuidle_idle_call - the main idle loop
+ *
+ * NOTE: no locks or semaphores should be used here
+ */
+static void cpuidle_idle_call(void)
+{
+	struct cpuidle_device *dev = __get_cpu_var(cpuidle_devices);
+	struct cpuidle_state *target_state;
+	int next_state;
+
+	/* check if the device is ready */
+	if (!dev || !dev->enabled) {
+		if (pm_idle_old)
+			pm_idle_old();
+		else
+			local_irq_enable();
+		return;
+	}
+
+	/* ask the governor for the next state */
+	next_state = cpuidle_curr_governor->select(dev);
+	if (need_resched())
+		return;
+	target_state = &dev->states[next_state];
+
+	/* enter the state and update stats */
+	dev->last_residency = target_state->enter(dev, target_state);
+	dev->last_state = target_state;
+	target_state->time += dev->last_residency;
+	target_state->usage++;
+
+	/* give the governor an opportunity to reflect on the outcome */
+	if (cpuidle_curr_governor->reflect)
+		cpuidle_curr_governor->reflect(dev);
+}
+
+/**
+ * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
+ */
+void cpuidle_install_idle_handler(void)
+{
+	if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
+		/* Make sure all changes finished before we switch to new idle */
+		smp_wmb();
+		pm_idle = cpuidle_idle_call;
+	}
+}
+
+/**
+ * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
+ */
+void cpuidle_uninstall_idle_handler(void)
+{
+	if (enabled_devices && (pm_idle != pm_idle_old)) {
+		pm_idle = pm_idle_old;
+		cpu_idle_wait();
+	}
+}
+
+/**
+ * cpuidle_pause_and_lock - temporarily disables CPUIDLE
+ */
+void cpuidle_pause_and_lock(void)
+{
+	mutex_lock(&cpuidle_lock);
+	cpuidle_uninstall_idle_handler();
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
+
+/**
+ * cpuidle_resume_and_unlock - resumes CPUIDLE operation
+ */
+void cpuidle_resume_and_unlock(void)
+{
+	cpuidle_install_idle_handler();
+	mutex_unlock(&cpuidle_lock);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
+
+/**
+ * cpuidle_enable_device - enables idle PM for a CPU
+ * @dev: the CPU
+ *
+ * This function must be called between cpuidle_pause_and_lock and
+ * cpuidle_resume_and_unlock when used externally.
+ */
+int cpuidle_enable_device(struct cpuidle_device *dev)
+{
+	int ret, i;
+
+	if (dev->enabled)
+		return 0;
+	if (!cpuidle_curr_driver || !cpuidle_curr_governor)
+		return -EIO;
+	if (!dev->state_count)
+		return -EINVAL;
+
+	if ((ret = cpuidle_add_state_sysfs(dev)))
+		return ret;
+
+	if (cpuidle_curr_governor->enable &&
+	    (ret = cpuidle_curr_governor->enable(dev)))
+		goto fail_sysfs;
+
+	for (i = 0; i < dev->state_count; i++) {
+		dev->states[i].usage = 0;
+		dev->states[i].time = 0;
+	}
+	dev->last_residency = 0;
+	dev->last_state = NULL;
+
+	smp_wmb();
+
+	dev->enabled = 1;
+
+	enabled_devices++;
+	return 0;
+
+fail_sysfs:
+	cpuidle_remove_state_sysfs(dev);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_enable_device);
+
+/**
+ * cpuidle_disable_device - disables idle PM for a CPU
+ * @dev: the CPU
+ *
+ * This function must be called between cpuidle_pause_and_lock and
+ * cpuidle_resume_and_unlock when used externally.
+ */
+void cpuidle_disable_device(struct cpuidle_device *dev)
+{
+	if (!dev->enabled)
+		return;
+	if (!cpuidle_curr_driver || !cpuidle_curr_governor)
+		return;
+
+	dev->enabled = 0;
+
+	if (cpuidle_curr_governor->disable)
+		cpuidle_curr_governor->disable(dev);
+
+	cpuidle_remove_state_sysfs(dev);
+	enabled_devices--;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_disable_device);
+
+/**
+ * cpuidle_register_device - registers a CPU's idle PM feature
+ * @dev: the cpu
+ */
+int cpuidle_register_device(struct cpuidle_device *dev)
+{
+	int ret;
+	struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
+
+	if (!sys_dev)
+		return -EINVAL;
+	if (!try_module_get(cpuidle_curr_driver->owner))
+		return -EINVAL;
+
+	init_completion(&dev->kobj_unregister);
+
+	mutex_lock(&cpuidle_lock);
+
+	per_cpu(cpuidle_devices, dev->cpu) = dev;
+	list_add(&dev->device_list, &cpuidle_detected_devices);
+	if ((ret = cpuidle_add_sysfs(sys_dev))) {
+		mutex_unlock(&cpuidle_lock);
+		module_put(cpuidle_curr_driver->owner);
+		return ret;
+	}
+
+	cpuidle_enable_device(dev);
+	cpuidle_install_idle_handler();
+
+	mutex_unlock(&cpuidle_lock);
+
+	return 0;
+
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_register_device);
+
+/**
+ * cpuidle_unregister_device - unregisters a CPU's idle PM feature
+ * @dev: the cpu
+ */
+void cpuidle_unregister_device(struct cpuidle_device *dev)
+{
+	struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
+
+	cpuidle_pause_and_lock();
+
+	cpuidle_disable_device(dev);
+
+	cpuidle_remove_sysfs(sys_dev);
+	list_del(&dev->device_list);
+	wait_for_completion(&dev->kobj_unregister);
+	per_cpu(cpuidle_devices, dev->cpu) = NULL;
+
+	cpuidle_resume_and_unlock();
+
+	module_put(cpuidle_curr_driver->owner);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_unregister_device);
+
+#ifdef CONFIG_SMP
+
+static void smp_callback(void *v)
+{
+	/* we already woke the CPU up, nothing more to do */
+}
+
+/*
+ * This function gets called when a part of the kernel has a new latency
+ * requirement.  This means we need to get all processors out of their C-state,
+ * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
+ * wakes them all right up.
+ */
+static int cpuidle_latency_notify(struct notifier_block *b,
+		unsigned long l, void *v)
+{
+	smp_call_function(smp_callback, NULL, 0, 1);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpuidle_latency_notifier = {
+	.notifier_call = cpuidle_latency_notify,
+};
+
+#define latency_notifier_init(x) do { register_latency_notifier(x); } while (0)
+
+#else /* CONFIG_SMP */
+
+#define latency_notifier_init(x) do { } while (0)
+
+#endif /* CONFIG_SMP */
+
+/**
+ * cpuidle_init - core initializer
+ */
+static int __init cpuidle_init(void)
+{
+	int ret;
+
+	pm_idle_old = pm_idle;
+
+	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
+	if (ret)
+		return ret;
+
+	latency_notifier_init(&cpuidle_latency_notifier);
+
+	return 0;
+}
+
+core_initcall(cpuidle_init);
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
new file mode 100644
index 0000000..9476ba3
--- /dev/null
+++ b/drivers/cpuidle/cpuidle.h
@@ -0,0 +1,33 @@
+/*
+ * cpuidle.h - The internal header file
+ */
+
+#ifndef __DRIVER_CPUIDLE_H
+#define __DRIVER_CPUIDLE_H
+
+#include <linux/sysdev.h>
+
+/* For internal use only */
+extern struct cpuidle_governor *cpuidle_curr_governor;
+extern struct cpuidle_driver *cpuidle_curr_driver;
+extern struct list_head cpuidle_governors;
+extern struct list_head cpuidle_detected_devices;
+extern struct mutex cpuidle_lock;
+extern spinlock_t cpuidle_driver_lock;
+
+/* idle loop */
+extern void cpuidle_install_idle_handler(void);
+extern void cpuidle_uninstall_idle_handler(void);
+
+/* governors */
+extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
+
+/* sysfs */
+extern int cpuidle_add_class_sysfs(struct sysdev_class *cls);
+extern void cpuidle_remove_class_sysfs(struct sysdev_class *cls);
+extern int cpuidle_add_state_sysfs(struct cpuidle_device *device);
+extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device);
+extern int cpuidle_add_sysfs(struct sys_device *sysdev);
+extern void cpuidle_remove_sysfs(struct sys_device *sysdev);
+
+#endif /* __DRIVER_CPUIDLE_H */
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
new file mode 100644
index 0000000..2257004
--- /dev/null
+++ b/drivers/cpuidle/driver.c
@@ -0,0 +1,56 @@
+/*
+ * driver.c - driver support
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/cpuidle.h>
+
+#include "cpuidle.h"
+
+struct cpuidle_driver *cpuidle_curr_driver;
+DEFINE_SPINLOCK(cpuidle_driver_lock);
+
+/**
+ * cpuidle_register_driver - registers a driver
+ * @drv: the driver
+ */
+int cpuidle_register_driver(struct cpuidle_driver *drv)
+{
+	if (!drv)
+		return -EINVAL;
+
+	spin_lock(&cpuidle_driver_lock);
+	if (cpuidle_curr_driver) {
+		spin_unlock(&cpuidle_driver_lock);
+		return -EBUSY;
+	}
+	cpuidle_curr_driver = drv;
+	spin_unlock(&cpuidle_driver_lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_register_driver);
+
+/**
+ * cpuidle_unregister_driver - unregisters a driver
+ * @drv: the driver
+ */
+void cpuidle_unregister_driver(struct cpuidle_driver *drv)
+{
+	if (!drv)
+		return;
+
+	spin_lock(&cpuidle_driver_lock);
+	cpuidle_curr_driver = NULL;
+	spin_unlock(&cpuidle_driver_lock);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
new file mode 100644
index 0000000..bb699cb2
--- /dev/null
+++ b/drivers/cpuidle/governor.c
@@ -0,0 +1,141 @@
+/*
+ * governor.c - governor support
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/cpuidle.h>
+
+#include "cpuidle.h"
+
+LIST_HEAD(cpuidle_governors);
+struct cpuidle_governor *cpuidle_curr_governor;
+
+/**
+ * __cpuidle_find_governor - finds a governor of the specified name
+ * @str: the name
+ *
+ * Must be called with cpuidle_lock aquired.
+ */
+static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
+{
+	struct cpuidle_governor *gov;
+
+	list_for_each_entry(gov, &cpuidle_governors, governor_list)
+		if (!strnicmp(str, gov->name, CPUIDLE_NAME_LEN))
+			return gov;
+
+	return NULL;
+}
+
+/**
+ * cpuidle_switch_governor - changes the governor
+ * @gov: the new target governor
+ *
+ * NOTE: "gov" can be NULL to specify disabled
+ * Must be called with cpuidle_lock aquired.
+ */
+int cpuidle_switch_governor(struct cpuidle_governor *gov)
+{
+	struct cpuidle_device *dev;
+
+	if (gov == cpuidle_curr_governor)
+		return 0;
+
+	cpuidle_uninstall_idle_handler();
+
+	if (cpuidle_curr_governor) {
+		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+			cpuidle_disable_device(dev);
+		module_put(cpuidle_curr_governor->owner);
+	}
+
+	cpuidle_curr_governor = gov;
+
+	if (gov) {
+		if (!try_module_get(cpuidle_curr_governor->owner))
+			return -EINVAL;
+		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+			cpuidle_enable_device(dev);
+		cpuidle_install_idle_handler();
+		printk(KERN_INFO "cpuidle: using governor %s\n", gov->name);
+	}
+
+	return 0;
+}
+
+/**
+ * cpuidle_register_governor - registers a governor
+ * @gov: the governor
+ */
+int cpuidle_register_governor(struct cpuidle_governor *gov)
+{
+	int ret = -EEXIST;
+
+	if (!gov || !gov->select)
+		return -EINVAL;
+
+	mutex_lock(&cpuidle_lock);
+	if (__cpuidle_find_governor(gov->name) == NULL) {
+		ret = 0;
+		list_add_tail(&gov->governor_list, &cpuidle_governors);
+		if (!cpuidle_curr_governor ||
+		    cpuidle_curr_governor->rating < gov->rating)
+			cpuidle_switch_governor(gov);
+	}
+	mutex_unlock(&cpuidle_lock);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_register_governor);
+
+/**
+ * cpuidle_replace_governor - find a replacement governor
+ * @exclude_rating: the rating that will be skipped while looking for
+ * new governor.
+ */
+static struct cpuidle_governor *cpuidle_replace_governor(int exclude_rating)
+{
+	struct cpuidle_governor *gov;
+	struct cpuidle_governor *ret_gov = NULL;
+	unsigned int max_rating = 0;
+
+	list_for_each_entry(gov, &cpuidle_governors, governor_list) {
+		if (gov->rating == exclude_rating)
+			continue;
+		if (gov->rating > max_rating) {
+			max_rating = gov->rating;
+			ret_gov = gov;
+		}
+	}
+
+	return ret_gov;
+}
+
+/**
+ * cpuidle_unregister_governor - unregisters a governor
+ * @gov: the governor
+ */
+void cpuidle_unregister_governor(struct cpuidle_governor *gov)
+{
+	if (!gov)
+		return;
+
+	mutex_lock(&cpuidle_lock);
+	if (gov == cpuidle_curr_governor) {
+		struct cpuidle_governor *new_gov;
+		new_gov = cpuidle_replace_governor(gov->rating);
+		cpuidle_switch_governor(new_gov);
+	}
+	list_del(&gov->governor_list);
+	mutex_unlock(&cpuidle_lock);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_unregister_governor);
diff --git a/drivers/cpuidle/governors/Makefile b/drivers/cpuidle/governors/Makefile
new file mode 100644
index 0000000..1b51272
--- /dev/null
+++ b/drivers/cpuidle/governors/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for cpuidle governors.
+#
+
+obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
+obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
new file mode 100644
index 0000000..eb666ec
--- /dev/null
+++ b/drivers/cpuidle/governors/ladder.c
@@ -0,0 +1,166 @@
+/*
+ * ladder.c - the residency ladder algorithm
+ *
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/latency.h>
+#include <linux/moduleparam.h>
+#include <linux/jiffies.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+#define PROMOTION_COUNT 4
+#define DEMOTION_COUNT 1
+
+struct ladder_device_state {
+	struct {
+		u32 promotion_count;
+		u32 demotion_count;
+		u32 promotion_time;
+		u32 demotion_time;
+	} threshold;
+	struct {
+		int promotion_count;
+		int demotion_count;
+	} stats;
+};
+
+struct ladder_device {
+	struct ladder_device_state states[CPUIDLE_STATE_MAX];
+	int last_state_idx;
+};
+
+static DEFINE_PER_CPU(struct ladder_device, ladder_devices);
+
+/**
+ * ladder_do_selection - prepares private data for a state change
+ * @ldev: the ladder device
+ * @old_idx: the current state index
+ * @new_idx: the new target state index
+ */
+static inline void ladder_do_selection(struct ladder_device *ldev,
+				       int old_idx, int new_idx)
+{
+	ldev->states[old_idx].stats.promotion_count = 0;
+	ldev->states[old_idx].stats.demotion_count = 0;
+	ldev->last_state_idx = new_idx;
+}
+
+/**
+ * ladder_select_state - selects the next state to enter
+ * @dev: the CPU
+ */
+static int ladder_select_state(struct cpuidle_device *dev)
+{
+	struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
+	struct ladder_device_state *last_state;
+	int last_residency, last_idx = ldev->last_state_idx;
+
+	if (unlikely(!ldev))
+		return 0;
+
+	last_state = &ldev->states[last_idx];
+
+	if (dev->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID)
+		last_residency = cpuidle_get_last_residency(dev) - dev->states[last_idx].exit_latency;
+	else
+		last_residency = last_state->threshold.promotion_time + 1;
+
+	/* consider promotion */
+	if (last_idx < dev->state_count - 1 &&
+	    last_residency > last_state->threshold.promotion_time &&
+	    dev->states[last_idx + 1].exit_latency <= system_latency_constraint()) {
+		last_state->stats.promotion_count++;
+		last_state->stats.demotion_count = 0;
+		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
+			ladder_do_selection(ldev, last_idx, last_idx + 1);
+			return last_idx + 1;
+		}
+	}
+
+	/* consider demotion */
+	if (last_idx > 0 &&
+	    last_residency < last_state->threshold.demotion_time) {
+		last_state->stats.demotion_count++;
+		last_state->stats.promotion_count = 0;
+		if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
+			ladder_do_selection(ldev, last_idx, last_idx - 1);
+			return last_idx - 1;
+		}
+	}
+
+	/* otherwise remain at the current state */
+	return last_idx;
+}
+
+/**
+ * ladder_enable_device - setup for the governor
+ * @dev: the CPU
+ */
+static int ladder_enable_device(struct cpuidle_device *dev)
+{
+	int i;
+	struct ladder_device *ldev = &per_cpu(ladder_devices, dev->cpu);
+	struct ladder_device_state *lstate;
+	struct cpuidle_state *state;
+
+	ldev->last_state_idx = 0;
+
+	for (i = 0; i < dev->state_count; i++) {
+		state = &dev->states[i];
+		lstate = &ldev->states[i];
+
+		lstate->stats.promotion_count = 0;
+		lstate->stats.demotion_count = 0;
+
+		lstate->threshold.promotion_count = PROMOTION_COUNT;
+		lstate->threshold.demotion_count = DEMOTION_COUNT;
+
+		if (i < dev->state_count - 1)
+			lstate->threshold.promotion_time = state->exit_latency;
+		if (i > 0)
+			lstate->threshold.demotion_time = state->exit_latency;
+	}
+
+	return 0;
+}
+
+static struct cpuidle_governor ladder_governor = {
+	.name =		"ladder",
+	.rating =	10,
+	.enable =	ladder_enable_device,
+	.select =	ladder_select_state,
+	.owner =	THIS_MODULE,
+};
+
+/**
+ * init_ladder - initializes the governor
+ */
+static int __init init_ladder(void)
+{
+	return cpuidle_register_governor(&ladder_governor);
+}
+
+/**
+ * exit_ladder - exits the governor
+ */
+static void __exit exit_ladder(void)
+{
+	cpuidle_unregister_governor(&ladder_governor);
+}
+
+MODULE_LICENSE("GPL");
+module_init(init_ladder);
+module_exit(exit_ladder);
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
new file mode 100644
index 0000000..299d45c
--- /dev/null
+++ b/drivers/cpuidle/governors/menu.c
@@ -0,0 +1,137 @@
+/*
+ * menu.c - the menu idle governor
+ *
+ * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/latency.h>
+#include <linux/time.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+
+#define BREAK_FUZZ	4	/* 4 us */
+
+struct menu_device {
+	int		last_state_idx;
+
+	unsigned int	expected_us;
+	unsigned int	predicted_us;
+	unsigned int	last_measured_us;
+	unsigned int	elapsed_us;
+};
+
+static DEFINE_PER_CPU(struct menu_device, menu_devices);
+
+/**
+ * menu_select - selects the next idle state to enter
+ * @dev: the CPU
+ */
+static int menu_select(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &__get_cpu_var(menu_devices);
+	int i;
+
+	/* determine the expected residency time */
+	data->expected_us =
+		(u32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000;
+
+	/* find the deepest idle state that satisfies our constraints */
+	for (i = 1; i < dev->state_count; i++) {
+		struct cpuidle_state *s = &dev->states[i];
+
+		if (s->target_residency > data->expected_us)
+			break;
+		if (s->target_residency > data->predicted_us)
+			break;
+		if (s->exit_latency > system_latency_constraint())
+			break;
+	}
+
+	data->last_state_idx = i - 1;
+	return i - 1;
+}
+
+/**
+ * menu_reflect - attempts to guess what happened after entry
+ * @dev: the CPU
+ *
+ * NOTE: it's important to be fast here because this operation will add to
+ *       the overall exit latency.
+ */
+static void menu_reflect(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &__get_cpu_var(menu_devices);
+	int last_idx = data->last_state_idx;
+	unsigned int measured_us =
+		cpuidle_get_last_residency(dev) + data->elapsed_us;
+	struct cpuidle_state *target = &dev->states[last_idx];
+
+	/*
+	 * Ugh, this idle state doesn't support residency measurements, so we
+	 * are basically lost in the dark.  As a compromise, assume we slept
+	 * for one full standard timer tick.  However, be aware that this
+	 * could potentially result in a suboptimal state transition.
+	 */
+	if (!(target->flags & CPUIDLE_FLAG_TIME_VALID))
+		measured_us = USEC_PER_SEC / HZ;
+
+	/* Predict time remaining until next break event */
+	if (measured_us + BREAK_FUZZ < data->expected_us - target->exit_latency) {
+		data->predicted_us = max(measured_us, data->last_measured_us);
+		data->last_measured_us = measured_us;
+		data->elapsed_us = 0;
+	} else {
+		if (data->elapsed_us < data->elapsed_us + measured_us)
+			data->elapsed_us = measured_us;
+		else
+			data->elapsed_us = -1;
+		data->predicted_us = max(measured_us, data->last_measured_us);
+	}
+}
+
+/**
+ * menu_enable_device - scans a CPU's states and does setup
+ * @dev: the CPU
+ */
+static int menu_enable_device(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
+
+	memset(data, 0, sizeof(struct menu_device));
+
+	return 0;
+}
+
+static struct cpuidle_governor menu_governor = {
+	.name =		"menu",
+	.rating =	20,
+	.enable =	menu_enable_device,
+	.select =	menu_select,
+	.reflect =	menu_reflect,
+	.owner =	THIS_MODULE,
+};
+
+/**
+ * init_menu - initializes the governor
+ */
+static int __init init_menu(void)
+{
+	return cpuidle_register_governor(&menu_governor);
+}
+
+/**
+ * exit_menu - exits the governor
+ */
+static void __exit exit_menu(void)
+{
+	cpuidle_unregister_governor(&menu_governor);
+}
+
+MODULE_LICENSE("GPL");
+module_init(init_menu);
+module_exit(exit_menu);
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
new file mode 100644
index 0000000..0f3515e
--- /dev/null
+++ b/drivers/cpuidle/sysfs.c
@@ -0,0 +1,361 @@
+/*
+ * sysfs.c - sysfs support
+ *
+ * (C) 2006-2007 Shaohua Li <shaohua.li@intel.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/sysfs.h>
+#include <linux/cpu.h>
+
+#include "cpuidle.h"
+
+static unsigned int sysfs_switch;
+static int __init cpuidle_sysfs_setup(char *unused)
+{
+	sysfs_switch = 1;
+	return 1;
+}
+__setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup);
+
+static ssize_t show_available_governors(struct sys_device *dev, char *buf)
+{
+	ssize_t i = 0;
+	struct cpuidle_governor *tmp;
+
+	mutex_lock(&cpuidle_lock);
+	list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
+		if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2))
+			goto out;
+		i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
+	}
+
+out:
+	i+= sprintf(&buf[i], "\n");
+	mutex_unlock(&cpuidle_lock);
+	return i;
+}
+
+static ssize_t show_current_driver(struct sys_device *dev, char *buf)
+{
+	ssize_t ret;
+
+	spin_lock(&cpuidle_driver_lock);
+	if (cpuidle_curr_driver)
+		ret = sprintf(buf, "%s\n", cpuidle_curr_driver->name);
+	else
+		ret = sprintf(buf, "none\n");
+	spin_unlock(&cpuidle_driver_lock);
+
+	return ret;
+}
+
+static ssize_t show_current_governor(struct sys_device *dev, char *buf)
+{
+	ssize_t ret;
+
+	mutex_lock(&cpuidle_lock);
+	if (cpuidle_curr_governor)
+		ret = sprintf(buf, "%s\n", cpuidle_curr_governor->name);
+	else
+		ret = sprintf(buf, "none\n");
+	mutex_unlock(&cpuidle_lock);
+
+	return ret;
+}
+
+static ssize_t store_current_governor(struct sys_device *dev,
+	const char *buf, size_t count)
+{
+	char gov_name[CPUIDLE_NAME_LEN];
+	int ret = -EINVAL;
+	size_t len = count;
+	struct cpuidle_governor *gov;
+
+	if (!len || len >= sizeof(gov_name))
+		return -EINVAL;
+
+	memcpy(gov_name, buf, len);
+	gov_name[len] = '\0';
+	if (gov_name[len - 1] == '\n')
+		gov_name[--len] = '\0';
+
+	mutex_lock(&cpuidle_lock);
+
+	list_for_each_entry(gov, &cpuidle_governors, governor_list) {
+		if (strlen(gov->name) == len && !strcmp(gov->name, gov_name)) {
+			ret = cpuidle_switch_governor(gov);
+			break;
+		}
+	}
+
+	mutex_unlock(&cpuidle_lock);
+
+	if (ret)
+		return ret;
+	else
+		return count;
+}
+
+static SYSDEV_ATTR(current_driver, 0444, show_current_driver, NULL);
+static SYSDEV_ATTR(current_governor_ro, 0444, show_current_governor, NULL);
+
+static struct attribute *cpuclass_default_attrs[] = {
+	&attr_current_driver.attr,
+	&attr_current_governor_ro.attr,
+	NULL
+};
+
+static SYSDEV_ATTR(available_governors, 0444, show_available_governors, NULL);
+static SYSDEV_ATTR(current_governor, 0644, show_current_governor,
+	store_current_governor);
+
+static struct attribute *cpuclass_switch_attrs[] = {
+	&attr_available_governors.attr,
+	&attr_current_driver.attr,
+	&attr_current_governor.attr,
+	NULL
+};
+
+static struct attribute_group cpuclass_attr_group = {
+	.attrs = cpuclass_default_attrs,
+	.name = "cpuidle",
+};
+
+/**
+ * cpuidle_add_class_sysfs - add CPU global sysfs attributes
+ */
+int cpuidle_add_class_sysfs(struct sysdev_class *cls)
+{
+	if (sysfs_switch)
+		cpuclass_attr_group.attrs = cpuclass_switch_attrs;
+
+	return sysfs_create_group(&cls->kset.kobj, &cpuclass_attr_group);
+}
+
+/**
+ * cpuidle_remove_class_sysfs - remove CPU global sysfs attributes
+ */
+void cpuidle_remove_class_sysfs(struct sysdev_class *cls)
+{
+	sysfs_remove_group(&cls->kset.kobj, &cpuclass_attr_group);
+}
+
+struct cpuidle_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct cpuidle_device *, char *);
+	ssize_t (*store)(struct cpuidle_device *, const char *, size_t count);
+};
+
+#define define_one_ro(_name, show) \
+	static struct cpuidle_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
+#define define_one_rw(_name, show, store) \
+	static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store)
+
+#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj)
+#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr)
+static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char * buf)
+{
+	int ret = -EIO;
+	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+	struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
+
+	if (cattr->show) {
+		mutex_lock(&cpuidle_lock);
+		ret = cattr->show(dev, buf);
+		mutex_unlock(&cpuidle_lock);
+	}
+	return ret;
+}
+
+static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr,
+		     const char * buf, size_t count)
+{
+	int ret = -EIO;
+	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+	struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
+
+	if (cattr->store) {
+		mutex_lock(&cpuidle_lock);
+		ret = cattr->store(dev, buf, count);
+		mutex_unlock(&cpuidle_lock);
+	}
+	return ret;
+}
+
+static struct sysfs_ops cpuidle_sysfs_ops = {
+	.show = cpuidle_show,
+	.store = cpuidle_store,
+};
+
+static void cpuidle_sysfs_release(struct kobject *kobj)
+{
+	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+
+	complete(&dev->kobj_unregister);
+}
+
+static struct kobj_type ktype_cpuidle = {
+	.sysfs_ops = &cpuidle_sysfs_ops,
+	.release = cpuidle_sysfs_release,
+};
+
+struct cpuidle_state_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct cpuidle_state *, char *);
+	ssize_t (*store)(struct cpuidle_state *, const char *, size_t);
+};
+
+#define define_one_state_ro(_name, show) \
+static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
+
+#define define_show_state_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \
+{ \
+	return sprintf(buf, "%u\n", state->_name);\
+}
+
+static ssize_t show_state_name(struct cpuidle_state *state, char *buf)
+{
+	return sprintf(buf, "%s\n", state->name);
+}
+
+define_show_state_function(exit_latency)
+define_show_state_function(power_usage)
+define_show_state_function(usage)
+define_show_state_function(time)
+define_one_state_ro(name, show_state_name);
+define_one_state_ro(latency, show_state_exit_latency);
+define_one_state_ro(power, show_state_power_usage);
+define_one_state_ro(usage, show_state_usage);
+define_one_state_ro(time, show_state_time);
+
+static struct attribute *cpuidle_state_default_attrs[] = {
+	&attr_name.attr,
+	&attr_latency.attr,
+	&attr_power.attr,
+	&attr_usage.attr,
+	&attr_time.attr,
+	NULL
+};
+
+#define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj)
+#define kobj_to_state(k) (kobj_to_state_obj(k)->state)
+#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
+static ssize_t cpuidle_state_show(struct kobject * kobj,
+	struct attribute * attr ,char * buf)
+{
+	int ret = -EIO;
+	struct cpuidle_state *state = kobj_to_state(kobj);
+	struct cpuidle_state_attr * cattr = attr_to_stateattr(attr);
+
+	if (cattr->show)
+		ret = cattr->show(state, buf);
+
+	return ret;
+}
+
+static struct sysfs_ops cpuidle_state_sysfs_ops = {
+	.show = cpuidle_state_show,
+};
+
+static void cpuidle_state_sysfs_release(struct kobject *kobj)
+{
+	struct cpuidle_state_kobj *state_obj = kobj_to_state_obj(kobj);
+
+	complete(&state_obj->kobj_unregister);
+}
+
+static struct kobj_type ktype_state_cpuidle = {
+	.sysfs_ops = &cpuidle_state_sysfs_ops,
+	.default_attrs = cpuidle_state_default_attrs,
+	.release = cpuidle_state_sysfs_release,
+};
+
+static void inline cpuidle_free_state_kobj(struct cpuidle_device *device, int i)
+{
+	kobject_unregister(&device->kobjs[i]->kobj);
+	wait_for_completion(&device->kobjs[i]->kobj_unregister);
+	kfree(device->kobjs[i]);
+	device->kobjs[i] = NULL;
+}
+
+/**
+ * cpuidle_add_driver_sysfs - adds driver-specific sysfs attributes
+ * @device: the target device
+ */
+int cpuidle_add_state_sysfs(struct cpuidle_device *device)
+{
+	int i, ret = -ENOMEM;
+	struct cpuidle_state_kobj *kobj;
+
+	/* state statistics */
+	for (i = 0; i < device->state_count; i++) {
+		kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
+		if (!kobj)
+			goto error_state;
+		kobj->state = &device->states[i];
+		init_completion(&kobj->kobj_unregister);
+
+		kobj->kobj.parent = &device->kobj;
+		kobj->kobj.ktype = &ktype_state_cpuidle;
+		kobject_set_name(&kobj->kobj, "state%d", i);
+		ret = kobject_register(&kobj->kobj);
+		if (ret) {
+			kfree(kobj);
+			goto error_state;
+		}
+		device->kobjs[i] = kobj;
+	}
+
+	return 0;
+
+error_state:
+	for (i = i - 1; i >= 0; i--)
+		cpuidle_free_state_kobj(device, i);
+	return ret;
+}
+
+/**
+ * cpuidle_remove_driver_sysfs - removes driver-specific sysfs attributes
+ * @device: the target device
+ */
+void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
+{
+	int i;
+
+	for (i = 0; i < device->state_count; i++)
+		cpuidle_free_state_kobj(device, i);
+}
+
+/**
+ * cpuidle_add_sysfs - creates a sysfs instance for the target device
+ * @sysdev: the target device
+ */
+int cpuidle_add_sysfs(struct sys_device *sysdev)
+{
+	int cpu = sysdev->id;
+	struct cpuidle_device *dev;
+
+	dev = per_cpu(cpuidle_devices, cpu);
+	dev->kobj.parent = &sysdev->kobj;
+	dev->kobj.ktype = &ktype_cpuidle;
+	kobject_set_name(&dev->kobj, "%s", "cpuidle");
+	return kobject_register(&dev->kobj);
+}
+
+/**
+ * cpuidle_remove_sysfs - deletes a sysfs instance on the target device
+ * @sysdev: the target device
+ */
+void cpuidle_remove_sysfs(struct sys_device *sysdev)
+{
+	int cpu = sysdev->id;
+	struct cpuidle_device *dev;
+
+	dev = per_cpu(cpuidle_devices, cpu);
+	kobject_unregister(&dev->kobj);
+}
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index e80af67..2d23e30 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -94,8 +94,6 @@
 
 #endif				/* !CONFIG_EDAC_DEBUG */
 
-#define BIT(x) (1 << (x))
-
 #define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
 	PCI_DEVICE_ID_ ## vend ## _ ## dev
 
diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c
index e66cdd4..9007d06 100644
--- a/drivers/edac/pasemi_edac.c
+++ b/drivers/edac/pasemi_edac.c
@@ -270,6 +270,7 @@
 
 static const struct pci_device_id pasemi_edac_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_PASEMI, 0xa00a) },
+	{ }
 };
 
 MODULE_DEVICE_TABLE(pci, pasemi_edac_pci_tbl);
diff --git a/drivers/firmware/dcdbas.h b/drivers/firmware/dcdbas.h
index dcdba0f..87bc341 100644
--- a/drivers/firmware/dcdbas.h
+++ b/drivers/firmware/dcdbas.h
@@ -17,7 +17,6 @@
 #define _DCDBAS_H_
 
 #include <linux/device.h>
-#include <linux/input.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
 
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index a702e2f..1ca6f46 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -113,13 +113,13 @@
 
 	if (count > HID_MIN_BUFFER_SIZE) {
 		printk(KERN_WARNING "hidraw: pid %d passed too large report\n",
-				current->pid);
+				task_pid_nr(current));
 		return -EINVAL;
 	}
 
 	if (count < 2) {
 		printk(KERN_WARNING "hidraw: pid %d passed too short report\n",
-				current->pid);
+				task_pid_nr(current));
 		return -EINVAL;
 	}
 
diff --git a/drivers/hid/usbhid/usbkbd.c b/drivers/hid/usbhid/usbkbd.c
index b76b02f..775a1ef 100644
--- a/drivers/hid/usbhid/usbkbd.c
+++ b/drivers/hid/usbhid/usbkbd.c
@@ -274,8 +274,11 @@
 
 	input_set_drvdata(input_dev, kbd);
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_LED) | BIT(EV_REP);
-	input_dev->ledbit[0] = BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL) | BIT(LED_COMPOSE) | BIT(LED_KANA);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) |
+		BIT_MASK(EV_REP);
+	input_dev->ledbit[0] = BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) |
+		BIT_MASK(LED_SCROLLL) | BIT_MASK(LED_COMPOSE) |
+		BIT_MASK(LED_KANA);
 
 	for (i = 0; i < 255; i++)
 		set_bit(usb_kbd_keycode[i], input_dev->keybit);
diff --git a/drivers/hid/usbhid/usbmouse.c b/drivers/hid/usbhid/usbmouse.c
index 5345c73..f8ad691 100644
--- a/drivers/hid/usbhid/usbmouse.c
+++ b/drivers/hid/usbhid/usbmouse.c
@@ -173,11 +173,13 @@
 	usb_to_input_id(dev, &input_dev->id);
 	input_dev->dev.parent = &intf->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-	input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
-	input_dev->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_SIDE) | BIT(BTN_EXTRA);
-	input_dev->relbit[0] |= BIT(REL_WHEEL);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+	input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+	input_dev->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_SIDE) |
+		BIT_MASK(BTN_EXTRA);
+	input_dev->relbit[0] |= BIT_MASK(REL_WHEEL);
 
 	input_set_drvdata(input_dev, mouse);
 
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 4879125..1001d2e 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -1099,7 +1099,7 @@
 	idev->name = "applesmc";
 	idev->id.bustype = BUS_HOST;
 	idev->dev.parent = &pdev->dev;
-	idev->evbit[0] = BIT(EV_ABS);
+	idev->evbit[0] = BIT_MASK(EV_ABS);
 	input_set_abs_params(idev, ABS_X,
 			-256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
 	input_set_abs_params(idev, ABS_Y,
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 6f66551..5c82ec7 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -150,7 +150,7 @@
 static int __devinit coretemp_probe(struct platform_device *pdev)
 {
 	struct coretemp_data *data;
-	struct cpuinfo_x86 *c = &(cpu_data)[pdev->id];
+	struct cpuinfo_x86 *c = &cpu_data(pdev->id);
 	int err;
 	u32 eax, edx;
 
@@ -359,7 +359,7 @@
 	struct pdev_entry *p, *n;
 
 	/* quick check if we run Intel */
-	if (cpu_data[0].x86_vendor != X86_VENDOR_INTEL)
+	if (cpu_data(0).x86_vendor != X86_VENDOR_INTEL)
 		goto exit;
 
 	err = platform_driver_register(&coretemp_driver);
@@ -367,7 +367,7 @@
 		goto exit;
 
 	for_each_online_cpu(i) {
-		struct cpuinfo_x86 *c = &(cpu_data)[i];
+		struct cpuinfo_x86 *c = &cpu_data(i);
 
 		/* check if family 6, models e, f, 16 */
 		if ((c->cpuid_level < 0) || (c->x86 != 0x6) ||
diff --git a/drivers/hwmon/hdaps.c b/drivers/hwmon/hdaps.c
index 8a7ae03..bab5fd2 100644
--- a/drivers/hwmon/hdaps.c
+++ b/drivers/hwmon/hdaps.c
@@ -574,7 +574,7 @@
 	idev = hdaps_idev->input;
 	idev->name = "hdaps";
 	idev->dev.parent = &pdev->dev;
-	idev->evbit[0] = BIT(EV_ABS);
+	idev->evbit[0] = BIT_MASK(EV_ABS);
 	input_set_abs_params(idev, ABS_X,
 			-256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
 	input_set_abs_params(idev, ABS_Y,
diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c
index f17e771..3330667 100644
--- a/drivers/hwmon/hwmon-vid.c
+++ b/drivers/hwmon/hwmon-vid.c
@@ -200,7 +200,7 @@
 
 u8 vid_which_vrm(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	u32 eax;
 	u8 eff_family, eff_model, eff_stepping, vrm_ret;
 
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 00fad11..6426a61 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -85,7 +85,7 @@
 	const char *set;
 	const char *unset;
 };
-#define BIT(m, s, u)	{ .mask = m, .set = s, .unset = u }
+#define PXA_BIT(m, s, u)	{ .mask = m, .set = s, .unset = u }
 
 static inline void
 decode_bits(const char *prefix, const struct bits *bits, int num, u32 val)
@@ -100,17 +100,17 @@
 }
 
 static const struct bits isr_bits[] = {
-	BIT(ISR_RWM,	"RX",		"TX"),
-	BIT(ISR_ACKNAK,	"NAK",		"ACK"),
-	BIT(ISR_UB,	"Bsy",		"Rdy"),
-	BIT(ISR_IBB,	"BusBsy",	"BusRdy"),
-	BIT(ISR_SSD,	"SlaveStop",	NULL),
-	BIT(ISR_ALD,	"ALD",		NULL),
-	BIT(ISR_ITE,	"TxEmpty",	NULL),
-	BIT(ISR_IRF,	"RxFull",	NULL),
-	BIT(ISR_GCAD,	"GenCall",	NULL),
-	BIT(ISR_SAD,	"SlaveAddr",	NULL),
-	BIT(ISR_BED,	"BusErr",	NULL),
+	PXA_BIT(ISR_RWM,	"RX",		"TX"),
+	PXA_BIT(ISR_ACKNAK,	"NAK",		"ACK"),
+	PXA_BIT(ISR_UB,		"Bsy",		"Rdy"),
+	PXA_BIT(ISR_IBB,	"BusBsy",	"BusRdy"),
+	PXA_BIT(ISR_SSD,	"SlaveStop",	NULL),
+	PXA_BIT(ISR_ALD,	"ALD",		NULL),
+	PXA_BIT(ISR_ITE,	"TxEmpty",	NULL),
+	PXA_BIT(ISR_IRF,	"RxFull",	NULL),
+	PXA_BIT(ISR_GCAD,	"GenCall",	NULL),
+	PXA_BIT(ISR_SAD,	"SlaveAddr",	NULL),
+	PXA_BIT(ISR_BED,	"BusErr",	NULL),
 };
 
 static void decode_ISR(unsigned int val)
@@ -120,21 +120,21 @@
 }
 
 static const struct bits icr_bits[] = {
-	BIT(ICR_START,  "START",	NULL),
-	BIT(ICR_STOP,   "STOP",		NULL),
-	BIT(ICR_ACKNAK, "ACKNAK",	NULL),
-	BIT(ICR_TB,     "TB",		NULL),
-	BIT(ICR_MA,     "MA",		NULL),
-	BIT(ICR_SCLE,   "SCLE",		"scle"),
-	BIT(ICR_IUE,    "IUE",		"iue"),
-	BIT(ICR_GCD,    "GCD",		NULL),
-	BIT(ICR_ITEIE,  "ITEIE",	NULL),
-	BIT(ICR_IRFIE,  "IRFIE",	NULL),
-	BIT(ICR_BEIE,   "BEIE",		NULL),
-	BIT(ICR_SSDIE,  "SSDIE",	NULL),
-	BIT(ICR_ALDIE,  "ALDIE",	NULL),
-	BIT(ICR_SADIE,  "SADIE",	NULL),
-	BIT(ICR_UR,     "UR",		"ur"),
+	PXA_BIT(ICR_START,  "START",	NULL),
+	PXA_BIT(ICR_STOP,   "STOP",	NULL),
+	PXA_BIT(ICR_ACKNAK, "ACKNAK",	NULL),
+	PXA_BIT(ICR_TB,     "TB",	NULL),
+	PXA_BIT(ICR_MA,     "MA",	NULL),
+	PXA_BIT(ICR_SCLE,   "SCLE",	"scle"),
+	PXA_BIT(ICR_IUE,    "IUE",	"iue"),
+	PXA_BIT(ICR_GCD,    "GCD",	NULL),
+	PXA_BIT(ICR_ITEIE,  "ITEIE",	NULL),
+	PXA_BIT(ICR_IRFIE,  "IRFIE",	NULL),
+	PXA_BIT(ICR_BEIE,   "BEIE",	NULL),
+	PXA_BIT(ICR_SSDIE,  "SSDIE",	NULL),
+	PXA_BIT(ICR_ALDIE,  "ALDIE",	NULL),
+	PXA_BIT(ICR_SADIE,  "SADIE",	NULL),
+	PXA_BIT(ICR_UR,     "UR",		"ur"),
 };
 
 static void decode_ICR(unsigned int val)
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 6d9fd92..6eaece9 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -1056,6 +1056,9 @@
 config BLK_DEV_IDEDMA
 	def_bool BLK_DEV_IDEDMA_PCI || BLK_DEV_IDEDMA_PMAC || BLK_DEV_IDEDMA_ICS || BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
 
+config IDE_ARCH_OBSOLETE_INIT
+	def_bool ALPHA || (ARM && !ARCH_L7200) || BLACKFIN || X86 || IA64 || M32R || MIPS || PARISC || PPC || (SUPERH64 && BLK_DEV_IDEPCI) || SPARC
+
 endif
 
 config BLK_DEV_HD_ONLY
diff --git a/drivers/ide/arm/bast-ide.c b/drivers/ide/arm/bast-ide.c
index f7449d0..48db616 100644
--- a/drivers/ide/arm/bast-ide.c
+++ b/drivers/ide/arm/bast-ide.c
@@ -45,7 +45,7 @@
 	hw.io_ports[IDE_CONTROL_OFFSET] = aux + (6 * 0x20);
 	hw.irq = irq;
 
-	ide_register_hw(&hw, 0, hwif);
+	ide_register_hw(&hw, NULL, 0, hwif);
 
 	return 0;
 }
diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c
index 3af33fb..410a0d1 100644
--- a/drivers/ide/arm/icside.c
+++ b/drivers/ide/arm/icside.c
@@ -316,27 +316,29 @@
 
 	drive->waiting_for_dma = 0;
 
-	disable_dma(hwif->hw.dma);
+	disable_dma(state->dev->dma);
 
 	/* Teardown mappings after DMA has completed. */
 	dma_unmap_sg(state->dev, hwif->sg_table, hwif->sg_nents,
 		     hwif->sg_dma_direction);
 
-	return get_dma_residue(hwif->hw.dma) != 0;
+	return get_dma_residue(state->dev->dma) != 0;
 }
 
 static void icside_dma_start(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = HWIF(drive);
+	struct icside_state *state = hwif->hwif_data;
 
 	/* We can not enable DMA on both channels simultaneously. */
-	BUG_ON(dma_channel_active(hwif->hw.dma));
-	enable_dma(hwif->hw.dma);
+	BUG_ON(dma_channel_active(state->dev->dma));
+	enable_dma(state->dev->dma);
 }
 
 static int icside_dma_setup(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = HWIF(drive);
+	struct icside_state *state = hwif->hwif_data;
 	struct request *rq = hwif->hwgroup->rq;
 	unsigned int dma_mode;
 
@@ -348,7 +350,7 @@
 	/*
 	 * We can not enable DMA on both channels.
 	 */
-	BUG_ON(dma_channel_active(hwif->hw.dma));
+	BUG_ON(dma_channel_active(state->dev->dma));
 
 	icside_build_sglist(drive, rq);
 
@@ -365,14 +367,14 @@
 	/*
 	 * Select the correct timing for this drive.
 	 */
-	set_dma_speed(hwif->hw.dma, drive->drive_data);
+	set_dma_speed(state->dev->dma, drive->drive_data);
 
 	/*
 	 * Tell the DMA engine about the SG table and
 	 * data direction.
 	 */
-	set_dma_sg(hwif->hw.dma, hwif->sg_table, hwif->sg_nents);
-	set_dma_mode(hwif->hw.dma, dma_mode);
+	set_dma_sg(state->dev->dma, hwif->sg_table, hwif->sg_nents);
+	set_dma_mode(state->dev->dma, dma_mode);
 
 	drive->waiting_for_dma = 1;
 
@@ -438,40 +440,16 @@
 #define icside_dma_init(hwif)	(0)
 #endif
 
-static ide_hwif_t *icside_find_hwif(unsigned long dataport)
-{
-	ide_hwif_t *hwif;
-	int index;
-
-	for (index = 0; index < MAX_HWIFS; ++index) {
-		hwif = &ide_hwifs[index];
-		if (hwif->io_ports[IDE_DATA_OFFSET] == dataport)
-			goto found;
-	}
-
-	for (index = 0; index < MAX_HWIFS; ++index) {
-		hwif = &ide_hwifs[index];
-		if (!hwif->io_ports[IDE_DATA_OFFSET])
-			goto found;
-	}
-
-	hwif = NULL;
-found:
-	return hwif;
-}
-
 static ide_hwif_t *
 icside_setup(void __iomem *base, struct cardinfo *info, struct expansion_card *ec)
 {
 	unsigned long port = (unsigned long)base + info->dataoffset;
 	ide_hwif_t *hwif;
 
-	hwif = icside_find_hwif(port);
+	hwif = ide_find_port(port);
 	if (hwif) {
 		int i;
 
-		memset(&hwif->hw, 0, sizeof(hw_regs_t));
-
 		/*
 		 * Ensure we're using MMIO
 		 */
@@ -479,13 +457,10 @@
 		hwif->mmio = 1;
 
 		for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
-			hwif->hw.io_ports[i] = port;
 			hwif->io_ports[i] = port;
 			port += 1 << info->stepping;
 		}
-		hwif->hw.io_ports[IDE_CONTROL_OFFSET] = (unsigned long)base + info->ctrloffset;
 		hwif->io_ports[IDE_CONTROL_OFFSET] = (unsigned long)base + info->ctrloffset;
-		hwif->hw.irq  = ec->irq;
 		hwif->irq     = ec->irq;
 		hwif->noprobe = 0;
 		hwif->chipset = ide_acorn;
@@ -500,6 +475,7 @@
 {
 	ide_hwif_t *hwif;
 	void __iomem *base;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0);
 	if (!base)
@@ -523,9 +499,9 @@
 
 	state->hwif[0] = hwif;
 
-	probe_hwif_init(hwif);
+	idx[0] = hwif->index;
 
-	ide_proc_register_port(hwif);
+	ide_device_add(idx);
 
 	return 0;
 }
@@ -537,6 +513,7 @@
 	void __iomem *ioc_base, *easi_base;
 	unsigned int sel = 0;
 	int ret;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0);
 	if (!ioc_base) {
@@ -592,7 +569,6 @@
 	hwif->serialized  = 1;
 	hwif->config_data = (unsigned long)ioc_base;
 	hwif->select_data = sel;
-	hwif->hw.dma      = ec->dma;
 
 	mate->maskproc    = icside_maskproc;
 	mate->channel     = 1;
@@ -601,18 +577,16 @@
 	mate->serialized  = 1;
 	mate->config_data = (unsigned long)ioc_base;
 	mate->select_data = sel | 1;
-	mate->hw.dma      = ec->dma;
 
 	if (ec->dma != NO_DMA && !request_dma(ec->dma, hwif->name)) {
 		icside_dma_init(hwif);
 		icside_dma_init(mate);
 	}
 
-	probe_hwif_init(hwif);
-	probe_hwif_init(mate);
+	idx[0] = hwif->index;
+	idx[1] = mate->index;
 
-	ide_proc_register_port(hwif);
-	ide_proc_register_port(mate);
+	ide_device_add(idx);
 
 	return 0;
 
diff --git a/drivers/ide/arm/ide_arm.c b/drivers/ide/arm/ide_arm.c
index bce2bec..8957cba 100644
--- a/drivers/ide/arm/ide_arm.c
+++ b/drivers/ide/arm/ide_arm.c
@@ -31,5 +31,5 @@
 	memset(&hw, 0, sizeof(hw));
 	ide_std_init_ports(&hw, IDE_ARM_IO, IDE_ARM_IO + 0x206);
 	hw.irq = IDE_ARM_IRQ;
-	ide_register_hw(&hw, 1, NULL);
+	ide_register_hw(&hw, NULL, 1, NULL);
 }
diff --git a/drivers/ide/arm/rapide.c b/drivers/ide/arm/rapide.c
index 83811af..0775a3a 100644
--- a/drivers/ide/arm/rapide.c
+++ b/drivers/ide/arm/rapide.c
@@ -13,42 +13,25 @@
 
 #include <asm/ecard.h>
 
-/*
- * Something like this really should be in generic code, but isn't.
- */
 static ide_hwif_t *
 rapide_locate_hwif(void __iomem *base, void __iomem *ctrl, unsigned int sz, int irq)
 {
 	unsigned long port = (unsigned long)base;
-	ide_hwif_t *hwif;
-	int index, i;
+	ide_hwif_t *hwif = ide_find_port(port);
+	int i;
 
-	for (index = 0; index < MAX_HWIFS; ++index) {
-		hwif = ide_hwifs + index;
-		if (hwif->io_ports[IDE_DATA_OFFSET] == port)
-			goto found;
-	}
+	if (hwif == NULL)
+		goto out;
 
-	for (index = 0; index < MAX_HWIFS; ++index) {
-		hwif = ide_hwifs + index;
-		if (hwif->io_ports[IDE_DATA_OFFSET] == 0)
-			goto found;
-	}
-
-	return NULL;
-
- found:
 	for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
-		hwif->hw.io_ports[i] = port;
 		hwif->io_ports[i] = port;
 		port += sz;
 	}
-	hwif->hw.io_ports[IDE_CONTROL_OFFSET] = (unsigned long)ctrl;
 	hwif->io_ports[IDE_CONTROL_OFFSET] = (unsigned long)ctrl;
-	hwif->hw.irq = hwif->irq = irq;
+	hwif->irq = irq;
 	hwif->mmio = 1;
 	default_hwif_mmiops(hwif);
-
+out:
 	return hwif;
 }
 
@@ -58,6 +41,7 @@
 	ide_hwif_t *hwif;
 	void __iomem *base;
 	int ret;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	ret = ecard_request_resources(ec);
 	if (ret)
@@ -74,8 +58,11 @@
 		hwif->hwif_data = base;
 		hwif->gendev.parent = &ec->dev;
 		hwif->noprobe = 0;
-		probe_hwif_init(hwif);
-		ide_proc_register_port(hwif);
+
+		idx[0] = hwif->index;
+
+		ide_device_add(idx);
+
 		ecard_set_drvdata(ec, hwif);
 		goto out;
 	}
diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c
index 9a96a10..ff20377 100644
--- a/drivers/ide/cris/ide-cris.c
+++ b/drivers/ide/cris/ide-cris.c
@@ -782,7 +782,7 @@
 		                ide_offsets,
 		                0, 0, cris_ide_ack_intr,
 		                ide_default_irq(0));
-		ide_register_hw(&hw, 1, &hwif);
+		ide_register_hw(&hw, NULL, 1, &hwif);
 		hwif->mmio = 1;
 		hwif->chipset = ide_etrax100;
 		hwif->set_pio_mode = &cris_set_pio_mode;
diff --git a/drivers/ide/h8300/ide-h8300.c b/drivers/ide/h8300/ide-h8300.c
index 6d26ad7..4a49b5c 100644
--- a/drivers/ide/h8300/ide-h8300.c
+++ b/drivers/ide/h8300/ide-h8300.c
@@ -68,7 +68,6 @@
 		hw->io_ports[i] = CONFIG_H8300_IDE_BASE + H8300_IDE_GAP*i;
 	hw->io_ports[IDE_CONTROL_OFFSET] = CONFIG_H8300_IDE_ALT;
 	hw->irq = EXT_IRQ0 + CONFIG_H8300_IDE_IRQ;
-	hw->dma = NO_DMA;
 	hw->chipset = ide_generic;
 }
 
@@ -101,7 +100,7 @@
 	hw_setup(&hw);
 
 	/* register if */
-	idx = ide_register_hw(&hw, 1, &hwif);
+	idx = ide_register_hw(&hw, NULL, 1, &hwif);
 	if (idx == -1) {
 		printk(KERN_ERR "ide-h8300: IDE I/F register failed\n");
 		return;
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index 1d5f682..89df48f 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -350,7 +350,7 @@
 
 	memset(&args, 0, sizeof(ide_task_t));
 	args.command_type = IDE_DRIVE_TASK_NO_DATA;
-	args.data_phase   = TASKFILE_IN;
+	args.data_phase   = TASKFILE_NO_DATA;
 	args.handler      = &task_no_data_intr;
 
 	/* convert gtf to IDE Taskfile */
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 2722d91..00123d99 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -593,28 +593,12 @@
 	return ide_raw_taskfile(drive, &args, NULL);
 }
 
-static int get_smart_values(ide_drive_t *drive, u8 *buf)
+static int get_smart_data(ide_drive_t *drive, u8 *buf, u8 sub_cmd)
 {
 	ide_task_t args;
 
 	memset(&args, 0, sizeof(ide_task_t));
-	args.tfRegister[IDE_FEATURE_OFFSET]	= SMART_READ_VALUES;
-	args.tfRegister[IDE_NSECTOR_OFFSET]	= 0x01;
-	args.tfRegister[IDE_LCYL_OFFSET]	= SMART_LCYL_PASS;
-	args.tfRegister[IDE_HCYL_OFFSET]	= SMART_HCYL_PASS;
-	args.tfRegister[IDE_COMMAND_OFFSET]	= WIN_SMART;
-	args.command_type			= IDE_DRIVE_TASK_IN;
-	args.data_phase				= TASKFILE_IN;
-	args.handler				= &task_in_intr;
-	(void) smart_enable(drive);
-	return ide_raw_taskfile(drive, &args, buf);
-}
-
-static int get_smart_thresholds(ide_drive_t *drive, u8 *buf)
-{
-	ide_task_t args;
-	memset(&args, 0, sizeof(ide_task_t));
-	args.tfRegister[IDE_FEATURE_OFFSET]	= SMART_READ_THRESHOLDS;
+	args.tfRegister[IDE_FEATURE_OFFSET]	= sub_cmd;
 	args.tfRegister[IDE_NSECTOR_OFFSET]	= 0x01;
 	args.tfRegister[IDE_LCYL_OFFSET]	= SMART_LCYL_PASS;
 	args.tfRegister[IDE_HCYL_OFFSET]	= SMART_HCYL_PASS;
@@ -656,7 +640,7 @@
 	ide_drive_t	*drive = (ide_drive_t *)data;
 	int		len = 0, i = 0;
 
-	if (!get_smart_thresholds(drive, page)) {
+	if (get_smart_data(drive, page, SMART_READ_THRESHOLDS) == 0) {
 		unsigned short *val = (unsigned short *) page;
 		char *out = ((char *)val) + (SECTOR_WORDS * 4);
 		page = out;
@@ -675,7 +659,7 @@
 	ide_drive_t	*drive = (ide_drive_t *)data;
 	int		len = 0, i = 0;
 
-	if (!get_smart_values(drive, page)) {
+	if (get_smart_data(drive, page, SMART_READ_VALUES) == 0) {
 		unsigned short *val = (unsigned short *) page;
 		char *out = ((char *)val) + (SECTOR_WORDS * 4);
 		page = out;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 80b4f17..428f7a8 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -901,10 +901,7 @@
 
 EXPORT_SYMBOL(ide_dma_timeout);
 
-/*
- * Needed for allowing full modular support of ide-driver
- */
-static int ide_release_dma_engine(ide_hwif_t *hwif)
+static void ide_release_dma_engine(ide_hwif_t *hwif)
 {
 	if (hwif->dmatable_cpu) {
 		pci_free_consistent(hwif->pci_dev,
@@ -913,7 +910,6 @@
 				    hwif->dmatable_dma);
 		hwif->dmatable_cpu = NULL;
 	}
-	return 1;
 }
 
 static int ide_release_iomio_dma(ide_hwif_t *hwif)
@@ -956,12 +952,6 @@
 {
 	printk(KERN_INFO "    %s: MMIO-DMA ", hwif->name);
 
- 	hwif->dma_base = base;
-
-	if(hwif->mate)
-		hwif->dma_master = (hwif->channel) ? hwif->mate->dma_base : base;
-	else
-		hwif->dma_master = base;
 	return 0;
 }
 
@@ -975,8 +965,6 @@
 		return 1;
 	}
 
-	hwif->dma_base = base;
-
 	if (hwif->cds->extra) {
 		hwif->extra_base = base + (hwif->channel ? 8 : 16);
 
@@ -991,10 +979,6 @@
 		}
 	}
 
-	if(hwif->mate)
-		hwif->dma_master = (hwif->channel) ? hwif->mate->dma_base:base;
-	else
-		hwif->dma_master = base;
 	return 0;
 }
 
@@ -1006,12 +990,9 @@
 	return ide_iomio_dma(hwif, base, ports);
 }
 
-/*
- * This can be called for a dynamically installed interface. Don't __init it
- */
-void ide_setup_dma (ide_hwif_t *hwif, unsigned long dma_base, unsigned int num_ports)
+void ide_setup_dma(ide_hwif_t *hwif, unsigned long base, unsigned num_ports)
 {
-	if (ide_dma_iobase(hwif, dma_base, num_ports))
+	if (ide_dma_iobase(hwif, base, num_ports))
 		return;
 
 	if (ide_allocate_dma_engine(hwif)) {
@@ -1019,6 +1000,13 @@
 		return;
 	}
 
+	hwif->dma_base = base;
+
+	if (hwif->mate)
+		hwif->dma_master = hwif->channel ? hwif->mate->dma_base : base;
+	else
+		hwif->dma_master = base;
+
 	if (!(hwif->dma_command))
 		hwif->dma_command	= hwif->dma_base;
 	if (!(hwif->dma_vendor1))
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 5c8b008..c89f0d3 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -47,15 +47,15 @@
 #include <linux/device.h>
 #include <linux/kmod.h>
 #include <linux/scatterlist.h>
+#include <linux/bitops.h>
 
 #include <asm/byteorder.h>
 #include <asm/irq.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
-#include <asm/bitops.h>
 
 static int __ide_end_request(ide_drive_t *drive, struct request *rq,
-			     int uptodate, unsigned int nr_bytes)
+			     int uptodate, unsigned int nr_bytes, int dequeue)
 {
 	int ret = 1;
 
@@ -80,9 +80,11 @@
 
 	if (!end_that_request_chunk(rq, uptodate, nr_bytes)) {
 		add_disk_randomness(rq->rq_disk);
-		if (!list_empty(&rq->queuelist))
-			blkdev_dequeue_request(rq);
-		HWGROUP(drive)->rq = NULL;
+		if (dequeue) {
+			if (!list_empty(&rq->queuelist))
+				blkdev_dequeue_request(rq);
+			HWGROUP(drive)->rq = NULL;
+		}
 		end_that_request_last(rq, uptodate);
 		ret = 0;
 	}
@@ -122,7 +124,7 @@
 			nr_bytes = rq->hard_cur_sectors << 9;
 	}
 
-	ret = __ide_end_request(drive, rq, uptodate, nr_bytes);
+	ret = __ide_end_request(drive, rq, uptodate, nr_bytes, 1);
 
 	spin_unlock_irqrestore(&ide_lock, flags);
 	return ret;
@@ -255,39 +257,13 @@
 			     int uptodate, int nr_sectors)
 {
 	unsigned long flags;
-	int ret = 1;
+	int ret;
 
 	spin_lock_irqsave(&ide_lock, flags);
-
 	BUG_ON(!blk_rq_started(rq));
-
-	/*
-	 * if failfast is set on a request, override number of sectors and
-	 * complete the whole request right now
-	 */
-	if (blk_noretry_request(rq) && end_io_error(uptodate))
-		nr_sectors = rq->hard_nr_sectors;
-
-	if (!blk_fs_request(rq) && end_io_error(uptodate) && !rq->errors)
-		rq->errors = -EIO;
-
-	/*
-	 * decide whether to reenable DMA -- 3 is a random magic for now,
-	 * if we DMA timeout more than 3 times, just stay in PIO
-	 */
-	if (drive->state == DMA_PIO_RETRY && drive->retry_pio <= 3) {
-		drive->state = 0;
-		HWGROUP(drive)->hwif->ide_dma_on(drive);
-	}
-
-	if (!end_that_request_first(rq, uptodate, nr_sectors)) {
-		add_disk_randomness(rq->rq_disk);
-		if (blk_rq_tagged(rq))
-			blk_queue_end_tag(drive->queue, rq);
-		end_that_request_last(rq, uptodate);
-		ret = 0;
-	}
+	ret = __ide_end_request(drive, rq, uptodate, nr_sectors << 9, 0);
 	spin_unlock_irqrestore(&ide_lock, flags);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(ide_end_dequeued_request);
@@ -800,7 +776,20 @@
 		s->b.set_tune = 0;
 
 		if (set_pio_mode_abuse(drive->hwif, req_pio)) {
-			if (hwif->set_pio_mode)
+
+			if (hwif->set_pio_mode == NULL)
+				return ide_stopped;
+
+			/*
+			 * take ide_lock for drive->[no_]unmask/[no_]io_32bit
+			 */
+			if (req_pio == 8 || req_pio == 9) {
+				unsigned long flags;
+
+				spin_lock_irqsave(&ide_lock, flags);
+				hwif->set_pio_mode(drive, req_pio);
+				spin_unlock_irqrestore(&ide_lock, flags);
+			} else
 				hwif->set_pio_mode(drive, req_pio);
 		} else {
 			int keep_dma = drive->using_dma;
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index d4d790f..9516883 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -693,35 +693,16 @@
 }
 #endif /* CONFIG_BLK_DEV_IDEDMA */
 
-/*
- * Update the 
- */
-int ide_driveid_update (ide_drive_t *drive)
+int ide_driveid_update(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct hd_driveid *id;
-#if 0
-	id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC);
-	if (!id)
-		return 0;
+	unsigned long timeout, flags;
 
-	taskfile_lib_get_identify(drive, (char *)&id);
-
-	ide_fix_driveid(id);
-	if (id) {
-		drive->id->dma_ultra = id->dma_ultra;
-		drive->id->dma_mword = id->dma_mword;
-		drive->id->dma_1word = id->dma_1word;
-		/* anything more ? */
-		kfree(id);
-	}
-	return 1;
-#else
 	/*
 	 * Re-read drive->id for possible DMA mode
 	 * change (copied from ide-probe.c)
 	 */
-	unsigned long timeout, flags;
 
 	SELECT_MASK(drive, 1);
 	if (IDE_CONTROL_REG)
@@ -763,7 +744,6 @@
 	}
 
 	return 1;
-#endif
 }
 
 int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c
index 2b8009c..e245521 100644
--- a/drivers/ide/ide-pnp.c
+++ b/drivers/ide/ide-pnp.c
@@ -40,9 +40,8 @@
 	ide_std_init_ports(&hw, pnp_port_start(dev, 0),
 				pnp_port_start(dev, 1));
 	hw.irq = pnp_irq(dev, 0);
-	hw.dma = NO_DMA;
 
-	index = ide_register_hw(&hw, 1, &hwif);
+	index = ide_register_hw(&hw, NULL, 1, &hwif);
 
 	if (index != -1) {
 	    	printk(KERN_INFO "ide%d: generic PnP IDE interface\n", index);
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index e294c74..d5146c5 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -717,7 +717,7 @@
  * This routine only knows how to look for drive units 0 and 1
  * on an interface, so any setting of MAX_DRIVES > 2 won't work here.
  */
-static void probe_hwif(ide_hwif_t *hwif, void (*fixup)(ide_hwif_t *hwif))
+static void probe_hwif(ide_hwif_t *hwif)
 {
 	unsigned long flags;
 	unsigned int irqd;
@@ -819,8 +819,8 @@
 		return;
 	}
 
-	if (fixup)
-		fixup(hwif);
+	if (hwif->fixup)
+		hwif->fixup(hwif);
 
 	for (unit = 0; unit < MAX_DRIVES; ++unit) {
 		ide_drive_t *drive = &hwif->drives[unit];
@@ -859,10 +859,11 @@
 }
 
 static int hwif_init(ide_hwif_t *hwif);
+static void hwif_register_devices(ide_hwif_t *hwif);
 
-int probe_hwif_init_with_fixup(ide_hwif_t *hwif, void (*fixup)(ide_hwif_t *hwif))
+static int probe_hwif_init(ide_hwif_t *hwif)
 {
-	probe_hwif(hwif, fixup);
+	probe_hwif(hwif);
 
 	if (!hwif_init(hwif)) {
 		printk(KERN_INFO "%s: failed to initialize IDE interface\n",
@@ -870,34 +871,12 @@
 		return -1;
 	}
 
-	if (hwif->present) {
-		u16 unit = 0;
-		int ret;
+	if (hwif->present)
+		hwif_register_devices(hwif);
 
-		for (unit = 0; unit < MAX_DRIVES; ++unit) {
-			ide_drive_t *drive = &hwif->drives[unit];
-			/* For now don't attach absent drives, we may
-			   want them on default or a new "empty" class
-			   for hotplug reprobing ? */
-			if (drive->present) {
-				ret = device_register(&drive->gendev);
-				if (ret < 0)
-					printk(KERN_WARNING "IDE: %s: "
-						"device_register error: %d\n",
-						__FUNCTION__, ret);
-			}
-		}
-	}
 	return 0;
 }
 
-int probe_hwif_init(ide_hwif_t *hwif)
-{
-	return probe_hwif_init_with_fixup(hwif, NULL);
-}
-
-EXPORT_SYMBOL(probe_hwif_init);
-
 #if MAX_HWIFS > 1
 /*
  * save_match() is used to simplify logic in init_irq() below.
@@ -1379,6 +1358,24 @@
 	return 0;
 }
 
+static void hwif_register_devices(ide_hwif_t *hwif)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_DRIVES; i++) {
+		ide_drive_t *drive = &hwif->drives[i];
+
+		if (drive->present) {
+			int ret = device_register(&drive->gendev);
+
+			if (ret < 0)
+				printk(KERN_WARNING "IDE: %s: "
+					"device_register error: %d\n",
+					__FUNCTION__, ret);
+		}
+	}
+}
+
 int ideprobe_init (void)
 {
 	unsigned int index;
@@ -1390,27 +1387,18 @@
 
 	for (index = 0; index < MAX_HWIFS; ++index)
 		if (probe[index])
-			probe_hwif(&ide_hwifs[index], NULL);
+			probe_hwif(&ide_hwifs[index]);
 	for (index = 0; index < MAX_HWIFS; ++index)
 		if (probe[index])
 			hwif_init(&ide_hwifs[index]);
 	for (index = 0; index < MAX_HWIFS; ++index) {
 		if (probe[index]) {
 			ide_hwif_t *hwif = &ide_hwifs[index];
-			int unit;
 			if (!hwif->present)
 				continue;
 			if (hwif->chipset == ide_unknown || hwif->chipset == ide_forced)
 				hwif->chipset = ide_generic;
-			for (unit = 0; unit < MAX_DRIVES; ++unit)
-				if (hwif->drives[unit].present) {
-					int ret = device_register(
-						&hwif->drives[unit].gendev);
-					if (ret < 0)
-						printk(KERN_WARNING "IDE: %s: "
-							"device_register error: %d\n",
-							__FUNCTION__, ret);
-				}
+			hwif_register_devices(hwif);
 		}
 	}
 	for (index = 0; index < MAX_HWIFS; ++index)
@@ -1420,3 +1408,22 @@
 }
 
 EXPORT_SYMBOL_GPL(ideprobe_init);
+
+int ide_device_add(u8 idx[4])
+{
+	int i, rc = 0;
+
+	for (i = 0; i < 4; i++) {
+		if (idx[i] != 0xff)
+			rc |= probe_hwif_init(&ide_hwifs[idx[i]]);
+	}
+
+	for (i = 0; i < 4; i++) {
+		if (idx[i] != 0xff)
+			ide_proc_register_port(&ide_hwifs[idx[i]]);
+	}
+
+	return rc;
+}
+
+EXPORT_SYMBOL_GPL(ide_device_add);
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index fc1d8ae..a4007d3 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -804,8 +804,6 @@
 	create_proc_ide_drives(hwif);
 }
 
-EXPORT_SYMBOL_GPL(ide_proc_register_port);
-
 #ifdef CONFIG_BLK_DEV_IDEPCI
 void ide_pci_create_host_proc(const char *name, get_info_t *get_info)
 {
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 1fa5794..e5a86a9 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -621,7 +621,6 @@
  */
 #define USE_IOTRACE	0
 #if USE_IOTRACE
-#include <linux/io_trace.h>
 #define IO_IDETAPE_FIFO	500
 #endif
 
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 2a3c8d4..73ef6bf 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -8,23 +8,6 @@
  *  Copyright (C) 2003-2004	Bartlomiej Zolnierkiewicz
  *
  *  The big the bad and the ugly.
- *
- *  Problems to be fixed because of BH interface or the lack therefore.
- *
- *  Fill me in stupid !!!
- *
- *  HOST:
- *	General refers to the Controller and Driver "pair".
- *  DATA HANDLER:
- *	Under the context of Linux it generally refers to an interrupt handler.
- *	However, it correctly describes the 'HOST'
- *  DATA BLOCK:
- *	The amount of data needed to be transfered as predefined in the
- *	setup of the device.
- *  STORAGE ATOMIC:
- *	The 'DATA BLOCK' associated to the 'DATA HANDLER', and can be as
- *	small as a single sector or as large as the entire command block
- *	request.
  */
 
 #include <linux/module.h>
@@ -695,9 +678,6 @@
 	return ide_do_drive_cmd(drive, &rq, ide_wait);
 }
 
-/*
- * FIXME : this needs to map into at taskfile. <andre@linux-ide.org>
- */
 int ide_cmd_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg)
 {
 	int err = 0;
@@ -761,9 +741,6 @@
 	return ide_do_drive_cmd(drive, &rq, ide_wait);
 }
 
-/*
- * FIXME : this needs to map into at taskfile. <andre@linux-ide.org>
- */
 int ide_task_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg)
 {
 	void __user *p = (void __user *)arg;
@@ -860,9 +837,14 @@
 		case TASKFILE_OUT_DMA:
 		case TASKFILE_IN_DMAQ:
 		case TASKFILE_IN_DMA:
-			hwif->dma_setup(drive);
-			hwif->dma_exec_cmd(drive, taskfile->command);
-			hwif->dma_start(drive);
+			if (!drive->using_dma)
+				break;
+
+			if (!hwif->dma_setup(drive)) {
+				hwif->dma_exec_cmd(drive, taskfile->command);
+				hwif->dma_start(drive);
+				return ide_started;
+			}
 			break;
 
 	        default:
@@ -876,7 +858,8 @@
 				return task->prehandler(drive, task->rq);
 			}
 			ide_execute_command(drive, taskfile->command, task->handler, WAIT_WORSTCASE, NULL);
+			return ide_started;
 	}
 
-	return ide_started;
+	return ide_stopped;
 }
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 961e6c8..674a65c 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -168,7 +168,6 @@
 
 	ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, &hwif->irq);
 
-	memcpy(&hwif->hw, &hw, sizeof(hw));
 	memcpy(hwif->io_ports, hw.io_ports, sizeof(hw.io_ports));
 
 	hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET];
@@ -214,7 +213,7 @@
 		init_hwif_data(hwif, index);
 		init_hwif_default(hwif, index);
 #if !defined(CONFIG_PPC32) || !defined(CONFIG_PCI)
-		hwif->irq = hwif->hw.irq =
+		hwif->irq =
 			ide_init_default_irq(hwif->io_ports[IDE_DATA_OFFSET]);
 #endif
 	}
@@ -265,6 +264,30 @@
 	return system_bus_speed;
 }
 
+ide_hwif_t * ide_find_port(unsigned long base)
+{
+	ide_hwif_t *hwif;
+	int i;
+
+	for (i = 0; i < MAX_HWIFS; i++) {
+		hwif = &ide_hwifs[i];
+		if (hwif->io_ports[IDE_DATA_OFFSET] == base)
+			goto found;
+	}
+
+	for (i = 0; i < MAX_HWIFS; i++) {
+		hwif = &ide_hwifs[i];
+		if (hwif->io_ports[IDE_DATA_OFFSET] == 0)
+			goto found;
+	}
+
+	hwif = NULL;
+found:
+	return hwif;
+}
+
+EXPORT_SYMBOL_GPL(ide_find_port);
+
 static struct resource* hwif_request_region(ide_hwif_t *hwif,
 					    unsigned long addr, int num)
 {
@@ -391,6 +414,8 @@
 	hwif->cds			= tmp_hwif->cds;
 #endif
 
+	hwif->fixup			= tmp_hwif->fixup;
+
 	hwif->set_pio_mode		= tmp_hwif->set_pio_mode;
 	hwif->set_dma_mode		= tmp_hwif->set_dma_mode;
 	hwif->mdma_filter		= tmp_hwif->mdma_filter;
@@ -652,7 +677,6 @@
 		}
 	}
 	hw->irq = irq;
-	hw->dma = NO_DMA;
 	hw->ack_intr = ack_intr;
 /*
  *	hw->iops = iops;
@@ -660,11 +684,11 @@
 }
 
 /**
- *	ide_register_hw_with_fixup	-	register IDE interface
+ *	ide_register_hw		-	register IDE interface
  *	@hw: hardware registers
+ *	@fixup: fixup function
  *	@initializing: set while initializing built-in drivers
  *	@hwifp: pointer to returned hwif
- *	@fixup: fixup function
  *
  *	Register an IDE interface, specifying exactly the registers etc.
  *	Set init=1 iff calling before probes have taken place.
@@ -672,9 +696,8 @@
  *	Returns -1 on error.
  */
 
-int ide_register_hw_with_fixup(hw_regs_t *hw, int initializing,
-			       ide_hwif_t **hwifp,
-			       void(*fixup)(ide_hwif_t *hwif))
+int ide_register_hw(hw_regs_t *hw, void (*fixup)(ide_hwif_t *),
+		    int initializing, ide_hwif_t **hwifp)
 {
 	int index, retry = 1;
 	ide_hwif_t *hwif;
@@ -682,7 +705,7 @@
 	do {
 		for (index = 0; index < MAX_HWIFS; ++index) {
 			hwif = &ide_hwifs[index];
-			if (hwif->hw.io_ports[IDE_DATA_OFFSET] == hw->io_ports[IDE_DATA_OFFSET])
+			if (hwif->io_ports[IDE_DATA_OFFSET] == hw->io_ports[IDE_DATA_OFFSET])
 				goto found;
 		}
 		for (index = 0; index < MAX_HWIFS; ++index) {
@@ -690,7 +713,7 @@
 			if (hwif->hold)
 				continue;
 			if ((!hwif->present && !hwif->mate && !initializing) ||
-			    (!hwif->hw.io_ports[IDE_DATA_OFFSET] && initializing))
+			    (!hwif->io_ports[IDE_DATA_OFFSET] && initializing))
 				goto found;
 		}
 		for (index = 0; index < MAX_HWIFS; index++)
@@ -706,16 +729,18 @@
 	}
 	if (hwif->present)
 		return -1;
-	memcpy(&hwif->hw, hw, sizeof(*hw));
-	memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports));
+	memcpy(hwif->io_ports, hw->io_ports, sizeof(hwif->io_ports));
 	hwif->irq = hw->irq;
 	hwif->noprobe = 0;
+	hwif->fixup = fixup;
 	hwif->chipset = hw->chipset;
 	hwif->gendev.parent = hw->dev;
+	hwif->ack_intr = hw->ack_intr;
 
-	if (!initializing) {
-		probe_hwif_init_with_fixup(hwif, fixup);
-		ide_proc_register_port(hwif);
+	if (initializing == 0) {
+		u8 idx[4] = { index, 0xff, 0xff, 0xff };
+
+		ide_device_add(idx);
 	}
 
 	if (hwifp)
@@ -724,13 +749,6 @@
 	return (initializing || hwif->present) ? index : -1;
 }
 
-EXPORT_SYMBOL(ide_register_hw_with_fixup);
-
-int ide_register_hw(hw_regs_t *hw, int initializing, ide_hwif_t **hwifp)
-{
-	return ide_register_hw_with_fixup(hw, initializing, hwifp, NULL);
-}
-
 EXPORT_SYMBOL(ide_register_hw);
 
 /*
@@ -1046,7 +1064,7 @@
 			ide_init_hwif_ports(&hw, (unsigned long) args[0],
 					    (unsigned long) args[1], NULL);
 			hw.irq = args[2];
-			if (ide_register_hw(&hw, 0, NULL) == -1)
+			if (ide_register_hw(&hw, NULL, 0, NULL) == -1)
 				return -EIO;
 			return 0;
 		}
@@ -1397,6 +1415,9 @@
 			"reset", "minus6", "ata66", "minus8", "minus9",
 			"minus10", "four", "qd65xx", "ht6560b", "cmd640_vlb",
 			"dtc2278", "umc8672", "ali14xx", NULL };
+
+		hw_regs_t hwregs;
+
 		hw = s[3] - '0';
 		hwif = &ide_hwifs[hw];
 		i = match_parm(&s[4], ide_words, vals, 3);
@@ -1506,9 +1527,9 @@
 			case 2: /* base,ctl */
 				vals[2] = 0;	/* default irq = probe for it */
 			case 3: /* base,ctl,irq */
-				hwif->hw.irq = vals[2];
-				ide_init_hwif_ports(&hwif->hw, (unsigned long) vals[0], (unsigned long) vals[1], &hwif->irq);
-				memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+				memset(&hwregs, 0, sizeof(hwregs));
+				ide_init_hwif_ports(&hwregs, vals[0], vals[1], &hwif->irq);
+				memcpy(hwif->io_ports, hwregs.io_ports, sizeof(hwif->io_ports));
 				hwif->irq      = vals[2];
 				hwif->noprobe  = 0;
 				hwif->chipset  = ide_forced;
diff --git a/drivers/ide/legacy/ali14xx.c b/drivers/ide/legacy/ali14xx.c
index 2f0ef9b..10311ec 100644
--- a/drivers/ide/legacy/ali14xx.c
+++ b/drivers/ide/legacy/ali14xx.c
@@ -102,6 +102,8 @@
 	outb_p(data, dataPort);
 }
 
+static DEFINE_SPINLOCK(ali14xx_lock);
+
 /*
  * Set PIO mode for the specified drive.
  * This function computes timing parameters
@@ -129,14 +131,14 @@
 
 	/* stuff timing parameters into controller registers */
 	driveNum = (HWIF(drive)->index << 1) + drive->select.b.unit;
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&ali14xx_lock, flags);
 	outb_p(regOn, basePort);
 	outReg(param1, regTab[driveNum].reg1);
 	outReg(param2, regTab[driveNum].reg2);
 	outReg(param3, regTab[driveNum].reg3);
 	outReg(param4, regTab[driveNum].reg4);
 	outb_p(regOff, basePort);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&ali14xx_lock, flags);
 }
 
 /*
@@ -193,6 +195,7 @@
 static int __init ali14xx_probe(void)
 {
 	ide_hwif_t *hwif, *mate;
+	static u8 idx[4] = { 0, 1, 0xff, 0xff };
 
 	printk(KERN_DEBUG "ali14xx: base=0x%03x, regOn=0x%02x.\n",
 			  basePort, regOn);
@@ -217,11 +220,7 @@
 	mate->mate = hwif;
 	mate->channel = 1;
 
-	probe_hwif_init(hwif);
-	probe_hwif_init(mate);
-
-	ide_proc_register_port(hwif);
-	ide_proc_register_port(mate);
+	ide_device_add(idx);
 
 	return 0;
 }
diff --git a/drivers/ide/legacy/buddha.c b/drivers/ide/legacy/buddha.c
index 101aee1..4a0be25 100644
--- a/drivers/ide/legacy/buddha.c
+++ b/drivers/ide/legacy/buddha.c
@@ -212,8 +212,8 @@
 //						xsurf_iops,
 						IRQ_AMIGA_PORTS);
 			}	
-			
-			index = ide_register_hw(&hw, 1, &hwif);
+
+			index = ide_register_hw(&hw, NULL, 1, &hwif);
 			if (index != -1) {
 				hwif->mmio = 1;
 				printk("ide%d: ", index);
diff --git a/drivers/ide/legacy/dtc2278.c b/drivers/ide/legacy/dtc2278.c
index f165212..24a845d 100644
--- a/drivers/ide/legacy/dtc2278.c
+++ b/drivers/ide/legacy/dtc2278.c
@@ -67,20 +67,24 @@
 	}
 }
 
+static DEFINE_SPINLOCK(dtc2278_lock);
+
 static void dtc2278_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
 	unsigned long flags;
 
 	if (pio >= 3) {
-		spin_lock_irqsave(&ide_lock, flags);
+		spin_lock_irqsave(&dtc2278_lock, flags);
 		/*
 		 * This enables PIO mode4 (3?) on the first interface
 		 */
 		sub22(1,0xc3);
 		sub22(0,0xa0);
-		spin_unlock_irqrestore(&ide_lock, flags);
+		spin_unlock_irqrestore(&dtc2278_lock, flags);
 	} else {
 		/* we don't know how to set it back again.. */
+		/* Actually we do - there is a data sheet available for the
+		   Winbond but does anyone actually care */
 	}
 
 	/*
@@ -94,6 +98,7 @@
 {
 	unsigned long flags;
 	ide_hwif_t *hwif, *mate;
+	static u8 idx[4] = { 0, 1, 0xff, 0xff };
 
 	hwif = &ide_hwifs[0];
 	mate = &ide_hwifs[1];
@@ -129,16 +134,13 @@
 
 	mate->serialized = 1;
 	mate->chipset = ide_dtc2278;
+	mate->pio_mask = ATA_PIO4;
 	mate->drives[0].no_unmask = 1;
 	mate->drives[1].no_unmask = 1;
 	mate->mate = hwif;
 	mate->channel = 1;
 
-	probe_hwif_init(hwif);
-	probe_hwif_init(mate);
-
-	ide_proc_register_port(hwif);
-	ide_proc_register_port(mate);
+	ide_device_add(idx);
 
 	return 0;
 }
diff --git a/drivers/ide/legacy/falconide.c b/drivers/ide/legacy/falconide.c
index f0829b8..7d7936f 100644
--- a/drivers/ide/legacy/falconide.c
+++ b/drivers/ide/legacy/falconide.c
@@ -72,7 +72,7 @@
 			0, 0, NULL,
 //			falconide_iops,
 			IRQ_MFP_IDE);
-	index = ide_register_hw(&hw, 1, NULL);
+	index = ide_register_hw(&hw, NULL, 1, NULL);
 
 	if (index != -1)
 	    printk("ide%d: Falcon IDE interface\n", index);
diff --git a/drivers/ide/legacy/gayle.c b/drivers/ide/legacy/gayle.c
index 0830a02..53331ee 100644
--- a/drivers/ide/legacy/gayle.c
+++ b/drivers/ide/legacy/gayle.c
@@ -165,7 +165,7 @@
 //			&gayle_iops,
 			IRQ_AMIGA_PORTS);
 
-	index = ide_register_hw(&hw, 1, &hwif);
+	index = ide_register_hw(&hw, NULL, 1, &hwif);
 	if (index != -1) {
 	    hwif->mmio = 1;
 	    switch (i) {
diff --git a/drivers/ide/legacy/ht6560b.c b/drivers/ide/legacy/ht6560b.c
index 2e5a9cc..a4245d1 100644
--- a/drivers/ide/legacy/ht6560b.c
+++ b/drivers/ide/legacy/ht6560b.c
@@ -247,6 +247,8 @@
 	}
 }
 
+static DEFINE_SPINLOCK(ht6560b_lock);
+
 /*
  *  Enable/Disable so called prefetch mode
  */
@@ -254,9 +256,9 @@
 {
 	unsigned long flags;
 	int t = HT_PREFETCH_MODE << 8;
-	
-	spin_lock_irqsave(&ide_lock, flags);
-	
+
+	spin_lock_irqsave(&ht6560b_lock, flags);
+
 	/*
 	 *  Prefetch mode and unmask irq seems to conflict
 	 */
@@ -268,9 +270,9 @@
 		drive->drive_data &= ~t;  /* disable prefetch mode */
 		drive->no_unmask = 0;
 	}
-	
-	spin_unlock_irqrestore(&ide_lock, flags);
-	
+
+	spin_unlock_irqrestore(&ht6560b_lock, flags);
+
 #ifdef DEBUG
 	printk("ht6560b: drive %s prefetch mode %sabled\n", drive->name, (state ? "en" : "dis"));
 #endif
@@ -287,16 +289,14 @@
 		ht_set_prefetch(drive, pio & 1);
 		return;
 	}
-	
+
 	timing = ht_pio2timings(drive, pio);
-	
-	spin_lock_irqsave(&ide_lock, flags);
-	
+
+	spin_lock_irqsave(&ht6560b_lock, flags);
 	drive->drive_data &= 0xff00;
 	drive->drive_data |= timing;
-	
-	spin_unlock_irqrestore(&ide_lock, flags);
-	
+	spin_unlock_irqrestore(&ht6560b_lock, flags);
+
 #ifdef DEBUG
 	printk("ht6560b: drive %s tuned to pio mode %#x timing=%#x\n", drive->name, pio, timing);
 #endif
@@ -311,6 +311,7 @@
 int __init ht6560b_init(void)
 {
 	ide_hwif_t *hwif, *mate;
+	static u8 idx[4] = { 0, 1, 0xff, 0xff };
 	int t;
 
 	if (probe_ht6560b == 0)
@@ -359,11 +360,7 @@
 	mate->drives[0].drive_data = t;
 	mate->drives[1].drive_data = t;
 
-	probe_hwif_init(hwif);
-	probe_hwif_init(mate);
-
-	ide_proc_register_port(hwif);
-	ide_proc_register_port(mate);
+	ide_device_add(idx);
 
 	return 0;
 
diff --git a/drivers/ide/legacy/ide-cs.c b/drivers/ide/legacy/ide-cs.c
index e8e360c..03715c0 100644
--- a/drivers/ide/legacy/ide-cs.c
+++ b/drivers/ide/legacy/ide-cs.c
@@ -153,7 +153,7 @@
     hw.irq = irq;
     hw.chipset = ide_pci;
     hw.dev = &handle->dev;
-    return ide_register_hw_with_fixup(&hw, 0, NULL, ide_undecoded_slave);
+    return ide_register_hw(&hw, &ide_undecoded_slave, 0, NULL);
 }
 
 /*======================================================================
diff --git a/drivers/ide/legacy/ide_platform.c b/drivers/ide/legacy/ide_platform.c
index b992b2b..7bb79f5 100644
--- a/drivers/ide/legacy/ide_platform.c
+++ b/drivers/ide/legacy/ide_platform.c
@@ -33,39 +33,24 @@
 	    int mmio)
 {
 	unsigned long port = (unsigned long)base;
-	ide_hwif_t *hwif;
-	int index, i;
+	ide_hwif_t *hwif = ide_find_port(port);
+	int i;
 
-	for (index = 0; index < MAX_HWIFS; ++index) {
-		hwif = ide_hwifs + index;
-		if (hwif->io_ports[IDE_DATA_OFFSET] == port)
-			goto found;
-	}
+	if (hwif == NULL)
+		goto out;
 
-	for (index = 0; index < MAX_HWIFS; ++index) {
-		hwif = ide_hwifs + index;
-		if (hwif->io_ports[IDE_DATA_OFFSET] == 0)
-			goto found;
-	}
-
-	return NULL;
-
-found:
-
-	hwif->hw.io_ports[IDE_DATA_OFFSET] = port;
+	hwif->io_ports[IDE_DATA_OFFSET] = port;
 
 	port += (1 << pdata->ioport_shift);
 	for (i = IDE_ERROR_OFFSET; i <= IDE_STATUS_OFFSET;
 	     i++, port += (1 << pdata->ioport_shift))
-		hwif->hw.io_ports[i] = port;
+		hwif->io_ports[i] = port;
 
-	hwif->hw.io_ports[IDE_CONTROL_OFFSET] = (unsigned long)ctrl;
+	hwif->io_ports[IDE_CONTROL_OFFSET] = (unsigned long)ctrl;
 
-	memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports));
-	hwif->hw.irq = hwif->irq = irq;
+	hwif->irq = irq;
 
-	hwif->hw.dma = NO_DMA;
-	hwif->chipset = hwif->hw.chipset = ide_generic;
+	hwif->chipset = ide_generic;
 
 	if (mmio) {
 		hwif->mmio = 1;
@@ -73,8 +58,8 @@
 	}
 
 	hwif_prop.hwif = hwif;
-	hwif_prop.index = index;
-
+	hwif_prop.index = hwif->index;
+out:
 	return hwif;
 }
 
@@ -83,6 +68,7 @@
 	struct resource *res_base, *res_alt, *res_irq;
 	ide_hwif_t *hwif;
 	struct pata_platform_info *pdata;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 	int ret = 0;
 	int mmio = 0;
 
@@ -130,10 +116,11 @@
 	hwif->gendev.parent = &pdev->dev;
 	hwif->noprobe = 0;
 
-	probe_hwif_init(hwif);
+	idx[0] = hwif->index;
+
+	ide_device_add(idx);
 
 	platform_set_drvdata(pdev, hwif);
-	ide_proc_register_port(hwif);
 
 	return 0;
 
diff --git a/drivers/ide/legacy/macide.c b/drivers/ide/legacy/macide.c
index b557c45..e87cd2f 100644
--- a/drivers/ide/legacy/macide.c
+++ b/drivers/ide/legacy/macide.c
@@ -93,21 +93,21 @@
 				0, 0, macide_ack_intr,
 //				quadra_ide_iops,
 				IRQ_NUBUS_F);
-		index = ide_register_hw(&hw, 1, &hwif);
+		index = ide_register_hw(&hw, NULL, 1, &hwif);
 		break;
 	case MAC_IDE_PB:
 		ide_setup_ports(&hw, IDE_BASE, macide_offsets,
 				0, 0, macide_ack_intr,
 //				macide_pb_iops,
 				IRQ_NUBUS_C);
-		index = ide_register_hw(&hw, 1, &hwif);
+		index = ide_register_hw(&hw, NULL, 1, &hwif);
 		break;
 	case MAC_IDE_BABOON:
 		ide_setup_ports(&hw, BABOON_BASE, macide_offsets,
 				0, 0, NULL,
 //				macide_baboon_iops,
 				IRQ_BABOON_1);
-		index = ide_register_hw(&hw, 1, &hwif);
+		index = ide_register_hw(&hw, NULL, 1, &hwif);
 		if (index == -1) break;
 		if (macintosh_config->ident == MAC_MODEL_PB190) {
 
diff --git a/drivers/ide/legacy/q40ide.c b/drivers/ide/legacy/q40ide.c
index e628a98..44cdb74 100644
--- a/drivers/ide/legacy/q40ide.c
+++ b/drivers/ide/legacy/q40ide.c
@@ -89,9 +89,8 @@
 		else
 			hw->io_ports[i] = Q40_ISA_IO_B(base + offsets[i]);
 	}
-	
+
 	hw->irq = irq;
-	hw->dma = NO_DMA;
 	hw->ack_intr = ack_intr;
 /*
  *	hw->iops = iops;
@@ -142,7 +141,7 @@
 			0, NULL,
 //			m68kide_iops,
 			q40ide_default_irq(pcide_bases[i]));
-	index = ide_register_hw(&hw, 1, &hwif);
+	index = ide_register_hw(&hw, NULL, 1, &hwif);
 	// **FIXME**
 	if (index != -1)
 		hwif->mmio = 1;
diff --git a/drivers/ide/legacy/qd65xx.c b/drivers/ide/legacy/qd65xx.c
index 0c81d2d..912e738 100644
--- a/drivers/ide/legacy/qd65xx.c
+++ b/drivers/ide/legacy/qd65xx.c
@@ -89,26 +89,6 @@
 
 static int timings[4]={-1,-1,-1,-1}; /* stores current timing for each timer */
 
-static void qd_write_reg (u8 content, unsigned long reg)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ide_lock, flags);
-	outb(content,reg);
-	spin_unlock_irqrestore(&ide_lock, flags);
-}
-
-static u8 __init qd_read_reg (unsigned long reg)
-{
-	unsigned long flags;
-	u8 read;
-
-	spin_lock_irqsave(&ide_lock, flags);
-	read = inb(reg);
-	spin_unlock_irqrestore(&ide_lock, flags);
-	return read;
-}
-
 /*
  * qd_select:
  *
@@ -121,7 +101,7 @@
 			(QD_TIMREG(drive) & 0x02);
 
 	if (timings[index] != QD_TIMING(drive))
-		qd_write_reg(timings[index] = QD_TIMING(drive), QD_TIMREG(drive));
+		outb(timings[index] = QD_TIMING(drive), QD_TIMREG(drive));
 }
 
 /*
@@ -284,7 +264,7 @@
 	}
 
 	if (!HWIF(drive)->channel && drive->media != ide_disk) {
-		qd_write_reg(0x5f, QD_CONTROL_PORT);
+		outb(0x5f, QD_CONTROL_PORT);
 		printk(KERN_WARNING "%s: ATAPI: disabled read-ahead FIFO "
 			"and post-write buffer on %s.\n",
 			drive->name, HWIF(drive)->name);
@@ -301,16 +281,15 @@
 
 static int __init qd_testreg(int port)
 {
-	u8 savereg;
-	u8 readreg;
 	unsigned long flags;
+	u8 savereg, readreg;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	local_irq_save(flags);
 	savereg = inb_p(port);
 	outb_p(QD_TESTVAL, port);	/* safe value */
 	readreg = inb_p(port);
 	outb(savereg, port);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	local_irq_restore(flags);
 
 	if (savereg == QD_TESTVAL) {
 		printk(KERN_ERR "Outch ! the probe for qd65xx isn't reliable !\n");
@@ -364,13 +343,13 @@
 
 	if (set_pio_mode == (void *)qd6500_set_pio_mode) {
 		// will do it for both
-		qd_write_reg(QD6500_DEF_DATA, QD_TIMREG(&hwif->drives[0]));
+		outb(QD6500_DEF_DATA, QD_TIMREG(&hwif->drives[0]));
 	} else if (set_pio_mode == (void *)qd6580_set_pio_mode) {
 		if (QD_CONTROL(hwif) & QD_CONTR_SEC_DISABLED) {
-			qd_write_reg(QD6580_DEF_DATA, QD_TIMREG(&hwif->drives[0]));
-			qd_write_reg(QD6580_DEF_DATA2, QD_TIMREG(&hwif->drives[1]));
+			outb(QD6580_DEF_DATA, QD_TIMREG(&hwif->drives[0]));
+			outb(QD6580_DEF_DATA2, QD_TIMREG(&hwif->drives[1]));
 		} else {
-			qd_write_reg(hwif->channel ? QD6580_DEF_DATA2 : QD6580_DEF_DATA, QD_TIMREG(&hwif->drives[0]));
+			outb(hwif->channel ? QD6580_DEF_DATA2 : QD6580_DEF_DATA, QD_TIMREG(&hwif->drives[0]));
 		}
 	} else {
 		printk(KERN_WARNING "Unknown qd65xx tuning fonction !\n");
@@ -389,10 +368,11 @@
 static int __init qd_probe(int base)
 {
 	ide_hwif_t *hwif;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 	u8 config;
 	u8 unit;
 
-	config = qd_read_reg(QD_CONFIG_PORT);
+	config = inb(QD_CONFIG_PORT);
 
 	if (! ((config & QD_CONFIG_BASEPORT) >> 1 == (base == 0xb0)) )
 		return 1;
@@ -419,9 +399,9 @@
 
 		hwif->set_pio_mode = &qd6500_set_pio_mode;
 
-		probe_hwif_init(hwif);
+		idx[0] = unit;
 
-		ide_proc_register_port(hwif);
+		ide_device_add(idx);
 
 		return 1;
 	}
@@ -436,7 +416,7 @@
 
 		/* qd6580 found */
 
-		control = qd_read_reg(QD_CONTROL_PORT);
+		control = inb(QD_CONTROL_PORT);
 
 		printk(KERN_NOTICE "qd6580 at %#x\n", base);
 		printk(KERN_DEBUG "qd6580: config=%#x, control=%#x, ID3=%u\n",
@@ -453,11 +433,11 @@
 
 			hwif->set_pio_mode = &qd6580_set_pio_mode;
 
-			probe_hwif_init(hwif);
+			idx[0] = unit;
 
-			qd_write_reg(QD_DEF_CONTR,QD_CONTROL_PORT);
+			ide_device_add(idx);
 
-			ide_proc_register_port(hwif);
+			outb(QD_DEF_CONTR, QD_CONTROL_PORT);
 
 			return 1;
 		} else {
@@ -474,19 +454,17 @@
 
 			hwif->set_pio_mode = &qd6580_set_pio_mode;
 
-			probe_hwif_init(hwif);
-
 			qd_setup(mate, base, config | (control << 8),
 				 QD6580_DEF_DATA2, QD6580_DEF_DATA2);
 
 			mate->set_pio_mode = &qd6580_set_pio_mode;
 
-			probe_hwif_init(mate);
+			idx[0] = 0;
+			idx[1] = 1;
 
-			qd_write_reg(QD_DEF_CONTR,QD_CONTROL_PORT);
+			ide_device_add(idx);
 
-			ide_proc_register_port(hwif);
-			ide_proc_register_port(mate);
+			outb(QD_DEF_CONTR, QD_CONTROL_PORT);
 
 			return 0; /* no other qd65xx possible */
 		}
diff --git a/drivers/ide/legacy/umc8672.c b/drivers/ide/legacy/umc8672.c
index 1151c92..79577b9 100644
--- a/drivers/ide/legacy/umc8672.c
+++ b/drivers/ide/legacy/umc8672.c
@@ -124,8 +124,9 @@
 
 static int __init umc8672_probe(void)
 {
-	unsigned long flags;
 	ide_hwif_t *hwif, *mate;
+	unsigned long flags;
+	static u8 idx[4] = { 0, 1, 0xff, 0xff };
 
 	if (!request_region(0x108, 2, "umc8672")) {
 		printk(KERN_ERR "umc8672: ports 0x108-0x109 already in use.\n");
@@ -158,11 +159,7 @@
 	mate->mate = hwif;
 	mate->channel = 1;
 
-	probe_hwif_init(hwif);
-	probe_hwif_init(mate);
-
-	ide_proc_register_port(hwif);
-	ide_proc_register_port(mate);
+	ide_device_add(idx);
 
 	return 0;
 }
diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c
index 2f322d7..1de5856 100644
--- a/drivers/ide/mips/au1xxx-ide.c
+++ b/drivers/ide/mips/au1xxx-ide.c
@@ -601,8 +601,9 @@
 	_auide_hwif *ahwif = &auide_hwif;
 	ide_hwif_t *hwif;
 	struct resource *res;
-	hw_regs_t *hw;
 	int ret = 0;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
+	hw_regs_t hw;
 
 #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA)
 	char *mode = "MWDMA2";
@@ -644,12 +645,12 @@
 	/* FIXME:  This might possibly break PCMCIA IDE devices */
 
 	hwif                            = &ide_hwifs[pdev->id];
-	hw 				= &hwif->hw;
-	hwif->irq = hw->irq             = ahwif->irq;
+	hwif->irq			= ahwif->irq;
 	hwif->chipset                   = ide_au1xxx;
 
-	auide_setup_ports(hw, ahwif);
-	memcpy(hwif->io_ports, hw->io_ports, sizeof(hwif->io_ports));
+	memset(&hw, 0, sizeof(hw));
+	auide_setup_ports(&hw, ahwif);
+	memcpy(hwif->io_ports, hw.io_ports, sizeof(hwif->io_ports));
 
 	hwif->ultra_mask                = 0x0;  /* Disable Ultra DMA */
 #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
@@ -706,8 +707,10 @@
 	hwif->config_data               = 0;    /* no chipset-specific code */
 
 	hwif->drives[0].autotune        = 1;    /* 1=autotune, 2=noautotune, 0=default */
+	hwif->drives[1].autotune	= 1;
 #endif
-	hwif->drives[0].no_io_32bit     = 1;   
+	hwif->drives[0].no_io_32bit	= 1;
+	hwif->drives[1].no_io_32bit	= 1;
 
 	auide_hwif.hwif                 = hwif;
 	hwif->hwif_data                 = &auide_hwif;
@@ -717,9 +720,9 @@
 	dbdma_init_done = 1;
 #endif
 
-	probe_hwif_init(hwif);
+	idx[0] = hwif->index;
 
-	ide_proc_register_port(hwif);
+	ide_device_add(idx);
 
 	dev_set_drvdata(dev, hwif);
 
diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c
index c2e2957..521edd4 100644
--- a/drivers/ide/mips/swarm.c
+++ b/drivers/ide/mips/swarm.c
@@ -71,6 +71,7 @@
 	u8 __iomem *base;
 	phys_t offset, size;
 	int i;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	if (!SIBYTE_HAVE_IDE)
 		return -ENODEV;
@@ -119,18 +120,15 @@
 	hwif->noprobe = 0;
 
 	for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++)
-		hwif->hw.io_ports[i] =
+		hwif->io_ports[i] =
 				(unsigned long)(base + ((0x1f0 + i) << 5));
-	hwif->hw.io_ports[IDE_CONTROL_OFFSET] =
+	hwif->io_ports[IDE_CONTROL_OFFSET] =
 				(unsigned long)(base + (0x3f6 << 5));
-	hwif->hw.irq = K_INT_GB_IDE;
+	hwif->irq = K_INT_GB_IDE;
 
-	memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
-	hwif->irq = hwif->hw.irq;
+	idx[0] = hwif->index;
 
-	probe_hwif_init(hwif);
-
-	ide_proc_register_port(hwif);
+	ide_device_add(idx);
 
 	dev_set_drvdata(dev, hwif);
 
diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c
index b3dc12a..19ec421 100644
--- a/drivers/ide/pci/aec62xx.c
+++ b/drivers/ide/pci/aec62xx.c
@@ -1,5 +1,5 @@
 /*
- * linux/drivers/ide/pci/aec62xx.c		Version 0.26	Sep 1, 2007
+ * linux/drivers/ide/pci/aec62xx.c		Version 0.27	Sep 16, 2007
  *
  * Copyright (C) 1999-2002	Andre Hedrick <andre@linux-ide.org>
  * Copyright (C) 2007		MontaVista Software, Inc. <source@mvista.com>
@@ -141,19 +141,6 @@
 	drive->hwif->set_dma_mode(drive, pio + XFER_PIO_0);
 }
 
-static void aec62xx_dma_lost_irq (ide_drive_t *drive)
-{
-	switch (HWIF(drive)->pci_dev->device) {
-		case PCI_DEVICE_ID_ARTOP_ATP860:
-		case PCI_DEVICE_ID_ARTOP_ATP860R:
-		case PCI_DEVICE_ID_ARTOP_ATP865:
-		case PCI_DEVICE_ID_ARTOP_ATP865R:
-			printk(" AEC62XX time out ");
-		default:
-			break;
-	}
-}
-
 static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const char *name)
 {
 	int bus_speed = system_bus_clock();
@@ -195,8 +182,6 @@
 	if (hwif->dma_base == 0)
 		return;
 
-	hwif->dma_lost_irq	= &aec62xx_dma_lost_irq;
-
 	if (dev->device == PCI_DEVICE_ID_ARTOP_ATP850UF)
 		return;
 
@@ -209,7 +194,7 @@
 	}
 }
 
-static ide_pci_device_t aec62xx_chipsets[] __devinitdata = {
+static const struct ide_port_info aec62xx_chipsets[] __devinitdata = {
 	{	/* 0 */
 		.name		= "AEC6210",
 		.init_chipset	= init_chipset_aec62xx,
@@ -268,12 +253,12 @@
  *	finds a device matching our IDE device tables.
  *
  *	NOTE: since we're going to modify the 'name' field for AEC-6[26]80[R]
- *	chips, pass a local copy of 'struct pci_device_id' down the call chain.
+ *	chips, pass a local copy of 'struct ide_port_info' down the call chain.
  */
- 
+
 static int __devinit aec62xx_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t d;
+	struct ide_port_info d;
 	u8 idx = id->driver_data;
 
 	d = aec62xx_chipsets[idx];
diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c
index 8ee2b48..a607dd3 100644
--- a/drivers/ide/pci/alim15x3.c
+++ b/drivers/ide/pci/alim15x3.c
@@ -1,5 +1,5 @@
 /*
- * linux/drivers/ide/pci/alim15x3.c		Version 0.27	Aug 27 2007
+ * linux/drivers/ide/pci/alim15x3.c		Version 0.29	Sep 16 2007
  *
  *  Copyright (C) 1998-2000 Michel Aubry, Maintainer
  *  Copyright (C) 1998-2000 Andrzej Krzysztofowicz, Maintainer
@@ -492,6 +492,13 @@
 		 * clear bit 7
 		 */
 		pci_write_config_byte(dev, 0x4b, tmpbyte & 0x7F);
+		/*
+		 * check m1533, 0x5e, bit 1~4 == 1001 => & 00011110 = 00010010
+		 */
+		if (m5229_revision >= 0x20 && isa_dev) {
+			pci_read_config_byte(isa_dev, 0x5e, &tmpbyte);
+			chip_is_1543c_e = ((tmpbyte & 0x1e) == 0x12) ? 1: 0;
+		}
 		goto out;
 	}
 
@@ -537,7 +544,30 @@
 			pci_write_config_byte(isa_dev, 0x79, tmpbyte | 0x02);
 		}
 	}
+
 out:
+	/*
+	 * CD_ROM DMA on (m5229, 0x53, bit0)
+	 *      Enable this bit even if we want to use PIO.
+	 * PIO FIFO off (m5229, 0x53, bit1)
+	 *      The hardware will use 0x54h and 0x55h to control PIO FIFO.
+	 *	(Not on later devices it seems)
+	 *
+	 *	0x53 changes meaning on later revs - we must no touch
+	 *	bit 1 on them.  Need to check if 0x20 is the right break.
+	 */
+	if (m5229_revision >= 0x20) {
+		pci_read_config_byte(dev, 0x53, &tmpbyte);
+
+		if (m5229_revision <= 0x20)
+			tmpbyte = (tmpbyte & (~0x02)) | 0x01;
+		else if (m5229_revision == 0xc7 || m5229_revision == 0xc8)
+			tmpbyte |= 0x03;
+		else
+			tmpbyte |= 0x01;
+
+		pci_write_config_byte(dev, 0x53, tmpbyte);
+	}
 	pci_dev_put(north);
 	pci_dev_put(isa_dev);
 	local_irq_restore(flags);
@@ -616,36 +646,8 @@
 			if ((tmpbyte & (1 << hwif->channel)) == 0)
 				cbl = ATA_CBL_PATA80;
 		}
-	} else {
-		/*
-		 * check m1533, 0x5e, bit 1~4 == 1001 => & 00011110 = 00010010
-		 */
-		pci_read_config_byte(isa_dev, 0x5e, &tmpbyte);
-		chip_is_1543c_e = ((tmpbyte & 0x1e) == 0x12) ? 1: 0;
 	}
 
-	/*
-	 * CD_ROM DMA on (m5229, 0x53, bit0)
-	 *      Enable this bit even if we want to use PIO
-	 * PIO FIFO off (m5229, 0x53, bit1)
-	 *      The hardware will use 0x54h and 0x55h to control PIO FIFO
-	 *	(Not on later devices it seems)
-	 *
-	 *	0x53 changes meaning on later revs - we must no touch
-	 *	bit 1 on them. Need to check if 0x20 is the right break
-	 */
-	 
-	pci_read_config_byte(dev, 0x53, &tmpbyte);
-	
-	if(m5229_revision <= 0x20)
-		tmpbyte = (tmpbyte & (~0x02)) | 0x01;
-	else if (m5229_revision == 0xc7 || m5229_revision == 0xc8)
-		tmpbyte |= 0x03;
-	else
-		tmpbyte |= 0x01;
-
-	pci_write_config_byte(dev, 0x53, tmpbyte);
-
 	local_irq_restore(flags);
 
 	return cbl;
@@ -664,31 +666,9 @@
 	hwif->set_dma_mode = &ali_set_dma_mode;
 	hwif->udma_filter = &ali_udma_filter;
 
-	/* don't use LBA48 DMA on ALi devices before rev 0xC5 */
-	if (m5229_revision <= 0xC4)
-		hwif->host_flags |= IDE_HFLAG_NO_LBA48_DMA;
-
 	if (hwif->dma_base == 0)
 		return;
 
-	/*
-	 * check in ->init_dma guarantees m5229_revision >= 0x20 here
-	 */
-
-	if (m5229_revision == 0x20)
-		hwif->host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
-
-	if (m5229_revision <= 0x20)
-		hwif->ultra_mask = 0x00; /* no udma */
-	else if (m5229_revision < 0xC2)
-		hwif->ultra_mask = ATA_UDMA2;
-	else if (m5229_revision == 0xC2 || m5229_revision == 0xC3)
-		hwif->ultra_mask = ATA_UDMA4;
-	else if (m5229_revision == 0xC4)
-		hwif->ultra_mask = ATA_UDMA5;
-	else
-		hwif->ultra_mask = ATA_UDMA6;
-
 	hwif->dma_setup = &ali15x3_dma_setup;
 
 	if (hwif->cbl != ATA_CBL_PATA40_SHORT)
@@ -766,7 +746,7 @@
 	ide_setup_dma(hwif, dmabase, 8);
 }
 
-static ide_pci_device_t ali15x3_chipset __devinitdata = {
+static const struct ide_port_info ali15x3_chipset __devinitdata = {
 	.name		= "ALI15X3",
 	.init_chipset	= init_chipset_ali15x3,
 	.init_hwif	= init_hwif_ali15x3,
@@ -792,15 +772,34 @@
 		{ },
 	};
 
-	ide_pci_device_t *d = &ali15x3_chipset;
+	struct ide_port_info d = ali15x3_chipset;
+	u8 rev = dev->revision;
 
 	if (pci_dev_present(ati_rs100))
 		printk(KERN_WARNING "alim15x3: ATI Radeon IGP Northbridge is not yet fully tested.\n");
 
+	/* don't use LBA48 DMA on ALi devices before rev 0xC5 */
+	if (rev <= 0xC4)
+		d.host_flags |= IDE_HFLAG_NO_LBA48_DMA;
+
+	if (rev >= 0x20) {
+		if (rev == 0x20)
+			d.host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
+
+		if (rev < 0xC2)
+			d.udma_mask = ATA_UDMA2;
+		else if (rev == 0xC2 || rev == 0xC3)
+			d.udma_mask = ATA_UDMA4;
+		else if (rev == 0xC4)
+			d.udma_mask = ATA_UDMA5;
+		else
+			d.udma_mask = ATA_UDMA6;
+	}
+
 #if defined(CONFIG_SPARC64)
-	d->init_hwif = init_hwif_common_ali15x3;
+	d.init_hwif = init_hwif_common_ali15x3;
 #endif /* CONFIG_SPARC64 */
-	return ide_setup_pci_device(dev, d);
+	return ide_setup_pci_device(dev, &d);
 }
 
 
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index 7cafefb..8d4125e 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -77,7 +77,7 @@
 };
 
 static struct amd_ide_chip *amd_config;
-static ide_pci_device_t *amd_chipset;
+static const struct ide_port_info *amd_chipset;
 static unsigned int amd_80w;
 static unsigned int amd_clock;
 
@@ -242,19 +242,12 @@
 
 static void __devinit init_hwif_amd74xx(ide_hwif_t *hwif)
 {
-	int i;
-
 	if (hwif->irq == 0) /* 0 is bogus but will do for now */
 		hwif->irq = pci_get_legacy_ide_irq(hwif->pci_dev, hwif->channel);
 
 	hwif->set_pio_mode = &amd_set_pio_mode;
 	hwif->set_dma_mode = &amd_set_drive;
 
-	for (i = 0; i < 2; i++) {
-		hwif->drives[i].io_32bit = 1;
-		hwif->drives[i].unmask = 1;
-	}
-
 	if (!hwif->dma_base)
 		return;
 
@@ -270,16 +263,21 @@
 	}
 }
 
+#define IDE_HFLAGS_AMD \
+	(IDE_HFLAG_PIO_NO_BLACKLIST | \
+	 IDE_HFLAG_PIO_NO_DOWNGRADE | \
+	 IDE_HFLAG_POST_SET_MODE | \
+	 IDE_HFLAG_IO_32BIT | \
+	 IDE_HFLAG_UNMASK_IRQS | \
+	 IDE_HFLAG_BOOTABLE)
+
 #define DECLARE_AMD_DEV(name_str)					\
 	{								\
 		.name		= name_str,				\
 		.init_chipset	= init_chipset_amd74xx,			\
 		.init_hwif	= init_hwif_amd74xx,			\
 		.enablebits	= {{0x40,0x02,0x02}, {0x40,0x01,0x01}},	\
-		.host_flags	= IDE_HFLAG_PIO_NO_BLACKLIST |		\
-				  IDE_HFLAG_PIO_NO_DOWNGRADE |		\
-				  IDE_HFLAG_POST_SET_MODE |		\
-				  IDE_HFLAG_BOOTABLE,			\
+		.host_flags	= IDE_HFLAGS_AMD,			\
 		.pio_mask	= ATA_PIO5,				\
 		.swdma_mask	= ATA_SWDMA2,				\
 		.mwdma_mask	= ATA_MWDMA2,				\
@@ -291,16 +289,13 @@
 		.init_chipset	= init_chipset_amd74xx,			\
 		.init_hwif	= init_hwif_amd74xx,			\
 		.enablebits	= {{0x50,0x02,0x02}, {0x50,0x01,0x01}},	\
-		.host_flags	= IDE_HFLAG_PIO_NO_BLACKLIST |		\
-				  IDE_HFLAG_PIO_NO_DOWNGRADE |		\
-				  IDE_HFLAG_POST_SET_MODE |		\
-				  IDE_HFLAG_BOOTABLE,			\
+		.host_flags	= IDE_HFLAGS_AMD,			\
 		.pio_mask	= ATA_PIO5,				\
 		.swdma_mask	= ATA_SWDMA2,				\
 		.mwdma_mask	= ATA_MWDMA2,				\
 	}
 
-static ide_pci_device_t amd74xx_chipsets[] __devinitdata = {
+static const struct ide_port_info amd74xx_chipsets[] __devinitdata = {
 	/*  0 */ DECLARE_AMD_DEV("AMD7401"),
 	/*  1 */ DECLARE_AMD_DEV("AMD7409"),
 	/*  2 */ DECLARE_AMD_DEV("AMD7411"),
diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c
index 3078430..ef8e016 100644
--- a/drivers/ide/pci/atiixp.c
+++ b/drivers/ide/pci/atiixp.c
@@ -189,8 +189,7 @@
 	hwif->dma_host_off = &atiixp_dma_host_off;
 }
 
-
-static ide_pci_device_t atiixp_pci_info[] __devinitdata = {
+static const struct ide_port_info atiixp_pci_info[] __devinitdata = {
 	{	/* 0 */
 		.name		= "ATIIXP",
 		.init_hwif	= init_hwif_atiixp,
diff --git a/drivers/ide/pci/cmd640.c b/drivers/ide/pci/cmd640.c
index f369645..4aa4810 100644
--- a/drivers/ide/pci/cmd640.c
+++ b/drivers/ide/pci/cmd640.c
@@ -185,6 +185,8 @@
 
 #endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */
 
+static DEFINE_SPINLOCK(cmd640_lock);
+
 /*
  * These are initialized to point at the devices we control
  */
@@ -258,12 +260,12 @@
 
 static u8 get_cmd640_reg(u16 reg)
 {
-	u8 b;
 	unsigned long flags;
+	u8 b;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 	b = __get_cmd640_reg(reg);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 	return b;
 }
 
@@ -271,9 +273,9 @@
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 	__put_cmd640_reg(reg,val);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 }
 
 static int __init match_pci_cmd640_device (void)
@@ -351,7 +353,7 @@
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 
 	outb_p(0x0a, 0x170 + IDE_SELECT_OFFSET);	/* select drive0 */
 	udelay(100);
@@ -359,11 +361,11 @@
 		outb_p(0x1a, 0x170 + IDE_SELECT_OFFSET); /* select drive1 */
 		udelay(100);
 		if ((inb_p(0x170 + IDE_SELECT_OFFSET) & 0x1f) != 0x1a) {
-			spin_unlock_irqrestore(&ide_lock, flags);
+			spin_unlock_irqrestore(&cmd640_lock, flags);
 			return 0; /* nothing responded */
 		}
 	}
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 	return 1; /* success */
 }
 
@@ -440,11 +442,11 @@
 static void set_prefetch_mode (unsigned int index, int mode)
 {
 	ide_drive_t *drive = cmd_drives[index];
+	unsigned long flags;
 	int reg = prefetch_regs[index];
 	u8 b;
-	unsigned long flags;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 	b = __get_cmd640_reg(reg);
 	if (mode) {	/* want prefetch on? */
 #if CMD640_PREFETCH_MASKS
@@ -460,7 +462,7 @@
 		b |= prefetch_masks[index];	/* disable prefetch */
 	}
 	__put_cmd640_reg(reg, b);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 }
 
 /*
@@ -561,7 +563,7 @@
 	/*
 	 * Now that everything is ready, program the new timings
 	 */
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 	/*
 	 * Program the address_setup clocks into ARTTIM reg,
 	 * and then the active/recovery counts into the DRWTIM reg
@@ -570,7 +572,7 @@
 	setup_count |= __get_cmd640_reg(arttim_regs[index]) & 0x3f;
 	__put_cmd640_reg(arttim_regs[index], setup_count);
 	__put_cmd640_reg(drwtim_regs[index], pack_nibbles(active_count, recovery_count));
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 }
 
 /*
@@ -670,20 +672,20 @@
 
 static int pci_conf1(void)
 {
-	u32 tmp;
 	unsigned long flags;
+	u32 tmp;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 	outb(0x01, 0xCFB);
 	tmp = inl(0xCF8);
 	outl(0x80000000, 0xCF8);
 	if (inl(0xCF8) == 0x80000000) {
 		outl(tmp, 0xCF8);
-		spin_unlock_irqrestore(&ide_lock, flags);
+		spin_unlock_irqrestore(&cmd640_lock, flags);
 		return 1;
 	}
 	outl(tmp, 0xCF8);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 	return 0;
 }
 
@@ -691,15 +693,15 @@
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 	outb(0x00, 0xCFB);
 	outb(0x00, 0xCF8);
 	outb(0x00, 0xCFA);
 	if (inb(0xCF8) == 0x00 && inb(0xCF8) == 0x00) {
-		spin_unlock_irqrestore(&ide_lock, flags);
+		spin_unlock_irqrestore(&cmd640_lock, flags);
 		return 1;
 	}
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 	return 0;
 }
 
diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c
index adee2ef..ea0143e 100644
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -535,7 +535,6 @@
 		hwif->ide_dma_test_irq	= &cmd648_ide_dma_test_irq;
 		break;
 	case PCI_DEVICE_ID_CMD_646:
-		hwif->chipset = ide_cmd646;
 		if (dev->revision == 0x01) {
 			hwif->ide_dma_end = &cmd646_1_ide_dma_end;
 			break;
@@ -549,7 +548,7 @@
 	}
 }
 
-static ide_pci_device_t cmd64x_chipsets[] __devinitdata = {
+static const struct ide_port_info cmd64x_chipsets[] __devinitdata = {
 	{	/* 0 */
 		.name		= "CMD643",
 		.init_chipset	= init_chipset_cmd64x,
@@ -573,6 +572,7 @@
 		.init_chipset	= init_chipset_cmd64x,
 		.init_hwif	= init_hwif_cmd64x,
 		.enablebits	= {{0x51,0x04,0x04}, {0x51,0x08,0x08}},
+		.chipset	= ide_cmd646,
 		.host_flags	= IDE_HFLAG_ABUSE_PREFETCH | IDE_HFLAG_BOOTABLE,
 		.pio_mask	= ATA_PIO5,
 		.mwdma_mask	= ATA_MWDMA2,
@@ -591,7 +591,7 @@
 
 static int __devinit cmd64x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t d;
+	struct ide_port_info d;
 	u8 idx = id->driver_data;
 
 	d = cmd64x_chipsets[idx];
diff --git a/drivers/ide/pci/cs5520.c b/drivers/ide/pci/cs5520.c
index aa98e81..0466462 100644
--- a/drivers/ide/pci/cs5520.c
+++ b/drivers/ide/pci/cs5520.c
@@ -141,7 +141,7 @@
 		.pio_mask	= ATA_PIO4,			\
 	}
 
-static ide_pci_device_t cyrix_chipsets[] __devinitdata = {
+static const struct ide_port_info cyrix_chipsets[] __devinitdata = {
 	/* 0 */ DECLARE_CS_DEV("Cyrix 5510"),
 	/* 1 */ DECLARE_CS_DEV("Cyrix 5520")
 };
@@ -154,9 +154,8 @@
  
 static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_hwif_t *hwif = NULL, *mate = NULL;
-	ata_index_t index;
-	ide_pci_device_t *d = &cyrix_chipsets[id->driver_data];
+	const struct ide_port_info *d = &cyrix_chipsets[id->driver_data];
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	ide_setup_pci_noise(dev, d);
 
@@ -172,29 +171,14 @@
 		return -ENODEV;
 	}
 
-	index.all = 0xf0f0;
-
 	/*
 	 *	Now the chipset is configured we can let the core
 	 *	do all the device setup for us
 	 */
 
-	ide_pci_setup_ports(dev, d, 14, &index);
+	ide_pci_setup_ports(dev, d, 14, &idx[0]);
 
-	if ((index.b.low & 0xf0) != 0xf0)
-		hwif = &ide_hwifs[index.b.low];
-	if ((index.b.high & 0xf0) != 0xf0)
-		mate = &ide_hwifs[index.b.high];
-
-	if (hwif)
-		probe_hwif_init(hwif);
-	if (mate)
-		probe_hwif_init(mate);
-
-	if (hwif)
-		ide_proc_register_port(hwif);
-	if (mate)
-		ide_proc_register_port(mate);
+	ide_device_add(idx);
 
 	return 0;
 }
diff --git a/drivers/ide/pci/cs5530.c b/drivers/ide/pci/cs5530.c
index ba0c6eb..5994089 100644
--- a/drivers/ide/pci/cs5530.c
+++ b/drivers/ide/pci/cs5530.c
@@ -1,5 +1,5 @@
 /*
- * linux/drivers/ide/pci/cs5530.c		Version 0.76	Aug 3 2007
+ * linux/drivers/ide/pci/cs5530.c		Version 0.77	Sep 24 2007
  *
  * Copyright (C) 2000			Andre Hedrick <andre@linux-ide.org>
  * Copyright (C) 2000			Mark Lord <mlord@pobox.com>
@@ -146,7 +146,6 @@
 static unsigned int __devinit init_chipset_cs5530 (struct pci_dev *dev, const char *name)
 {
 	struct pci_dev *master_0 = NULL, *cs5530_0 = NULL;
-	unsigned long flags;
 
 	if (pci_resource_start(dev, 4) == 0)
 		return -EFAULT;
@@ -171,9 +170,6 @@
 		goto out;
 	}
 
-	spin_lock_irqsave(&ide_lock, flags);
-		/* all CPUs (there should only be one CPU with this chipset) */
-
 	/*
 	 * Enable BusMaster and MemoryWriteAndInvalidate for the cs5530:
 	 * -->  OR 0x14 into 16-bit PCI COMMAND reg of function 0 of the cs5530
@@ -224,8 +220,6 @@
 	pci_write_config_byte(master_0, 0x42, 0x00);
 	pci_write_config_byte(master_0, 0x43, 0xc1);
 
-	spin_unlock_irqrestore(&ide_lock, flags);
-
 out:
 	pci_dev_put(master_0);
 	pci_dev_put(cs5530_0);
@@ -261,7 +255,7 @@
 	hwif->udma_filter = cs5530_udma_filter;
 }
 
-static ide_pci_device_t cs5530_chipset __devinitdata = {
+static const struct ide_port_info cs5530_chipset __devinitdata = {
 	.name		= "CS5530",
 	.init_chipset	= init_chipset_cs5530,
 	.init_hwif	= init_hwif_cs5530,
diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c
index 5ac82ff..9094916 100644
--- a/drivers/ide/pci/cs5535.c
+++ b/drivers/ide/pci/cs5535.c
@@ -186,7 +186,7 @@
 	hwif->cbl = cs5535_cable_detect(hwif->pci_dev);
 }
 
-static ide_pci_device_t cs5535_chipset __devinitdata = {
+static const struct ide_port_info cs5535_chipset __devinitdata = {
 	.name		= "CS5535",
 	.init_hwif	= init_hwif_cs5535,
 	.host_flags	= IDE_HFLAG_SINGLE | IDE_HFLAG_POST_SET_MODE |
diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c
index efc20bd..3ef4fc1 100644
--- a/drivers/ide/pci/cy82c693.c
+++ b/drivers/ide/pci/cy82c693.c
@@ -428,7 +428,6 @@
  */
 static void __devinit init_hwif_cy82c693(ide_hwif_t *hwif)
 {
-	hwif->chipset = ide_cy82c693;
 	hwif->set_pio_mode = &cy82c693_set_pio_mode;
 
 	if (hwif->dma_base == 0)
@@ -449,11 +448,12 @@
 	}
 }
 
-static ide_pci_device_t cy82c693_chipset __devinitdata = {
+static const struct ide_port_info cy82c693_chipset __devinitdata = {
 	.name		= "CY82C693",
 	.init_chipset	= init_chipset_cy82c693,
 	.init_iops	= init_iops_cy82c693,
 	.init_hwif	= init_hwif_cy82c693,
+	.chipset	= ide_cy82c693,
 	.host_flags	= IDE_HFLAG_SINGLE | IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 			  IDE_HFLAG_BOOTABLE,
 	.pio_mask	= ATA_PIO4,
diff --git a/drivers/ide/pci/delkin_cb.c b/drivers/ide/pci/delkin_cb.c
index 46f4a88..8382908 100644
--- a/drivers/ide/pci/delkin_cb.c
+++ b/drivers/ide/pci/delkin_cb.c
@@ -80,7 +80,7 @@
 	hw.irq = dev->irq;
 	hw.chipset = ide_pci;		/* this enables IRQ sharing */
 
-	rc = ide_register_hw_with_fixup(&hw, 0, &hwif, ide_undecoded_slave);
+	rc = ide_register_hw(&hw, &ide_undecoded_slave, 0, &hwif);
 	if (rc < 0) {
 		printk(KERN_ERR "delkin_cb: ide_register_hw failed (%d)\n", rc);
 		pci_disable_device(dev);
diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c
index 5116583..f44d708 100644
--- a/drivers/ide/pci/generic.c
+++ b/drivers/ide/pci/generic.c
@@ -54,37 +54,24 @@
 module_param_named(all_generic_ide, ide_generic_all, bool, 0444);
 MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE storage controllers.");
 
-static void __devinit init_hwif_generic (ide_hwif_t *hwif)
-{
-	switch(hwif->pci_dev->device) {
-		case PCI_DEVICE_ID_UMC_UM8673F:
-		case PCI_DEVICE_ID_UMC_UM8886A:
-		case PCI_DEVICE_ID_UMC_UM8886BF:
-			hwif->irq = hwif->channel ? 15 : 14;
-			break;
-		default:
-			break;
-	}
-}
+#define IDE_HFLAGS_UMC (IDE_HFLAG_NO_DMA | IDE_HFLAG_FORCE_LEGACY_IRQS)
 
-#define DECLARE_GENERIC_PCI_DEV(name_str, dma_setting) \
+#define DECLARE_GENERIC_PCI_DEV(name_str, extra_flags) \
 	{ \
 		.name		= name_str, \
-		.init_hwif	= init_hwif_generic, \
 		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA | \
-				  dma_setting | \
+				  extra_flags | \
 				  IDE_HFLAG_BOOTABLE, \
 		.swdma_mask	= ATA_SWDMA2, \
 		.mwdma_mask	= ATA_MWDMA2, \
 		.udma_mask	= ATA_UDMA6, \
 	}
 
-static ide_pci_device_t generic_chipsets[] __devinitdata = {
+static const struct ide_port_info generic_chipsets[] __devinitdata = {
 	/*  0 */ DECLARE_GENERIC_PCI_DEV("Unknown",	0),
 
 	{	/* 1 */
 		.name		= "NS87410",
-		.init_hwif	= init_hwif_generic,
 		.enablebits	= {{0x43,0x08,0x08}, {0x47,0x08,0x08}},
 		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 				  IDE_HFLAG_BOOTABLE,
@@ -95,16 +82,15 @@
 
 	/*  2 */ DECLARE_GENERIC_PCI_DEV("SAMURAI",	0),
 	/*  3 */ DECLARE_GENERIC_PCI_DEV("HT6565",	0),
-	/*  4 */ DECLARE_GENERIC_PCI_DEV("UM8673F",	IDE_HFLAG_NO_DMA),
-	/*  5 */ DECLARE_GENERIC_PCI_DEV("UM8886A",	IDE_HFLAG_NO_DMA),
-	/*  6 */ DECLARE_GENERIC_PCI_DEV("UM8886BF",	IDE_HFLAG_NO_DMA),
+	/*  4 */ DECLARE_GENERIC_PCI_DEV("UM8673F",	IDE_HFLAGS_UMC),
+	/*  5 */ DECLARE_GENERIC_PCI_DEV("UM8886A",	IDE_HFLAGS_UMC),
+	/*  6 */ DECLARE_GENERIC_PCI_DEV("UM8886BF",	IDE_HFLAGS_UMC),
 	/*  7 */ DECLARE_GENERIC_PCI_DEV("HINT_IDE",	0),
 	/*  8 */ DECLARE_GENERIC_PCI_DEV("VIA_IDE",	IDE_HFLAG_NO_AUTODMA),
 	/*  9 */ DECLARE_GENERIC_PCI_DEV("OPTI621V",	IDE_HFLAG_NO_AUTODMA),
 
 	{	/* 10 */
 		.name		= "VIA8237SATA",
-		.init_hwif	= init_hwif_generic,
 		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 				  IDE_HFLAG_OFF_BOARD,
 		.swdma_mask	= ATA_SWDMA2,
@@ -118,7 +104,6 @@
 
 	{	/* 14 */
 		.name		= "Revolution",
-		.init_hwif	= init_hwif_generic,
 		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 				  IDE_HFLAG_OFF_BOARD,
 		.swdma_mask	= ATA_SWDMA2,
@@ -138,7 +123,7 @@
  
 static int __devinit generic_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t *d = &generic_chipsets[id->driver_data];
+	const struct ide_port_info *d = &generic_chipsets[id->driver_data];
 	int ret = -ENODEV;
 
 	/* Don't use the generic entry unless instructed to do so */
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
index 67af1a7..ae6307f 100644
--- a/drivers/ide/pci/hpt34x.c
+++ b/drivers/ide/pci/hpt34x.c
@@ -129,7 +129,7 @@
 	hwif->set_dma_mode = &hpt34x_set_mode;
 }
 
-static ide_pci_device_t hpt34x_chipsets[] __devinitdata = {
+static const struct ide_port_info hpt34x_chipsets[] __devinitdata = {
 	{ /* 0 */
 		.name		= "HPT343",
 		.init_chipset	= init_chipset_hpt34x,
@@ -158,7 +158,7 @@
 
 static int __devinit hpt34x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t *d;
+	const struct ide_port_info *d;
 	u16 pcicmd = 0;
 
 	pci_read_config_word(dev, PCI_COMMAND, &pcicmd);
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index 18f5b7d..612b795 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -1425,7 +1425,7 @@
 	return 0;
 }
 
-static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
+static const struct ide_port_info hpt366_chipsets[] __devinitdata = {
 	{	/* 0 */
 		.name		= "HPT36x",
 		.init_chipset	= init_chipset_hpt366,
@@ -1510,7 +1510,7 @@
 {
 	struct hpt_info *info = NULL;
 	struct pci_dev *dev2 = NULL;
-	ide_pci_device_t d;
+	struct ide_port_info d;
 	u8 idx = id->driver_data;
 	u8 rev = dev->revision;
 
diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c
index dfbe6051..90b52ed 100644
--- a/drivers/ide/pci/it8213.c
+++ b/drivers/ide/pci/it8213.c
@@ -193,7 +193,7 @@
 		.udma_mask	= ATA_UDMA6,		\
 	}
 
-static ide_pci_device_t it8213_chipsets[] __devinitdata = {
+static const struct ide_port_info it8213_chipsets[] __devinitdata = {
 	/* 0 */ DECLARE_ITE_DEV("IT8213"),
 };
 
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index ec45b72..1a7ddd1 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -638,7 +638,7 @@
 		.pio_mask	= ATA_PIO4,		\
 	}
 
-static ide_pci_device_t it821x_chipsets[] __devinitdata = {
+static const struct ide_port_info it821x_chipsets[] __devinitdata = {
 	/* 0 */ DECLARE_ITE_DEV("IT8212"),
 };
 
diff --git a/drivers/ide/pci/jmicron.c b/drivers/ide/pci/jmicron.c
index 2eeff67..bdf64d9 100644
--- a/drivers/ide/pci/jmicron.c
+++ b/drivers/ide/pci/jmicron.c
@@ -118,7 +118,7 @@
 		hwif->cbl = ata66_jmicron(hwif);
 }
 
-static ide_pci_device_t jmicron_chipset __devinitdata = {
+static const struct ide_port_info jmicron_chipset __devinitdata = {
 	.name		= "JMB",
 	.init_hwif	= init_hwif_jmicron,
 	.host_flags	= IDE_HFLAG_BOOTABLE,
diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c
index d21b589..d4df464 100644
--- a/drivers/ide/pci/ns87415.c
+++ b/drivers/ide/pci/ns87415.c
@@ -260,7 +260,7 @@
 	hwif->ide_dma_end = &ns87415_ide_dma_end;
 }
 
-static ide_pci_device_t ns87415_chipset __devinitdata = {
+static const struct ide_port_info ns87415_chipset __devinitdata = {
 	.name		= "NS87415",
 #ifdef CONFIG_SUPERIO
 	.init_iops	= init_iops_ns87415,
diff --git a/drivers/ide/pci/opti621.c b/drivers/ide/pci/opti621.c
index 3573ffe..8953d9c 100644
--- a/drivers/ide/pci/opti621.c
+++ b/drivers/ide/pci/opti621.c
@@ -1,5 +1,5 @@
 /*
- *  linux/drivers/ide/pci/opti621.c		Version 0.8	Aug 27, 2007
+ *  linux/drivers/ide/pci/opti621.c		Version 0.9	Sep 24, 2007
  *
  *  Copyright (C) 1996-1998  Linus Torvalds & authors (see below)
  */
@@ -133,6 +133,8 @@
 #define PIO_NOT_EXIST 254
 #define PIO_DONT_KNOW 255
 
+static DEFINE_SPINLOCK(opti621_lock);
+
 /* there are stored pio numbers from other calls of opti621_set_pio_mode */
 static void compute_pios(ide_drive_t *drive, const u8 pio)
 /* Store values into drive->drive_data
@@ -278,7 +280,7 @@
 		second.recovery_time, drdy);
 #endif
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&opti621_lock, flags);
 
      	reg_base = hwif->io_ports[IDE_DATA_OFFSET];
 
@@ -317,7 +319,7 @@
 	/*  and read prefetch for both drives */
 	write_reg(misc, MISC_REG);
 
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&opti621_lock, flags);
 }
 
 /*
@@ -331,7 +333,7 @@
 	hwif->set_pio_mode = &opti621_set_pio_mode;
 }
 
-static ide_pci_device_t opti621_chipsets[] __devinitdata = {
+static const struct ide_port_info opti621_chipsets[] __devinitdata = {
 	{	/* 0 */
 		.name		= "OPTI621",
 		.init_hwif	= init_hwif_opti621,
diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c
index d1e78234..4234efe 100644
--- a/drivers/ide/pci/pdc202xx_new.c
+++ b/drivers/ide/pci/pdc202xx_new.c
@@ -513,7 +513,7 @@
 		.udma_mask	= udma, \
 	}
 
-static ide_pci_device_t pdcnew_chipsets[] __devinitdata = {
+static const struct ide_port_info pdcnew_chipsets[] __devinitdata = {
 	/* 0 */ DECLARE_PDCNEW_DEV("PDC20268", ATA_UDMA5),
 	/* 1 */ DECLARE_PDCNEW_DEV("PDC20269", ATA_UDMA6),
 	/* 2 */ DECLARE_PDCNEW_DEV("PDC20270", ATA_UDMA5),
@@ -534,7 +534,7 @@
  
 static int __devinit pdc202new_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t *d;
+	const struct ide_port_info *d;
 	struct pci_dev *bridge = dev->bus->self;
 	u8 idx = id->driver_data;
 
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index 2930612..e09742e 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -302,13 +302,6 @@
 
 static void __devinit init_hwif_pdc202xx(ide_hwif_t *hwif)
 {
-	struct pci_dev *dev = hwif->pci_dev;
-
-	/* PDC20265 has problems with large LBA48 requests */
-	if ((dev->device == PCI_DEVICE_ID_PROMISE_20267) ||
-	    (dev->device == PCI_DEVICE_ID_PROMISE_20265))
-		hwif->rqsize = 256;
-
 	hwif->set_pio_mode = &pdc202xx_set_pio_mode;
 	hwif->set_dma_mode = &pdc202xx_set_mode;
 
@@ -382,7 +375,7 @@
 	}
 }
 
-#define DECLARE_PDC2026X_DEV(name_str, udma) \
+#define DECLARE_PDC2026X_DEV(name_str, udma, extra_flags) \
 	{ \
 		.name		= name_str, \
 		.init_chipset	= init_chipset_pdc202xx, \
@@ -390,13 +383,14 @@
 		.init_dma	= init_dma_pdc202xx, \
 		.extra		= 48, \
 		.host_flags	= IDE_HFLAG_ERROR_STOPS_FIFO | \
+				  extra_flags | \
 				  IDE_HFLAG_OFF_BOARD, \
 		.pio_mask	= ATA_PIO4, \
 		.mwdma_mask	= ATA_MWDMA2, \
 		.udma_mask	= udma, \
 	}
 
-static ide_pci_device_t pdc202xx_chipsets[] __devinitdata = {
+static const struct ide_port_info pdc202xx_chipsets[] __devinitdata = {
 	{	/* 0 */
 		.name		= "PDC20246",
 		.init_chipset	= init_chipset_pdc202xx,
@@ -410,10 +404,10 @@
 		.udma_mask	= ATA_UDMA2,
 	},
 
-	/* 1 */ DECLARE_PDC2026X_DEV("PDC20262", ATA_UDMA4),
-	/* 2 */ DECLARE_PDC2026X_DEV("PDC20263", ATA_UDMA4),
-	/* 3 */ DECLARE_PDC2026X_DEV("PDC20265", ATA_UDMA5),
-	/* 4 */ DECLARE_PDC2026X_DEV("PDC20267", ATA_UDMA5),
+	/* 1 */ DECLARE_PDC2026X_DEV("PDC20262", ATA_UDMA4, 0),
+	/* 2 */ DECLARE_PDC2026X_DEV("PDC20263", ATA_UDMA4, 0),
+	/* 3 */ DECLARE_PDC2026X_DEV("PDC20265", ATA_UDMA5, IDE_HFLAG_RQSIZE_256),
+	/* 4 */ DECLARE_PDC2026X_DEV("PDC20267", ATA_UDMA5, IDE_HFLAG_RQSIZE_256),
 };
 
 /**
@@ -427,7 +421,7 @@
  
 static int __devinit pdc202xx_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t *d;
+	const struct ide_port_info *d;
 	u8 idx = id->driver_data;
 
 	d = &pdc202xx_chipsets[idx];
diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c
index ec0c6e9..9329d4a 100644
--- a/drivers/ide/pci/piix.c
+++ b/drivers/ide/pci/piix.c
@@ -396,7 +396,7 @@
 		.udma_mask	= udma, \
 	}
 
-static ide_pci_device_t piix_pci_info[] __devinitdata = {
+static const struct ide_port_info piix_pci_info[] __devinitdata = {
 	/*  0 */ DECLARE_PIIX_DEV("PIIXa",	0x00),	/* no udma */
 	/*  1 */ DECLARE_PIIX_DEV("PIIXb",	0x00),	/* no udma */
 
@@ -449,9 +449,7 @@
  
 static int __devinit piix_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t *d = &piix_pci_info[id->driver_data];
-
-	return ide_setup_pci_device(dev, d);
+	return ide_setup_pci_device(dev, &piix_pci_info[id->driver_data]);
 }
 
 /**
diff --git a/drivers/ide/pci/rz1000.c b/drivers/ide/pci/rz1000.c
index dd2583e..6b10ae2 100644
--- a/drivers/ide/pci/rz1000.c
+++ b/drivers/ide/pci/rz1000.c
@@ -35,13 +35,13 @@
 	u16 reg;
 	struct pci_dev *dev = hwif->pci_dev;
 
-	hwif->chipset = ide_rz1000;
 	if (!pci_read_config_word (dev, 0x40, &reg) &&
 	    !pci_write_config_word(dev, 0x40, reg & 0xdfff)) {
 		printk(KERN_INFO "%s: disabled chipset read-ahead "
 			"(buggy RZ1000/RZ1001)\n", hwif->name);
 	} else {
-		hwif->serialized = 1;
+		if (hwif->mate)
+			hwif->mate->serialized = hwif->serialized = 1;
 		hwif->drives[0].no_unmask = 1;
 		hwif->drives[1].no_unmask = 1;
 		printk(KERN_INFO "%s: serialized, disabled unmasking "
@@ -49,9 +49,10 @@
 	}
 }
 
-static ide_pci_device_t rz1000_chipset __devinitdata = {
+static const struct ide_port_info rz1000_chipset __devinitdata = {
 	.name		= "RZ100x",
 	.init_hwif	= init_hwif_rz1000,
+	.chipset	= ide_rz1000,
 	.host_flags	= IDE_HFLAG_NO_DMA | IDE_HFLAG_BOOTABLE,
 };
 
diff --git a/drivers/ide/pci/sc1200.c b/drivers/ide/pci/sc1200.c
index b2423e0..d2c8b55 100644
--- a/drivers/ide/pci/sc1200.c
+++ b/drivers/ide/pci/sc1200.c
@@ -372,7 +372,7 @@
 	hwif->ide_dma_end   = &sc1200_ide_dma_end;
 }
 
-static ide_pci_device_t sc1200_chipset __devinitdata = {
+static const struct ide_port_info sc1200_chipset __devinitdata = {
 	.name		= "SC1200",
 	.init_hwif	= init_hwif_sc1200,
 	.host_flags	= IDE_HFLAG_SERIALIZE |
diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c
index ae9b5033..ebb7132 100644
--- a/drivers/ide/pci/scc_pata.c
+++ b/drivers/ide/pci/scc_pata.c
@@ -538,12 +538,13 @@
 /**
  *	init_setup_scc	-	set up an SCC PATA Controller
  *	@dev: PCI device
- *	@d: IDE PCI device
+ *	@d: IDE port info
  *
  *	Perform the initial set up for this device.
  */
 
-static int __devinit init_setup_scc(struct pci_dev *dev, ide_pci_device_t *d)
+static int __devinit init_setup_scc(struct pci_dev *dev,
+				    const struct ide_port_info *d)
 {
 	unsigned long ctl_base;
 	unsigned long dma_base;
@@ -702,7 +703,7 @@
       .pio_mask		= ATA_PIO4,			\
   }
 
-static ide_pci_device_t scc_chipsets[] __devinitdata = {
+static const struct ide_port_info scc_chipsets[] __devinitdata = {
 	/* 0 */ DECLARE_SCC_DEV("sccIDE"),
 };
 
@@ -717,9 +718,7 @@
 
 static int __devinit scc_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t *d = &scc_chipsets[id->driver_data];
-
-	return init_setup_scc(dev, d);
+	return init_setup_scc(dev, &scc_chipsets[id->driver_data]);
 }
 
 /**
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c
index a3d880e..a728031 100644
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -158,13 +158,6 @@
 
 	u8 ultra_enable	 = 0, ultra_timing = 0, dma_timing = 0;
 
-	/* If we are about to put a disk into UDMA mode we screwed up.
-	   Our code assumes we never _ever_ do this on an OSB4 */
-	   
-	if(dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4 &&
-		drive->media == ide_disk && speed >= XFER_UDMA_0)
-			BUG();
-
 	pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing);
 	pci_read_config_byte(dev, 0x54, &ultra_enable);
 
@@ -373,7 +366,7 @@
 	}
 }
 
-static ide_pci_device_t serverworks_chipsets[] __devinitdata = {
+static const struct ide_port_info serverworks_chipsets[] __devinitdata = {
 	{	/* 0 */
 		.name		= "SvrWks OSB4",
 		.init_chipset	= init_chipset_svwks,
@@ -430,7 +423,7 @@
  
 static int __devinit svwks_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t d;
+	struct ide_port_info d;
 	u8 idx = id->driver_data;
 
 	d = serverworks_chipsets[idx];
diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index 5af74ea..de820aa 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -614,6 +614,7 @@
 	void __iomem *virt_base;
 	ide_hwif_t *hwif;
 	int h;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	/*
 	 * Find an empty HWIF; if none available, return -ENOMEM.
@@ -654,10 +655,12 @@
 	}
 
 	if (hwif->io_ports[IDE_DATA_OFFSET] != cmd_base) {
+		hw_regs_t hw;
+
 		/* Initialize the IO registers */
-		sgiioc4_init_hwif_ports(&hwif->hw, cmd_base, ctl, irqport);
-		memcpy(hwif->io_ports, hwif->hw.io_ports,
-		       sizeof (hwif->io_ports));
+		memset(&hw, 0, sizeof(hw));
+		sgiioc4_init_hwif_ports(&hw, cmd_base, ctl, irqport);
+		memcpy(hwif->io_ports, hw.io_ports, sizeof(hwif->io_ports));
 		hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET];
 	}
 
@@ -679,11 +682,10 @@
 
 	ide_init_sgiioc4(hwif);
 
-	if (probe_hwif_init(hwif))
-		return -EIO;
+	idx[0] = hwif->index;
 
-	/* Create /proc/ide entries */
-	ide_proc_register_port(hwif);
+	if (ide_device_add(idx))
+		return -EIO;
 
 	return 0;
 }
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index 689786d..dc915cb 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -1,5 +1,5 @@
 /*
- * linux/drivers/ide/pci/siimage.c		Version 1.17	Oct 18 2007
+ * linux/drivers/ide/pci/siimage.c		Version 1.18	Oct 18 2007
  *
  * Copyright (C) 2001-2002	Andre Hedrick <andre@linux-ide.org>
  * Copyright (C) 2003		Red Hat <alan@redhat.com>
@@ -57,8 +57,8 @@
  
 static int pdev_is_sata(struct pci_dev *pdev)
 {
-	switch(pdev->device)
-	{
+#ifdef CONFIG_BLK_DEV_IDE_SATA
+	switch(pdev->device) {
 		case PCI_DEVICE_ID_SII_3112:
 		case PCI_DEVICE_ID_SII_1210SA:
 			return 1;
@@ -66,9 +66,10 @@
 			return 0;
 	}
 	BUG();
+#endif
 	return 0;
 }
- 
+
 /**
  *	is_sata			-	check if hwif is SATA
  *	@hwif:	interface to check
@@ -136,7 +137,7 @@
  *	SI3112 SATA controller life is a bit simpler.
  */
 
-static u8 sil_udma_filter(ide_drive_t *drive)
+static u8 sil_pata_udma_filter(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	unsigned long base = (unsigned long) hwif->hwif_data;
@@ -147,23 +148,23 @@
 	else
 		pci_read_config_byte(hwif->pci_dev, 0x8A, &scsc);
 
-	if (is_sata(hwif)) {
-		mask = strstr(drive->id->model, "Maxtor") ? 0x3f : 0x7f;
-		goto out;
-	}
-
 	if ((scsc & 0x30) == 0x10)	/* 133 */
-		mask = 0x7f;
+		mask = ATA_UDMA6;
 	else if ((scsc & 0x30) == 0x20)	/* 2xPCI */
-		mask = 0x7f;
+		mask = ATA_UDMA6;
 	else if ((scsc & 0x30) == 0x00)	/* 100 */
-		mask = 0x3f;
+		mask = ATA_UDMA5;
 	else 	/* Disabled ? */
 		BUG();
-out:
+
 	return mask;
 }
 
+static u8 sil_sata_udma_filter(ide_drive_t *drive)
+{
+	return strstr(drive->id->model, "Maxtor") ? ATA_UDMA5 : ATA_UDMA6;
+}
+
 /**
  *	sil_set_pio_mode	-	set host controller for PIO mode
  *	@drive: drive
@@ -340,10 +341,11 @@
 static int siimage_mmio_ide_dma_test_irq (ide_drive_t *drive)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
-	unsigned long base	= (unsigned long)hwif->hwif_data;
 	unsigned long addr	= siimage_selreg(hwif, 0x1);
 
 	if (SATA_ERROR_REG) {
+		unsigned long base = (unsigned long)hwif->hwif_data;
+
 		u32 ext_stat = readl((void __iomem *)(base + 0x10));
 		u8 watchdog = 0;
 		if (ext_stat & ((hwif->channel) ? 0x40 : 0x10)) {
@@ -376,7 +378,7 @@
 }
 
 /**
- *	siimage_busproc		-	bus isolation ioctl
+ *	sil_sata_busproc	-	bus isolation IOCTL
  *	@drive: drive to isolate/restore
  *	@state: bus state to set
  *
@@ -384,8 +386,8 @@
  *	SATA controller the work required is quite limited, we 
  *	just have to clean up the statistics
  */
- 
-static int siimage_busproc (ide_drive_t * drive, int state)
+
+static int sil_sata_busproc(ide_drive_t * drive, int state)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
 	u32 stat_config		= 0;
@@ -417,14 +419,14 @@
 }
 
 /**
- *	siimage_reset_poll	-	wait for sata reset
+ *	sil_sata_reset_poll	-	wait for SATA reset
  *	@drive: drive we are resetting
  *
  *	Poll the SATA phy and see whether it has come back from the dead
  *	yet.
  */
- 
-static int siimage_reset_poll (ide_drive_t *drive)
+
+static int sil_sata_reset_poll(ide_drive_t *drive)
 {
 	if (SATA_STATUS_REG) {
 		ide_hwif_t *hwif	= HWIF(drive);
@@ -436,27 +438,22 @@
 			HWGROUP(drive)->polling = 0;
 			return ide_started;
 		}
-		return 0;
-	} else {
-		return 0;
 	}
+
+	return 0;
 }
 
 /**
- *	siimage_pre_reset	-	reset hook
+ *	sil_sata_pre_reset	-	reset hook
  *	@drive: IDE device being reset
  *
  *	For the SATA devices we need to handle recalibration/geometry
  *	differently
  */
- 
-static void siimage_pre_reset (ide_drive_t *drive)
-{
-	if (drive->media != ide_disk)
-		return;
 
-	if (is_sata(HWIF(drive)))
-	{
+static void sil_sata_pre_reset(ide_drive_t *drive)
+{
+	if (drive->media == ide_disk) {
 		drive->special.b.set_geometry = 0;
 		drive->special.b.recalibrate = 0;
 	}
@@ -502,7 +499,6 @@
 			drive->failures++;
 		}
 	}
-
 }
 
 /**
@@ -758,16 +754,11 @@
 		hwif->sata_misc[SATA_IEN_OFFSET]	= base + 0x148;
 	}
 
-	hw.irq				= hwif->pci_dev->irq;
+	memcpy(hwif->io_ports, hw.io_ports, sizeof(hwif->io_ports));
 
-	memcpy(&hwif->hw, &hw, sizeof(hw));
-	memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports));
+	hwif->irq = dev->irq;
 
-	hwif->irq			= hw.irq;
-
-       	base = (unsigned long) addr;
-
-	hwif->dma_base			= base + (ch ? 0x08 : 0x00);
+	hwif->dma_base = (unsigned long)addr + (ch ? 0x08 : 0x00);
 
 	hwif->mmio = 1;
 }
@@ -864,28 +855,31 @@
 
 static void __devinit init_hwif_siimage(ide_hwif_t *hwif)
 {
+	u8 sata = is_sata(hwif);
+
 	hwif->resetproc = &siimage_reset;
 	hwif->set_pio_mode = &sil_set_pio_mode;
 	hwif->set_dma_mode = &sil_set_dma_mode;
-	hwif->reset_poll = &siimage_reset_poll;
-	hwif->pre_reset = &siimage_pre_reset;
-	hwif->udma_filter = &sil_udma_filter;
 
-	if(is_sata(hwif)) {
+	if (sata) {
 		static int first = 1;
 
-		hwif->busproc   = &siimage_busproc;
+		hwif->busproc = &sil_sata_busproc;
+		hwif->reset_poll = &sil_sata_reset_poll;
+		hwif->pre_reset = &sil_sata_pre_reset;
+		hwif->udma_filter = &sil_sata_udma_filter;
 
 		if (first) {
 			printk(KERN_INFO "siimage: For full SATA support you should use the libata sata_sil module.\n");
 			first = 0;
 		}
-	}
+	} else
+		hwif->udma_filter = &sil_pata_udma_filter;
 
 	if (hwif->dma_base == 0)
 		return;
 
-	if (is_sata(hwif))
+	if (sata)
 		hwif->host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
 
 	if (hwif->cbl != ATA_CBL_PATA40_SHORT)
@@ -911,7 +905,7 @@
 		.udma_mask	= ATA_UDMA6,		\
 	}
 
-static ide_pci_device_t siimage_chipsets[] __devinitdata = {
+static const struct ide_port_info siimage_chipsets[] __devinitdata = {
 	/* 0 */ DECLARE_SII_DEV("SiI680"),
 	/* 1 */ DECLARE_SII_DEV("SiI3112 Serial ATA"),
 	/* 2 */ DECLARE_SII_DEV("Adaptec AAR-1210SA")
diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index c1d280b..6b7bb53 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -264,7 +264,7 @@
 	if (mode >= XFER_MW_DMA_0) {
 		t1 &= ~0x04;	/* disable UDMA */
 		idx = mode - XFER_MW_DMA_0 + 5;
-	}
+	} else
 		idx = mode - XFER_PIO_0;
 	t1 |= ini_time_value[clk][idx] << 12;
 	t1 |= act_time_value[clk][idx] << 16;
@@ -579,7 +579,7 @@
 		hwif->cbl = ata66_sis5513(hwif);
 }
 
-static ide_pci_device_t sis5513_chipset __devinitdata = {
+static const struct ide_port_info sis5513_chipset __devinitdata = {
 	.name		= "SIS5513",
 	.init_chipset	= init_chipset_sis5513,
 	.init_hwif	= init_hwif_sis5513,
diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c
index 0dce459..147d783 100644
--- a/drivers/ide/pci/sl82c105.c
+++ b/drivers/ide/pci/sl82c105.c
@@ -361,13 +361,6 @@
 	hwif->selectproc	= &sl82c105_selectproc;
 	hwif->resetproc 	= &sl82c105_resetproc;
 
-	/*
-	 * We support 32-bit I/O on this interface, and
-	 * it doesn't have problems with interrupts.
-	 */
-	hwif->drives[0].io_32bit = hwif->drives[1].io_32bit = 1;
-	hwif->drives[0].unmask   = hwif->drives[1].unmask   = 1;
-
 	if (!hwif->dma_base)
 		return;
 
@@ -394,12 +387,15 @@
 		hwif->serialized = hwif->mate->serialized = 1;
 }
 
-static ide_pci_device_t sl82c105_chipset __devinitdata = {
+static const struct ide_port_info sl82c105_chipset __devinitdata = {
 	.name		= "W82C105",
 	.init_chipset	= init_chipset_sl82c105,
 	.init_hwif	= init_hwif_sl82c105,
 	.enablebits	= {{0x40,0x01,0x01}, {0x40,0x10,0x10}},
-	.host_flags	= IDE_HFLAG_NO_AUTODMA | IDE_HFLAG_BOOTABLE,
+	.host_flags	= IDE_HFLAG_IO_32BIT |
+			  IDE_HFLAG_UNMASK_IRQS |
+			  IDE_HFLAG_NO_AUTODMA |
+			  IDE_HFLAG_BOOTABLE,
 	.pio_mask	= ATA_PIO5,
 };
 
diff --git a/drivers/ide/pci/slc90e66.c b/drivers/ide/pci/slc90e66.c
index 4f22dff..eb4445b 100644
--- a/drivers/ide/pci/slc90e66.c
+++ b/drivers/ide/pci/slc90e66.c
@@ -1,5 +1,5 @@
 /*
- *  linux/drivers/ide/pci/slc90e66.c	Version 0.18	Aug 9, 2007
+ *  linux/drivers/ide/pci/slc90e66.c	Version 0.19	Sep 24, 2007
  *
  *  Copyright (C) 2000-2002 Andre Hedrick <andre@linux-ide.org>
  *  Copyright (C) 2006-2007 MontaVista Software, Inc. <source@mvista.com>
@@ -21,6 +21,8 @@
 
 #include <asm/io.h>
 
+static DEFINE_SPINLOCK(slc90e66_lock);
+
 static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
@@ -40,7 +42,7 @@
 					{ 2, 1 },
 					{ 2, 3 }, };
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&slc90e66_lock, flags);
 	pci_read_config_word(dev, master_port, &master_data);
 
 	if (pio > 1)
@@ -71,7 +73,7 @@
 	pci_write_config_word(dev, master_port, master_data);
 	if (is_slave)
 		pci_write_config_byte(dev, slave_port, slave_data);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&slc90e66_lock, flags);
 }
 
 static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed)
@@ -146,7 +148,7 @@
 		hwif->cbl = (reg47 & mask) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
 }
 
-static ide_pci_device_t slc90e66_chipset __devinitdata = {
+static const struct ide_port_info slc90e66_chipset __devinitdata = {
 	.name		= "SLC90E66",
 	.init_hwif	= init_hwif_slc90e66,
 	.enablebits	= {{0x41,0x80,0x80}, {0x43,0x80,0x80}},
diff --git a/drivers/ide/pci/tc86c001.c b/drivers/ide/pci/tc86c001.c
index 631506e..a66ebd1 100644
--- a/drivers/ide/pci/tc86c001.c
+++ b/drivers/ide/pci/tc86c001.c
@@ -218,7 +218,7 @@
 	return err;
 }
 
-static ide_pci_device_t tc86c001_chipset __devinitdata = {
+static const struct ide_port_info tc86c001_chipset __devinitdata = {
 	.name		= "TC86C001",
 	.init_chipset	= init_chipset_tc86c001,
 	.init_hwif	= init_hwif_tc86c001,
diff --git a/drivers/ide/pci/triflex.c b/drivers/ide/pci/triflex.c
index 30b52f6..a227c41 100644
--- a/drivers/ide/pci/triflex.c
+++ b/drivers/ide/pci/triflex.c
@@ -102,7 +102,7 @@
 	hwif->set_dma_mode = &triflex_set_mode;
 }
 
-static ide_pci_device_t triflex_device __devinitdata = {
+static const struct ide_port_info triflex_device __devinitdata = {
 	.name		= "TRIFLEX",
 	.init_hwif	= init_hwif_triflex,
 	.enablebits	= {{0x80, 0x01, 0x01}, {0x80, 0x02, 0x02}},
diff --git a/drivers/ide/pci/trm290.c b/drivers/ide/pci/trm290.c
index 140d486..5011ba2 100644
--- a/drivers/ide/pci/trm290.c
+++ b/drivers/ide/pci/trm290.c
@@ -250,7 +250,6 @@
 	u8 reg = 0;
 	struct pci_dev *dev = hwif->pci_dev;
 
-	hwif->chipset = ide_trm290;
 	cfgbase = pci_resource_start(dev, 4);
 	if ((dev->class & 5) && cfgbase) {
 		hwif->config_data = cfgbase;
@@ -320,9 +319,10 @@
 #endif
 }
 
-static ide_pci_device_t trm290_chipset __devinitdata = {
+static const struct ide_port_info trm290_chipset __devinitdata = {
 	.name		= "TRM290",
 	.init_hwif	= init_hwif_trm290,
+	.chipset	= ide_trm290,
 	.host_flags	= IDE_HFLAG_NO_ATAPI_DMA |
 #if 0 /* play it safe for now */
 			  IDE_HFLAG_TRUST_BIOS_FOR_DMA |
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index c8022a9..a0d3c16 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -1,6 +1,6 @@
 /*
  *
- * Version 3.49
+ * Version 3.50
  *
  * VIA IDE driver for Linux. Supported southbridges:
  *
@@ -422,65 +422,40 @@
 
 static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif)
 {
-	struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
-	int i;
-
 	hwif->set_pio_mode = &via_set_pio_mode;
 	hwif->set_dma_mode = &via_set_drive;
 
-#ifdef CONFIG_PPC_CHRP
-	if(machine_is(chrp) && _chrp_type == _CHRP_Pegasos) {
-		hwif->irq = hwif->channel ? 15 : 14;
-	}
-#endif
-
-	for (i = 0; i < 2; i++) {
-		hwif->drives[i].io_32bit = 1;
-		hwif->drives[i].unmask = (vdev->via_config->flags & VIA_NO_UNMASK) ? 0 : 1;
-	}
-
 	if (!hwif->dma_base)
 		return;
 
-	hwif->ultra_mask = vdev->via_config->udma_mask;
-
 	if (hwif->cbl != ATA_CBL_PATA40_SHORT)
 		hwif->cbl = via82cxxx_cable_detect(hwif);
 }
 
-static ide_pci_device_t via82cxxx_chipsets[] __devinitdata = {
-	{	/* 0 */
-		.name		= "VP_IDE",
-		.init_chipset	= init_chipset_via82cxxx,
-		.init_hwif	= init_hwif_via82cxxx,
-		.enablebits	= {{0x40,0x02,0x02}, {0x40,0x01,0x01}},
-		.host_flags	= IDE_HFLAG_PIO_NO_BLACKLIST |
-				  IDE_HFLAG_PIO_NO_DOWNGRADE |
-				  IDE_HFLAG_POST_SET_MODE |
-				  IDE_HFLAG_NO_AUTODMA |
-				  IDE_HFLAG_BOOTABLE,
-		.pio_mask	= ATA_PIO5,
-		.swdma_mask	= ATA_SWDMA2,
-		.mwdma_mask	= ATA_MWDMA2,
-	},{	/* 1 */
-		.name		= "VP_IDE",
-		.init_chipset	= init_chipset_via82cxxx,
-		.init_hwif	= init_hwif_via82cxxx,
-		.enablebits	= {{0x00,0x00,0x00}, {0x00,0x00,0x00}},
-		.host_flags	= IDE_HFLAG_PIO_NO_BLACKLIST |
-				  IDE_HFLAG_PIO_NO_DOWNGRADE |
-				  IDE_HFLAG_POST_SET_MODE |
-				  IDE_HFLAG_BOOTABLE,
-		.pio_mask	= ATA_PIO5,
-		.swdma_mask	= ATA_SWDMA2,
-		.mwdma_mask	= ATA_MWDMA2,
-	}
+static const struct ide_port_info via82cxxx_chipset __devinitdata = {
+	.name		= "VP_IDE",
+	.init_chipset	= init_chipset_via82cxxx,
+	.init_hwif	= init_hwif_via82cxxx,
+	.enablebits	= { { 0x40, 0x02, 0x02 }, { 0x40, 0x01, 0x01 } },
+	.host_flags	= IDE_HFLAG_PIO_NO_BLACKLIST |
+			  IDE_HFLAG_PIO_NO_DOWNGRADE |
+			  IDE_HFLAG_POST_SET_MODE |
+			  IDE_HFLAG_IO_32BIT |
+			  IDE_HFLAG_BOOTABLE,
+	.pio_mask	= ATA_PIO5,
+	.swdma_mask	= ATA_SWDMA2,
+	.mwdma_mask	= ATA_MWDMA2,
 };
 
 static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	struct pci_dev *isa = NULL;
 	struct via_isa_bridge *via_config;
+	u8 idx = id->driver_data;
+	struct ide_port_info d;
+
+	d = via82cxxx_chipset;
+
 	/*
 	 * Find the ISA bridge and check we know what it is.
 	 */
@@ -490,7 +465,23 @@
 		printk(KERN_WARNING "VP_IDE: Unknown VIA SouthBridge, disabling DMA.\n");
 		return -ENODEV;
 	}
-	return ide_setup_pci_device(dev, &via82cxxx_chipsets[id->driver_data]);
+
+	if (idx == 0)
+		d.host_flags |= IDE_HFLAG_NO_AUTODMA;
+	else
+		d.enablebits[1].reg = d.enablebits[0].reg = 0;
+
+	if ((via_config->flags & VIA_NO_UNMASK) == 0)
+		d.host_flags |= IDE_HFLAG_UNMASK_IRQS;
+
+#ifdef CONFIG_PPC_CHRP
+	if (machine_is(chrp) && _chrp_type == _CHRP_Pegasos)
+		d.host_flags |= IDE_HFLAG_FORCE_LEGACY_IRQS;
+#endif
+
+	d.udma_mask = via_config->udma_mask;
+
+	return ide_setup_pci_device(dev, &d);
 }
 
 static const struct pci_device_id via_pci_tbl[] = {
diff --git a/drivers/ide/ppc/mpc8xx.c b/drivers/ide/ppc/mpc8xx.c
index df2e920..5f0da35 100644
--- a/drivers/ide/ppc/mpc8xx.c
+++ b/drivers/ide/ppc/mpc8xx.c
@@ -316,8 +316,8 @@
 
 	ide_hwifs[data_port].pio_mask = ATA_PIO4;
 	ide_hwifs[data_port].set_pio_mode = m8xx_ide_set_pio_mode;
+	ide_hwifs[data_port].ack_intr = (ide_ack_intr_t *)ide_interrupt_ack;
 
-	hw->ack_intr = (ide_ack_intr_t *) ide_interrupt_ack;
 	/* Enable Harddisk Interrupt,
 	 * and make it edge sensitive
 	 */
@@ -402,8 +402,8 @@
 
 	ide_hwifs[data_port].pio_mask = ATA_PIO4;
 	ide_hwifs[data_port].set_pio_mode = m8xx_ide_set_pio_mode;
+	ide_hwifs[data_port].ack_intr = (ide_ack_intr_t *)ide_interrupt_ack;
 
-	hw->ack_intr = (ide_ack_intr_t *) ide_interrupt_ack;
 	/* Enable Harddisk Interrupt,
 	 * and make it edge sensitive
 	 */
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index c554793..816b531 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -1039,6 +1039,8 @@
 {
 	struct device_node *np = pmif->node;
 	const int *bidp;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
+	hw_regs_t hw;
 
 	pmif->cable_80 = 0;
 	pmif->broken_dma = pmif->broken_dma_warn = 0;
@@ -1124,8 +1126,9 @@
 	/* Tell common code _not_ to mess with resources */
 	hwif->mmio = 1;
 	hwif->hwif_data = pmif;
-	pmac_ide_init_hwif_ports(&hwif->hw, pmif->regbase, 0, &hwif->irq);
-	memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+	memset(&hw, 0, sizeof(hw));
+	pmac_ide_init_hwif_ports(&hw, pmif->regbase, 0, &hwif->irq);
+	memcpy(hwif->io_ports, hw.io_ports, sizeof(hwif->io_ports));
 	hwif->chipset = ide_pmac;
 	hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET] || pmif->mediabay;
 	hwif->hold = pmif->mediabay;
@@ -1163,10 +1166,9 @@
 		pmac_ide_setup_dma(pmif, hwif);
 #endif /* CONFIG_BLK_DEV_IDEDMA_PMAC */
 
-	/* We probe the hwif now */
-	probe_hwif_init(hwif);
+	idx[0] = hwif->index;
 
-	ide_proc_register_port(hwif);
+	ide_device_add(idx);
 
 	return 0;
 }
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index fff567b..02d14bf 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -147,15 +147,15 @@
 #ifdef CONFIG_BLK_DEV_IDEDMA_PCI
 /**
  *	ide_get_or_set_dma_base		-	setup BMIBA
- *	@d: IDE pci device data
- *	@hwif: Interface
+ *	@d: IDE port info
+ *	@hwif: IDE interface
  *
  *	Fetch the DMA Bus-Master-I/O-Base-Address (BMIBA) from PCI space.
  *	Where a device has a partner that is already in DMA mode we check
  *	and enforce IDE simplex rules.
  */
 
-static unsigned long ide_get_or_set_dma_base(ide_pci_device_t *d, ide_hwif_t *hwif)
+static unsigned long ide_get_or_set_dma_base(const struct ide_port_info *d, ide_hwif_t *hwif)
 {
 	unsigned long	dma_base = 0;
 	struct pci_dev	*dev = hwif->pci_dev;
@@ -225,10 +225,11 @@
 }
 #endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
 
-void ide_setup_pci_noise (struct pci_dev *dev, ide_pci_device_t *d)
+void ide_setup_pci_noise(struct pci_dev *dev, const struct ide_port_info *d)
 {
-	printk(KERN_INFO "%s: IDE controller at PCI slot %s\n",
-			 d->name, pci_name(dev));
+	printk(KERN_INFO "%s: IDE controller (0x%04x:0x%04x rev 0x%02x) at "
+			 " PCI slot %s\n", d->name, dev->vendor, dev->device,
+			 dev->revision, pci_name(dev));
 }
 
 EXPORT_SYMBOL_GPL(ide_setup_pci_noise);
@@ -237,15 +238,15 @@
 /**
  *	ide_pci_enable	-	do PCI enables
  *	@dev: PCI device
- *	@d: IDE pci device data
+ *	@d: IDE port info
  *
  *	Enable the IDE PCI device. We attempt to enable the device in full
  *	but if that fails then we only need BAR4 so we will enable that.
  *	
  *	Returns zero on success or an error code
  */
- 
-static int ide_pci_enable(struct pci_dev *dev, ide_pci_device_t *d)
+
+static int ide_pci_enable(struct pci_dev *dev, const struct ide_port_info *d)
 {
 	int ret;
 
@@ -260,9 +261,9 @@
 	}
 
 	/*
-	 * assume all devices can do 32-bit dma for now. we can add a
-	 * dma mask field to the ide_pci_device_t if we need it (or let
-	 * lower level driver set the dma mask)
+	 * assume all devices can do 32-bit DMA for now, we can add
+	 * a DMA mask field to the struct ide_port_info if we need it
+	 * (or let lower level driver set the DMA mask)
 	 */
 	ret = pci_set_dma_mask(dev, DMA_32BIT_MASK);
 	if (ret < 0) {
@@ -284,13 +285,13 @@
 /**
  *	ide_pci_configure	-	configure an unconfigured device
  *	@dev: PCI device
- *	@d: IDE pci device data
+ *	@d: IDE port info
  *
  *	Enable and configure the PCI device we have been passed.
  *	Returns zero on success or an error code.
  */
- 
-static int ide_pci_configure(struct pci_dev *dev, ide_pci_device_t *d)
+
+static int ide_pci_configure(struct pci_dev *dev, const struct ide_port_info *d)
 {
 	u16 pcicmd = 0;
 	/*
@@ -318,15 +319,15 @@
 
 /**
  *	ide_pci_check_iomem	-	check a register is I/O
- *	@dev: pci device
- *	@d: ide_pci_device
- *	@bar: bar number
+ *	@dev: PCI device
+ *	@d: IDE port info
+ *	@bar: BAR number
  *
  *	Checks if a BAR is configured and points to MMIO space. If so
  *	print an error and return an error code. Otherwise return 0
  */
- 
-static int ide_pci_check_iomem(struct pci_dev *dev, ide_pci_device_t *d, int bar)
+
+static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info *d, int bar)
 {
 	ulong flags = pci_resource_flags(dev, bar);
 	
@@ -348,7 +349,7 @@
 /**
  *	ide_hwif_configure	-	configure an IDE interface
  *	@dev: PCI device holding interface
- *	@d: IDE pci data
+ *	@d: IDE port info
  *	@mate: Paired interface if any
  *
  *	Perform the initial set up for the hardware interface structure. This
@@ -357,8 +358,8 @@
  *
  *	Returns the new hardware interface structure, or NULL on a failure
  */
- 
-static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev, ide_pci_device_t *d, ide_hwif_t *mate, int port, int irq)
+
+static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev, const struct ide_port_info *d, ide_hwif_t *mate, int port, int irq)
 {
 	unsigned long ctl = 0, base = 0;
 	ide_hwif_t *hwif;
@@ -387,19 +388,20 @@
 		return NULL;	/* no room in ide_hwifs[] */
 	if (hwif->io_ports[IDE_DATA_OFFSET] != base ||
 	    hwif->io_ports[IDE_CONTROL_OFFSET] != (ctl | 2)) {
-		memset(&hwif->hw, 0, sizeof(hwif->hw));
-#ifndef IDE_ARCH_OBSOLETE_INIT
-		ide_std_init_ports(&hwif->hw, base, (ctl | 2));
-		hwif->hw.io_ports[IDE_IRQ_OFFSET] = 0;
+		hw_regs_t hw;
+
+		memset(&hw, 0, sizeof(hw));
+#ifndef CONFIG_IDE_ARCH_OBSOLETE_INIT
+		ide_std_init_ports(&hw, base, ctl | 2);
 #else
-		ide_init_hwif_ports(&hwif->hw, base, (ctl | 2), NULL);
+		ide_init_hwif_ports(&hw, base, ctl | 2, NULL);
 #endif
-		memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+		memcpy(hwif->io_ports, hw.io_ports, sizeof(hwif->io_ports));
 		hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET];
 	}
-	hwif->chipset = ide_pci;
+	hwif->chipset = d->chipset ? d->chipset : ide_pci;
 	hwif->pci_dev = dev;
-	hwif->cds = (struct ide_pci_device_s *) d;
+	hwif->cds = d;
 	hwif->channel = port;
 
 	if (!hwif->irq)
@@ -414,21 +416,17 @@
 /**
  *	ide_hwif_setup_dma	-	configure DMA interface
  *	@dev: PCI device
- *	@d: IDE pci data
- *	@hwif: Hardware interface we are configuring
+ *	@d: IDE port info
+ *	@hwif: IDE interface
  *
  *	Set up the DMA base for the interface. Enable the master bits as
  *	necessary and attempt to bring the device DMA into a ready to use
  *	state
  */
- 
-#ifndef CONFIG_BLK_DEV_IDEDMA_PCI
-static void ide_hwif_setup_dma(struct pci_dev *dev, ide_pci_device_t *d, ide_hwif_t *hwif)
+
+static void ide_hwif_setup_dma(struct pci_dev *dev, const struct ide_port_info *d, ide_hwif_t *hwif)
 {
-}
-#else
-static void ide_hwif_setup_dma(struct pci_dev *dev, ide_pci_device_t *d, ide_hwif_t *hwif)
-{
+#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
 	u16 pcicmd;
 
 	pci_read_config_word(dev, PCI_COMMAND, &pcicmd);
@@ -460,13 +458,13 @@
 				"(BIOS)\n", hwif->name, d->name);
 		}
 	}
-}
 #endif /* CONFIG_BLK_DEV_IDEDMA_PCI*/
+}
 
 /**
  *	ide_setup_pci_controller	-	set up IDE PCI
  *	@dev: PCI device
- *	@d: IDE PCI data
+ *	@d: IDE port info
  *	@noisy: verbose flag
  *	@config: returned as 1 if we configured the hardware
  *
@@ -474,8 +472,8 @@
  *	up the PCI side of the device, checks that the device is enabled
  *	and enables it if need be
  */
- 
-static int ide_setup_pci_controller(struct pci_dev *dev, ide_pci_device_t *d, int noisy, int *config)
+
+static int ide_setup_pci_controller(struct pci_dev *dev, const struct ide_port_info *d, int noisy, int *config)
 {
 	int ret;
 	u16 pcicmd;
@@ -500,9 +498,6 @@
 		printk(KERN_INFO "%s: device enabled (Linux)\n", d->name);
 	}
 
-	if (noisy)
-		printk(KERN_INFO "%s: chipset revision %d\n",
-				 d->name, dev->revision);
 out:
 	return ret;
 }
@@ -510,9 +505,9 @@
 /**
  *	ide_pci_setup_ports	-	configure ports/devices on PCI IDE
  *	@dev: PCI device
- *	@d: IDE pci device info
+ *	@d: IDE port info
  *	@pciirq: IRQ line
- *	@index: ata index to update
+ *	@idx: ATA index table to update
  *
  *	Scan the interfaces attached to this device and do any
  *	necessary per port setup. Attach the devices and ask the
@@ -522,26 +517,25 @@
  *	but is also used directly as a helper function by some controllers
  *	where the chipset setup is not the default PCI IDE one.
  */
- 
-void ide_pci_setup_ports(struct pci_dev *dev, ide_pci_device_t *d, int pciirq, ata_index_t *index)
+
+void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d, int pciirq, u8 *idx)
 {
 	int channels = (d->host_flags & IDE_HFLAG_SINGLE) ? 1 : 2, port;
-	int at_least_one_hwif_enabled = 0;
 	ide_hwif_t *hwif, *mate = NULL;
 	u8 tmp;
 
-	index->all = 0xf0f0;
-
 	/*
 	 * Set up the IDE ports
 	 */
-	 
+
 	for (port = 0; port < channels; ++port) {
-		ide_pci_enablebit_t *e = &(d->enablebits[port]);
-	
+		const ide_pci_enablebit_t *e = &(d->enablebits[port]);
+
 		if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) ||
-		    (tmp & e->mask) != e->val))
+		    (tmp & e->mask) != e->val)) {
+			printk(KERN_INFO "%s: IDE port disabled\n", d->name);
 			continue;	/* port not enabled */
+		}
 
 		if ((hwif = ide_hwif_configure(dev, d, mate, port, pciirq)) == NULL)
 			continue;
@@ -549,11 +543,7 @@
 		/* setup proper ancestral information */
 		hwif->gendev.parent = &dev->dev;
 
-		if (hwif->channel) {
-			index->b.high = hwif->index;
-		} else {
-			index->b.low = hwif->index;
-		}
+		*(idx + port) = hwif->index;
 
 		
 		if (d->init_iops)
@@ -562,15 +552,28 @@
 		if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0)
 			ide_hwif_setup_dma(dev, d, hwif);
 
-		if ((d->host_flags & IDE_HFLAG_LEGACY_IRQS) && hwif->irq == 0)
+		if ((!hwif->irq && (d->host_flags & IDE_HFLAG_LEGACY_IRQS)) ||
+		    (d->host_flags & IDE_HFLAG_FORCE_LEGACY_IRQS))
 			hwif->irq = port ? 15 : 14;
 
+		hwif->fixup = d->fixup;
+
 		hwif->host_flags = d->host_flags;
 		hwif->pio_mask = d->pio_mask;
 
 		if ((d->host_flags & IDE_HFLAG_SERIALIZE) && hwif->mate)
 			hwif->mate->serialized = hwif->serialized = 1;
 
+		if (d->host_flags & IDE_HFLAG_IO_32BIT) {
+			hwif->drives[0].io_32bit = 1;
+			hwif->drives[1].io_32bit = 1;
+		}
+
+		if (d->host_flags & IDE_HFLAG_UNMASK_IRQS) {
+			hwif->drives[0].unmask = 1;
+			hwif->drives[1].unmask = 1;
+		}
+
 		if (hwif->dma_base) {
 			hwif->swdma_mask = d->swdma_mask;
 			hwif->mwdma_mask = d->mwdma_mask;
@@ -580,6 +583,9 @@
 		hwif->drives[0].autotune = 1;
 		hwif->drives[1].autotune = 1;
 
+		if (d->host_flags & IDE_HFLAG_RQSIZE_256)
+			hwif->rqsize = 256;
+
 		if (d->init_hwif)
 			/* Call chipset-specific routine
 			 * for each enabled hwif
@@ -587,10 +593,7 @@
 			d->init_hwif(hwif);
 
 		mate = hwif;
-		at_least_one_hwif_enabled = 1;
 	}
-	if (!at_least_one_hwif_enabled)
-		printk(KERN_INFO "%s: neither IDE port enabled (BIOS)\n", d->name);
 }
 
 EXPORT_SYMBOL_GPL(ide_pci_setup_ports);
@@ -602,13 +605,13 @@
  *
  * One thing that is not standardized is the location of the
  * primary/secondary interface "enable/disable" bits.  For chipsets that
- * we "know" about, this information is in the ide_pci_device_t struct;
+ * we "know" about, this information is in the struct ide_port_info;
  * for all other chipsets, we just assume both interfaces are enabled.
  */
-static int do_ide_setup_pci_device(struct pci_dev *dev, ide_pci_device_t *d,
-				   ata_index_t *index, u8 noisy)
+static int do_ide_setup_pci_device(struct pci_dev *dev,
+				   const struct ide_port_info *d,
+				   u8 *idx, u8 noisy)
 {
-	static ata_index_t ata_index = { .b = { .low = 0xff, .high = 0xff } };
 	int tried_config = 0;
 	int pciirq, ret;
 
@@ -658,51 +661,35 @@
 
 	/* FIXME: silent failure can happen */
 
-	*index = ata_index;
-	ide_pci_setup_ports(dev, d, pciirq, index);
+	ide_pci_setup_ports(dev, d, pciirq, idx);
 out:
 	return ret;
 }
 
-int ide_setup_pci_device(struct pci_dev *dev, ide_pci_device_t *d)
+int ide_setup_pci_device(struct pci_dev *dev, const struct ide_port_info *d)
 {
-	ide_hwif_t *hwif = NULL, *mate = NULL;
-	ata_index_t index_list;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 	int ret;
 
-	ret = do_ide_setup_pci_device(dev, d, &index_list, 1);
-	if (ret < 0)
-		goto out;
+	ret = do_ide_setup_pci_device(dev, d, &idx[0], 1);
 
-	if ((index_list.b.low & 0xf0) != 0xf0)
-		hwif = &ide_hwifs[index_list.b.low];
-	if ((index_list.b.high & 0xf0) != 0xf0)
-		mate = &ide_hwifs[index_list.b.high];
+	if (ret >= 0)
+		ide_device_add(idx);
 
-	if (hwif)
-		probe_hwif_init_with_fixup(hwif, d->fixup);
-	if (mate)
-		probe_hwif_init_with_fixup(mate, d->fixup);
-
-	if (hwif)
-		ide_proc_register_port(hwif);
-	if (mate)
-		ide_proc_register_port(mate);
-out:
 	return ret;
 }
 
 EXPORT_SYMBOL_GPL(ide_setup_pci_device);
 
 int ide_setup_pci_devices(struct pci_dev *dev1, struct pci_dev *dev2,
-			  ide_pci_device_t *d)
+			  const struct ide_port_info *d)
 {
 	struct pci_dev *pdev[] = { dev1, dev2 };
-	ata_index_t index_list[2];
 	int ret, i;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	for (i = 0; i < 2; i++) {
-		ret = do_ide_setup_pci_device(pdev[i], d, index_list + i, !i);
+		ret = do_ide_setup_pci_device(pdev[i], d, &idx[i*2], !i);
 		/*
 		 * FIXME: Mom, mom, they stole me the helper function to undo
 		 * do_ide_setup_pci_device() on the first device!
@@ -711,25 +698,7 @@
 			goto out;
 	}
 
-	for (i = 0; i < 2; i++) {
-		u8 idx[2] = { index_list[i].b.low, index_list[i].b.high };
-		int j;
-
-		for (j = 0; j < 2; j++) {
-			if ((idx[j] & 0xf0) != 0xf0)
-				probe_hwif_init(ide_hwifs + idx[j]);
-		}
-	}
-
-	for (i = 0; i < 2; i++) {
-		u8 idx[2] = { index_list[i].b.low, index_list[i].b.high };
-		int j;
-
-		for (j = 0; j < 2; j++) {
-			if ((idx[j] & 0xf0) != 0xf0)
-				ide_proc_register_port(ide_hwifs + idx[j]);
-		}
-	}
+	ide_device_add(idx);
 out:
 	return ret;
 }
@@ -754,9 +723,6 @@
  *	hands the controllers off to the core PCI code to do the rest of
  *	the work.
  *
- *	The driver_data of the driver table must point to an ide_pci_device_t
- *	describing the interface.
- *
  *	Returns are the same as for pci_register_driver
  */
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 93644f8..d08fb30 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2797,11 +2797,12 @@
 
 static int cma_init(void)
 {
-	int ret, low, high;
+	int ret, low, high, remaining;
 
 	get_random_bytes(&next_port, sizeof next_port);
 	inet_get_local_port_range(&low, &high);
-	next_port = ((unsigned int) next_port % (high - low)) + low;
+	remaining = (high - low) + 1;
+	next_port = ((unsigned int) next_port % remaining) + low;
 
 	cma_wq = create_singlethread_workqueue("rdma_cm");
 	if (!cma_wq)
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 1d62c8b..e5b4e9b 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -495,7 +495,7 @@
 #ifdef CONFIG_COMPAT
 
 #define BITS_PER_LONG_COMPAT (sizeof(compat_long_t) * 8)
-#define NBITS_COMPAT(x) ((((x) - 1) / BITS_PER_LONG_COMPAT) + 1)
+#define BITS_TO_LONGS_COMPAT(x) ((((x) - 1) / BITS_PER_LONG_COMPAT) + 1)
 
 #ifdef __BIG_ENDIAN
 static int bits_to_user(unsigned long *bits, unsigned int maxbit,
@@ -504,7 +504,7 @@
 	int len, i;
 
 	if (compat) {
-		len = NBITS_COMPAT(maxbit) * sizeof(compat_long_t);
+		len = BITS_TO_LONGS_COMPAT(maxbit) * sizeof(compat_long_t);
 		if (len > maxlen)
 			len = maxlen;
 
@@ -515,7 +515,7 @@
 					 sizeof(compat_long_t)))
 				return -EFAULT;
 	} else {
-		len = NBITS(maxbit) * sizeof(long);
+		len = BITS_TO_LONGS(maxbit) * sizeof(long);
 		if (len > maxlen)
 			len = maxlen;
 
@@ -530,8 +530,8 @@
 			unsigned int maxlen, void __user *p, int compat)
 {
 	int len = compat ?
-			NBITS_COMPAT(maxbit) * sizeof(compat_long_t) :
-			NBITS(maxbit) * sizeof(long);
+			BITS_TO_LONGS_COMPAT(maxbit) * sizeof(compat_long_t) :
+			BITS_TO_LONGS(maxbit) * sizeof(long);
 
 	if (len > maxlen)
 		len = maxlen;
@@ -545,7 +545,7 @@
 static int bits_to_user(unsigned long *bits, unsigned int maxbit,
 			unsigned int maxlen, void __user *p, int compat)
 {
-	int len = NBITS(maxbit) * sizeof(long);
+	int len = BITS_TO_LONGS(maxbit) * sizeof(long);
 
 	if (len > maxlen)
 		len = maxlen;
diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
index ec1b6cf..bfc6061 100644
--- a/drivers/input/gameport/gameport.c
+++ b/drivers/input/gameport/gameport.c
@@ -136,7 +136,8 @@
 	}
 
 	gameport_close(gameport);
-	return (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx);
+	return (cpu_data(raw_smp_processor_id()).loops_per_jiffy *
+		(unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx);
 
 #else
 
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 2f2b020..307c7b5 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -584,10 +584,10 @@
 
 
 #define MATCH_BIT(bit, max) \
-		for (i = 0; i < NBITS(max); i++) \
+		for (i = 0; i < BITS_TO_LONGS(max); i++) \
 			if ((id->bit[i] & dev->bit[i]) != id->bit[i]) \
 				break; \
-		if (i != NBITS(max)) \
+		if (i != BITS_TO_LONGS(max)) \
 			continue;
 
 static const struct input_device_id *input_match_device(const struct input_device_id *id,
@@ -698,7 +698,7 @@
 {
 	int i;
 
-	for (i = NBITS(max) - 1; i > 0; i--)
+	for (i = BITS_TO_LONGS(max) - 1; i > 0; i--)
 		if (bitmap[i])
 			break;
 
@@ -892,7 +892,7 @@
 
 	len += snprintf(buf, max(size, 0), "%c", name);
 	for (i = min_bit; i < max_bit; i++)
-		if (bm[LONG(i)] & BIT(i))
+		if (bm[BIT_WORD(i)] & BIT_MASK(i))
 			len += snprintf(buf + len, max(size - len, 0), "%X,", i);
 	return len;
 }
@@ -991,7 +991,7 @@
 	int i;
 	int len = 0;
 
-	for (i = NBITS(max) - 1; i > 0; i--)
+	for (i = BITS_TO_LONGS(max) - 1; i > 0; i--)
 		if (bitmap[i])
 			break;
 
diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index 2b201f9..22b2789 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -844,8 +844,8 @@
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT(EV_KEY) },
-		.keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) },
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
 	},	/* Avoid itouchpads, touchscreens and tablets */
 	{ }	/* Terminating entry */
 };
@@ -854,20 +854,20 @@
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_ABSBIT,
-		.evbit = { BIT(EV_ABS) },
-		.absbit = { BIT(ABS_X) },
+		.evbit = { BIT_MASK(EV_ABS) },
+		.absbit = { BIT_MASK(ABS_X) },
 	},
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_ABSBIT,
-		.evbit = { BIT(EV_ABS) },
-		.absbit = { BIT(ABS_WHEEL) },
+		.evbit = { BIT_MASK(EV_ABS) },
+		.absbit = { BIT_MASK(ABS_WHEEL) },
 	},
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_ABSBIT,
-		.evbit = { BIT(EV_ABS) },
-		.absbit = { BIT(ABS_THROTTLE) },
+		.evbit = { BIT_MASK(EV_ABS) },
+		.absbit = { BIT_MASK(ABS_THROTTLE) },
 	},
 	{ }	/* Terminating entry */
 };
diff --git a/drivers/input/joystick/a3d.c b/drivers/input/joystick/a3d.c
index ff701ab..52ba16f 100644
--- a/drivers/input/joystick/a3d.c
+++ b/drivers/input/joystick/a3d.c
@@ -326,14 +326,19 @@
 
 		a3d->length = 33;
 
-		input_dev->evbit[0] |= BIT(EV_ABS) | BIT(EV_KEY) | BIT(EV_REL);
-		input_dev->relbit[0] |= BIT(REL_X) | BIT(REL_Y);
-		input_dev->absbit[0] |= BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_THROTTLE) | BIT(ABS_RUDDER)
-					| BIT(ABS_HAT0X) | BIT(ABS_HAT0Y) | BIT(ABS_HAT1X) | BIT(ABS_HAT1Y);
-		input_dev->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_RIGHT) | BIT(BTN_LEFT) | BIT(BTN_MIDDLE)
-							| BIT(BTN_SIDE) | BIT(BTN_EXTRA);
-		input_dev->keybit[LONG(BTN_JOYSTICK)] |= BIT(BTN_TRIGGER) | BIT(BTN_THUMB) | BIT(BTN_TOP)
-							| BIT(BTN_PINKIE);
+		input_dev->evbit[0] |= BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY) |
+			BIT_MASK(EV_REL);
+		input_dev->relbit[0] |= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+		input_dev->absbit[0] |= BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
+			BIT_MASK(ABS_THROTTLE) | BIT_MASK(ABS_RUDDER) |
+			BIT_MASK(ABS_HAT0X) | BIT_MASK(ABS_HAT0Y) |
+			BIT_MASK(ABS_HAT1X) | BIT_MASK(ABS_HAT1Y);
+		input_dev->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_RIGHT) |
+			BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_MIDDLE) |
+			BIT_MASK(BTN_SIDE) | BIT_MASK(BTN_EXTRA);
+		input_dev->keybit[BIT_WORD(BTN_JOYSTICK)] |=
+			BIT_MASK(BTN_TRIGGER) | BIT_MASK(BTN_THUMB) |
+			BIT_MASK(BTN_TOP) | BIT_MASK(BTN_PINKIE);
 
 		a3d_read(a3d, data);
 
@@ -348,9 +353,10 @@
 	} else {
 		a3d->length = 29;
 
-		input_dev->evbit[0] |= BIT(EV_KEY) | BIT(EV_REL);
-		input_dev->relbit[0] |= BIT(REL_X) | BIT(REL_Y);
-		input_dev->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_RIGHT) | BIT(BTN_LEFT) | BIT(BTN_MIDDLE);
+		input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+		input_dev->relbit[0] |= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+		input_dev->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_RIGHT) |
+			BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_MIDDLE);
 
 		a3d_read(a3d, data);
 
diff --git a/drivers/input/joystick/adi.c b/drivers/input/joystick/adi.c
index 28140c4..d1ca8a1 100644
--- a/drivers/input/joystick/adi.c
+++ b/drivers/input/joystick/adi.c
@@ -431,7 +431,7 @@
 	input_dev->open = adi_open;
 	input_dev->close = adi_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; i < adi->axes10 + adi->axes8 + (adi->hats + (adi->pad != -1)) * 2; i++)
 		set_bit(adi->abs[i], input_dev->absbit);
diff --git a/drivers/input/joystick/amijoy.c b/drivers/input/joystick/amijoy.c
index b0f5541..5cf9f36 100644
--- a/drivers/input/joystick/amijoy.c
+++ b/drivers/input/joystick/amijoy.c
@@ -137,9 +137,10 @@
 		amijoy_dev[i]->open = amijoy_open;
 		amijoy_dev[i]->close = amijoy_close;
 
-		amijoy_dev[i]->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-		amijoy_dev[i]->absbit[0] = BIT(ABS_X) | BIT(ABS_Y);
-		amijoy_dev[i]->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+		amijoy_dev[i]->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+		amijoy_dev[i]->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y);
+		amijoy_dev[i]->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+			BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 		for (j = 0; j < 2; j++) {
 			amijoy_dev[i]->absmin[ABS_X + j] = -1;
 			amijoy_dev[i]->absmax[ABS_X + j] = 1;
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index bdd157c..1573988 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -456,7 +456,7 @@
 	input_dev->open = analog_open;
 	input_dev->close = analog_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = j = 0; i < 4; i++)
 		if (analog->mask & (1 << i)) {
diff --git a/drivers/input/joystick/cobra.c b/drivers/input/joystick/cobra.c
index d3352a8..55646a6 100644
--- a/drivers/input/joystick/cobra.c
+++ b/drivers/input/joystick/cobra.c
@@ -218,7 +218,7 @@
 		input_dev->open = cobra_open;
 		input_dev->close = cobra_close;
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 		input_set_abs_params(input_dev, ABS_X, -1, 1, 0, 0);
 		input_set_abs_params(input_dev, ABS_Y, -1, 1, 0, 0);
 		for (j = 0; cobra_btn[j]; j++)
diff --git a/drivers/input/joystick/db9.c b/drivers/input/joystick/db9.c
index b069ee1..27fc475 100644
--- a/drivers/input/joystick/db9.c
+++ b/drivers/input/joystick/db9.c
@@ -631,7 +631,7 @@
 		input_dev->open = db9_open;
 		input_dev->close = db9_close;
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 		for (j = 0; j < db9_mode->n_buttons; j++)
 			set_bit(db9_mode->buttons[j], input_dev->keybit);
 		for (j = 0; j < db9_mode->n_axis; j++) {
diff --git a/drivers/input/joystick/gamecon.c b/drivers/input/joystick/gamecon.c
index 1a452e0..df2a9d0 100644
--- a/drivers/input/joystick/gamecon.c
+++ b/drivers/input/joystick/gamecon.c
@@ -653,12 +653,12 @@
 	input_dev->close = gc_close;
 
 	if (pad_type != GC_SNESMOUSE) {
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 		for (i = 0; i < 2; i++)
 			input_set_abs_params(input_dev, ABS_X + i, -1, 1, 0, 0);
 	} else
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
 
 	gc->pads[0] |= gc_status_bit[idx];
 	gc->pads[pad_type] |= gc_status_bit[idx];
diff --git a/drivers/input/joystick/gf2k.c b/drivers/input/joystick/gf2k.c
index d514aeb..1f6302c 100644
--- a/drivers/input/joystick/gf2k.c
+++ b/drivers/input/joystick/gf2k.c
@@ -315,7 +315,7 @@
 	input_dev->open = gf2k_open;
 	input_dev->close = gf2k_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; i < gf2k_axes[gf2k->id]; i++)
 		set_bit(gf2k_abs[i], input_dev->absbit);
diff --git a/drivers/input/joystick/grip.c b/drivers/input/joystick/grip.c
index 73eb5ab..fd3853a 100644
--- a/drivers/input/joystick/grip.c
+++ b/drivers/input/joystick/grip.c
@@ -370,7 +370,7 @@
 		input_dev->open = grip_open;
 		input_dev->close = grip_close;
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 		for (j = 0; (t = grip_abs[grip->mode[i]][j]) >= 0; j++) {
 
diff --git a/drivers/input/joystick/grip_mp.c b/drivers/input/joystick/grip_mp.c
index 4ed3a3e..c57e21d 100644
--- a/drivers/input/joystick/grip_mp.c
+++ b/drivers/input/joystick/grip_mp.c
@@ -606,7 +606,7 @@
 	input_dev->open = grip_open;
 	input_dev->close = grip_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (j = 0; (t = grip_abs[port->mode][j]) >= 0; j++)
 		input_set_abs_params(input_dev, t, -1, 1, 0, 0);
diff --git a/drivers/input/joystick/guillemot.c b/drivers/input/joystick/guillemot.c
index d4e8073..aa6bfb3 100644
--- a/drivers/input/joystick/guillemot.c
+++ b/drivers/input/joystick/guillemot.c
@@ -238,7 +238,7 @@
 	input_dev->open = guillemot_open;
 	input_dev->close = guillemot_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; (t = guillemot->type->abs[i]) >= 0; i++)
 		input_set_abs_params(input_dev, t, 0, 255, 0, 0);
diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c
index 682244b..6f826b3 100644
--- a/drivers/input/joystick/iforce/iforce-main.c
+++ b/drivers/input/joystick/iforce/iforce-main.c
@@ -389,7 +389,8 @@
  * Set input device bitfields and ranges.
  */
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_FF_STATUS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
+		BIT_MASK(EV_FF_STATUS);
 
 	for (i = 0; iforce->type->btn[i] >= 0; i++)
 		set_bit(iforce->type->btn[i], input_dev->keybit);
diff --git a/drivers/input/joystick/iforce/iforce.h b/drivers/input/joystick/iforce/iforce.h
index 40a853a..a964a7c 100644
--- a/drivers/input/joystick/iforce/iforce.h
+++ b/drivers/input/joystick/iforce/iforce.h
@@ -62,13 +62,13 @@
 #define FF_CORE_IS_PLAYED	3	/* Effect is currently being played */
 #define FF_CORE_SHOULD_PLAY	4	/* User wants the effect to be played */
 #define FF_CORE_UPDATE		5	/* Effect is being updated */
-#define FF_MODCORE_MAX		5
+#define FF_MODCORE_CNT		6
 
 struct iforce_core_effect {
 	/* Information about where modifiers are stored in the device's memory */
 	struct resource mod1_chunk;
 	struct resource mod2_chunk;
-	unsigned long flags[NBITS(FF_MODCORE_MAX)];
+	unsigned long flags[BITS_TO_LONGS(FF_MODCORE_CNT)];
 };
 
 #define FF_CMD_EFFECT		0x010e
diff --git a/drivers/input/joystick/interact.c b/drivers/input/joystick/interact.c
index 1aec1e9..bc8ea95 100644
--- a/drivers/input/joystick/interact.c
+++ b/drivers/input/joystick/interact.c
@@ -269,7 +269,7 @@
 	input_dev->open = interact_open;
 	input_dev->close = interact_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; (t = interact_type[interact->type].abs[i]) >= 0; i++) {
 		set_bit(t, input_dev->absbit);
diff --git a/drivers/input/joystick/magellan.c b/drivers/input/joystick/magellan.c
index b35604e..54e6769 100644
--- a/drivers/input/joystick/magellan.c
+++ b/drivers/input/joystick/magellan.c
@@ -170,7 +170,7 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; i < 9; i++)
 		set_bit(magellan_buttons[i], input_dev->keybit);
diff --git a/drivers/input/joystick/sidewinder.c b/drivers/input/joystick/sidewinder.c
index 2adf73f..7b4865f 100644
--- a/drivers/input/joystick/sidewinder.c
+++ b/drivers/input/joystick/sidewinder.c
@@ -758,7 +758,7 @@
 		input_dev->open = sw_open;
 		input_dev->close = sw_close;
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 		for (j = 0; (bits = sw_bit[sw->type][j]); j++) {
 			code = sw_abs[sw->type][j];
diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c
index abb7c4c..d4087fd 100644
--- a/drivers/input/joystick/spaceball.c
+++ b/drivers/input/joystick/spaceball.c
@@ -228,18 +228,23 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	switch (id) {
 		case SPACEBALL_4000FLX:
 		case SPACEBALL_4000FLX_L:
-			input_dev->keybit[LONG(BTN_0)] |= BIT(BTN_9);
-			input_dev->keybit[LONG(BTN_A)] |= BIT(BTN_A) | BIT(BTN_B) | BIT(BTN_C) | BIT(BTN_MODE);
+			input_dev->keybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_9);
+			input_dev->keybit[BIT_WORD(BTN_A)] |= BIT_MASK(BTN_A) |
+				BIT_MASK(BTN_B) | BIT_MASK(BTN_C) |
+				BIT_MASK(BTN_MODE);
 		default:
-			input_dev->keybit[LONG(BTN_0)] |= BIT(BTN_2) | BIT(BTN_3) | BIT(BTN_4)
-				| BIT(BTN_5) | BIT(BTN_6) | BIT(BTN_7) | BIT(BTN_8);
+			input_dev->keybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_2) |
+				BIT_MASK(BTN_3) | BIT_MASK(BTN_4) |
+				BIT_MASK(BTN_5) | BIT_MASK(BTN_6) |
+				BIT_MASK(BTN_7) | BIT_MASK(BTN_8);
 		case SPACEBALL_3003C:
-			input_dev->keybit[LONG(BTN_0)] |= BIT(BTN_1) | BIT(BTN_8);
+			input_dev->keybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_1) |
+				BIT_MASK(BTN_8);
 	}
 
 	for (i = 0; i < 3; i++) {
diff --git a/drivers/input/joystick/spaceorb.c b/drivers/input/joystick/spaceorb.c
index c4937f1..f7ce400 100644
--- a/drivers/input/joystick/spaceorb.c
+++ b/drivers/input/joystick/spaceorb.c
@@ -185,7 +185,7 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; i < 6; i++)
 		set_bit(spaceorb_buttons[i], input_dev->keybit);
diff --git a/drivers/input/joystick/stinger.c b/drivers/input/joystick/stinger.c
index 8581ee9..baa10b2 100644
--- a/drivers/input/joystick/stinger.c
+++ b/drivers/input/joystick/stinger.c
@@ -156,10 +156,11 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_A)] = BIT(BTN_A) | BIT(BTN_B) | BIT(BTN_C) | BIT(BTN_X) |
-					 BIT(BTN_Y) | BIT(BTN_Z) | BIT(BTN_TL) | BIT(BTN_TR) |
-					 BIT(BTN_START) | BIT(BTN_SELECT);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_A)] = BIT_MASK(BTN_A) | BIT_MASK(BTN_B) |
+		BIT_MASK(BTN_C) | BIT_MASK(BTN_X) | BIT_MASK(BTN_Y) |
+		BIT_MASK(BTN_Z) | BIT_MASK(BTN_TL) | BIT_MASK(BTN_TR) |
+		BIT_MASK(BTN_START) | BIT_MASK(BTN_SELECT);
 	input_set_abs_params(input_dev, ABS_X, -64, 64, 0, 4);
 	input_set_abs_params(input_dev, ABS_Y, -64, 64, 0, 4);
 
diff --git a/drivers/input/joystick/tmdc.c b/drivers/input/joystick/tmdc.c
index 3b36ee0..0feeb8a 100644
--- a/drivers/input/joystick/tmdc.c
+++ b/drivers/input/joystick/tmdc.c
@@ -333,7 +333,7 @@
 	input_dev->open = tmdc_open;
 	input_dev->close = tmdc_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; i < port->absc && i < TMDC_ABS; i++)
 		if (port->abs[i] >= 0)
diff --git a/drivers/input/joystick/turbografx.c b/drivers/input/joystick/turbografx.c
index 8381c6f..bbebd4e 100644
--- a/drivers/input/joystick/turbografx.c
+++ b/drivers/input/joystick/turbografx.c
@@ -229,7 +229,7 @@
 		input_dev->open = tgfx_open;
 		input_dev->close = tgfx_close;
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 		input_set_abs_params(input_dev, ABS_X, -1, 1, 0, 0);
 		input_set_abs_params(input_dev, ABS_Y, -1, 1, 0, 0);
 
diff --git a/drivers/input/joystick/twidjoy.c b/drivers/input/joystick/twidjoy.c
index c91504e..1085c84 100644
--- a/drivers/input/joystick/twidjoy.c
+++ b/drivers/input/joystick/twidjoy.c
@@ -207,7 +207,7 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 	input_set_abs_params(input_dev, ABS_X, -50, 50, 4, 4);
 	input_set_abs_params(input_dev, ABS_Y, -50, 50, 4, 4);
 
diff --git a/drivers/input/joystick/warrior.c b/drivers/input/joystick/warrior.c
index 4e85f72..e928b6e 100644
--- a/drivers/input/joystick/warrior.c
+++ b/drivers/input/joystick/warrior.c
@@ -162,9 +162,11 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TRIGGER)] = BIT(BTN_TRIGGER) | BIT(BTN_THUMB) | BIT(BTN_TOP) | BIT(BTN_TOP2);
-	input_dev->relbit[0] = BIT(REL_DIAL);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) |
+		BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TRIGGER)] = BIT_MASK(BTN_TRIGGER) |
+		BIT_MASK(BTN_THUMB) | BIT_MASK(BTN_TOP) | BIT_MASK(BTN_TOP2);
+	input_dev->relbit[0] = BIT_MASK(REL_DIAL);
 	input_set_abs_params(input_dev, ABS_X, -64, 64, 0, 8);
 	input_set_abs_params(input_dev, ABS_Y, -64, 64, 0, 8);
 	input_set_abs_params(input_dev, ABS_THROTTLE, -112, 112, 0, 0);
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 623629a..6dd3758 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -658,7 +658,7 @@
 	input_dev->open = xpad_open;
 	input_dev->close = xpad_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	/* set up buttons */
 	for (i = 0; xpad_btn[i] >= 0; i++)
diff --git a/drivers/input/keyboard/aaed2000_kbd.c b/drivers/input/keyboard/aaed2000_kbd.c
index 63d6ead..72abc19 100644
--- a/drivers/input/keyboard/aaed2000_kbd.c
+++ b/drivers/input/keyboard/aaed2000_kbd.c
@@ -125,7 +125,7 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &pdev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	input_dev->keycode = aaedkbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(aaedkbd_keycode);
diff --git a/drivers/input/keyboard/amikbd.c b/drivers/input/keyboard/amikbd.c
index c67e84e..81bf756 100644
--- a/drivers/input/keyboard/amikbd.c
+++ b/drivers/input/keyboard/amikbd.c
@@ -209,7 +209,7 @@
 	amikbd_dev->id.product = 0x0001;
 	amikbd_dev->id.version = 0x0100;
 
-	amikbd_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	amikbd_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 
 	for (i = 0; i < 0x78; i++)
 		set_bit(i, amikbd_dev->keybit);
diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c
index a180015..4e92100 100644
--- a/drivers/input/keyboard/atakbd.c
+++ b/drivers/input/keyboard/atakbd.c
@@ -237,7 +237,7 @@
 	atakbd_dev->id.product = 0x0001;
 	atakbd_dev->id.version = 0x0100;
 
-	atakbd_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	atakbd_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	atakbd_dev->keycode = atakbd_keycode;
 	atakbd_dev->keycodesize = sizeof(unsigned char);
 	atakbd_dev->keycodemax = ARRAY_SIZE(atakbd_keycode);
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 41fc3d0..b39c5b3 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -900,27 +900,32 @@
 
 	input_set_drvdata(input_dev, atkbd);
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_MSC);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+		BIT_MASK(EV_MSC);
 
 	if (atkbd->write) {
-		input_dev->evbit[0] |= BIT(EV_LED);
-		input_dev->ledbit[0] = BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL);
+		input_dev->evbit[0] |= BIT_MASK(EV_LED);
+		input_dev->ledbit[0] = BIT_MASK(LED_NUML) |
+			BIT_MASK(LED_CAPSL) | BIT_MASK(LED_SCROLLL);
 	}
 
 	if (atkbd->extra)
-		input_dev->ledbit[0] |= BIT(LED_COMPOSE) | BIT(LED_SUSPEND) |
-					BIT(LED_SLEEP) | BIT(LED_MUTE) | BIT(LED_MISC);
+		input_dev->ledbit[0] |= BIT_MASK(LED_COMPOSE) |
+			BIT_MASK(LED_SUSPEND) | BIT_MASK(LED_SLEEP) |
+			BIT_MASK(LED_MUTE) | BIT_MASK(LED_MISC);
 
 	if (!atkbd->softrepeat) {
 		input_dev->rep[REP_DELAY] = 250;
 		input_dev->rep[REP_PERIOD] = 33;
 	}
 
-	input_dev->mscbit[0] = atkbd->softraw ? BIT(MSC_SCAN) : BIT(MSC_RAW) | BIT(MSC_SCAN);
+	input_dev->mscbit[0] = atkbd->softraw ? BIT_MASK(MSC_SCAN) :
+		BIT_MASK(MSC_RAW) | BIT_MASK(MSC_SCAN);
 
 	if (atkbd->scroll) {
-		input_dev->evbit[0] |= BIT(EV_REL);
-		input_dev->relbit[0] = BIT(REL_WHEEL) | BIT(REL_HWHEEL);
+		input_dev->evbit[0] |= BIT_MASK(EV_REL);
+		input_dev->relbit[0] = BIT_MASK(REL_WHEEL) |
+			BIT_MASK(REL_HWHEEL);
 		set_bit(BTN_MIDDLE, input_dev->keybit);
 	}
 
diff --git a/drivers/input/keyboard/corgikbd.c b/drivers/input/keyboard/corgikbd.c
index 6578bff..790fed3 100644
--- a/drivers/input/keyboard/corgikbd.c
+++ b/drivers/input/keyboard/corgikbd.c
@@ -325,7 +325,8 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &pdev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_PWR) | BIT(EV_SW);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+		BIT_MASK(EV_PWR) | BIT_MASK(EV_SW);
 	input_dev->keycode = corgikbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(corgikbd_keycode);
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index e2a3293..3eddf52 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -62,7 +62,7 @@
 
 	platform_set_drvdata(pdev, input);
 
-	input->evbit[0] = BIT(EV_KEY);
+	input->evbit[0] = BIT_MASK(EV_KEY);
 
 	input->name = pdev->name;
 	input->phys = "gpio-keys/input0";
diff --git a/drivers/input/keyboard/hil_kbd.c b/drivers/input/keyboard/hil_kbd.c
index cdd254f..adbf29f 100644
--- a/drivers/input/keyboard/hil_kbd.c
+++ b/drivers/input/keyboard/hil_kbd.c
@@ -323,8 +323,9 @@
 		goto bail2;
 	}
 
-	kbd->dev->evbit[0]	= BIT(EV_KEY) | BIT(EV_REP);
-	kbd->dev->ledbit[0]	= BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL);
+	kbd->dev->evbit[0]	= BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
+	kbd->dev->ledbit[0]	= BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) |
+		BIT_MASK(LED_SCROLLL);
 	kbd->dev->keycodemax	= HIL_KEYCODES_SET1_TBLSIZE;
 	kbd->dev->keycodesize	= sizeof(hil_kbd_set1[0]);
 	kbd->dev->keycode	= hil_kbd_set1;
diff --git a/drivers/input/keyboard/hilkbd.c b/drivers/input/keyboard/hilkbd.c
index 499b697..50d80ec 100644
--- a/drivers/input/keyboard/hilkbd.c
+++ b/drivers/input/keyboard/hilkbd.c
@@ -266,8 +266,9 @@
 		if (hphilkeyb_keycode[i] != KEY_RESERVED)
 			set_bit(hphilkeyb_keycode[i], hil_dev.dev->keybit);
 
-	hil_dev.dev->evbit[0]	= BIT(EV_KEY) | BIT(EV_REP);
-	hil_dev.dev->ledbit[0]	= BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL);
+	hil_dev.dev->evbit[0]	= BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
+	hil_dev.dev->ledbit[0]	= BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) |
+		BIT_MASK(LED_SCROLLL);
 	hil_dev.dev->keycodemax	= HIL_KEYCODES_SET1_TBLSIZE;
 	hil_dev.dev->keycodesize= sizeof(hphilkeyb_keycode[0]);
 	hil_dev.dev->keycode	= hphilkeyb_keycode;
diff --git a/drivers/input/keyboard/locomokbd.c b/drivers/input/keyboard/locomokbd.c
index 7a41b27..5a0ca18 100644
--- a/drivers/input/keyboard/locomokbd.c
+++ b/drivers/input/keyboard/locomokbd.c
@@ -233,7 +233,7 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &dev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	input_dev->keycode = locomokbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(locomokbd_keycode);
diff --git a/drivers/input/keyboard/newtonkbd.c b/drivers/input/keyboard/newtonkbd.c
index b97a41e..48d1cab 100644
--- a/drivers/input/keyboard/newtonkbd.c
+++ b/drivers/input/keyboard/newtonkbd.c
@@ -106,7 +106,7 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	input_dev->keycode = nkbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(nkbd_keycode);
diff --git a/drivers/input/keyboard/pxa27x_keyboard.c b/drivers/input/keyboard/pxa27x_keyboard.c
index b7061aa..bdd64ee 100644
--- a/drivers/input/keyboard/pxa27x_keyboard.c
+++ b/drivers/input/keyboard/pxa27x_keyboard.c
@@ -183,8 +183,9 @@
 	input_dev->close = pxakbd_close;
 	input_dev->dev.parent = &pdev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_REL);
-	input_dev->relbit[LONG(REL_WHEEL)] = BIT(REL_WHEEL);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+		BIT_MASK(EV_REL);
+	input_dev->relbit[BIT_WORD(REL_WHEEL)] = BIT_MASK(REL_WHEEL);
 	for (row = 0; row < pdata->nr_rows; row++) {
 		for (col = 0; col < pdata->nr_cols; col++) {
 			int code = pdata->keycodes[row][col];
diff --git a/drivers/input/keyboard/spitzkbd.c b/drivers/input/keyboard/spitzkbd.c
index 41b8038..410d78a 100644
--- a/drivers/input/keyboard/spitzkbd.c
+++ b/drivers/input/keyboard/spitzkbd.c
@@ -381,7 +381,8 @@
 	input_dev->id.product = 0x0001;
 	input_dev->id.version = 0x0100;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_PWR) | BIT(EV_SW);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+		BIT_MASK(EV_PWR) | BIT_MASK(EV_SW);
 	input_dev->keycode = spitzkbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(spitzkbd_keycode);
diff --git a/drivers/input/keyboard/stowaway.c b/drivers/input/keyboard/stowaway.c
index b44b068..7437219 100644
--- a/drivers/input/keyboard/stowaway.c
+++ b/drivers/input/keyboard/stowaway.c
@@ -110,7 +110,7 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	input_dev->keycode = skbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(skbd_keycode);
diff --git a/drivers/input/keyboard/sunkbd.c b/drivers/input/keyboard/sunkbd.c
index 1d4e396..be0f5d1 100644
--- a/drivers/input/keyboard/sunkbd.c
+++ b/drivers/input/keyboard/sunkbd.c
@@ -277,9 +277,11 @@
 
 	input_dev->event = sunkbd_event;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_LED) | BIT(EV_SND) | BIT(EV_REP);
-	input_dev->ledbit[0] = BIT(LED_CAPSL) | BIT(LED_COMPOSE) | BIT(LED_SCROLLL) | BIT(LED_NUML);
-	input_dev->sndbit[0] = BIT(SND_CLICK) | BIT(SND_BELL);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) |
+		BIT_MASK(EV_SND) | BIT_MASK(EV_REP);
+	input_dev->ledbit[0] = BIT_MASK(LED_CAPSL) | BIT_MASK(LED_COMPOSE) |
+		BIT_MASK(LED_SCROLLL) | BIT_MASK(LED_NUML);
+	input_dev->sndbit[0] = BIT_MASK(SND_CLICK) | BIT_MASK(SND_BELL);
 
 	input_dev->keycode = sunkbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
diff --git a/drivers/input/keyboard/xtkbd.c b/drivers/input/keyboard/xtkbd.c
index f3a56eb..152a2c0 100644
--- a/drivers/input/keyboard/xtkbd.c
+++ b/drivers/input/keyboard/xtkbd.c
@@ -110,7 +110,7 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	input_dev->keycode = xtkbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(xtkbd_keycode);
diff --git a/drivers/input/misc/ati_remote.c b/drivers/input/misc/ati_remote.c
index 471aab2..3a79374 100644
--- a/drivers/input/misc/ati_remote.c
+++ b/drivers/input/misc/ati_remote.c
@@ -662,10 +662,10 @@
 	struct input_dev *idev = ati_remote->idev;
 	int i;
 
-	idev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	idev->keybit[LONG(BTN_MOUSE)] = ( BIT(BTN_LEFT) | BIT(BTN_RIGHT) |
-					  BIT(BTN_SIDE) | BIT(BTN_EXTRA) );
-	idev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	idev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	idev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_SIDE) | BIT_MASK(BTN_EXTRA);
+	idev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 	for (i = 0; ati_remote_tbl[i].kind != KIND_END; i++)
 		if (ati_remote_tbl[i].type == EV_KEY)
 			set_bit(ati_remote_tbl[i].code, idev->keybit);
diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c
index 1031543..f2709b8 100644
--- a/drivers/input/misc/ati_remote2.c
+++ b/drivers/input/misc/ati_remote2.c
@@ -346,9 +346,10 @@
 	ar2->idev = idev;
 	input_set_drvdata(idev, ar2);
 
-	idev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_REL);
-	idev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT);
-	idev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	idev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) | BIT_MASK(EV_REL);
+	idev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT);
+	idev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 	for (i = 0; ati_remote2_key_table[i].key_code != KEY_RESERVED; i++)
 		set_bit(ati_remote2_key_table[i].key_code, idev->keybit);
 
diff --git a/drivers/input/misc/atlas_btns.c b/drivers/input/misc/atlas_btns.c
index e43e92f..4e3ad65 100644
--- a/drivers/input/misc/atlas_btns.c
+++ b/drivers/input/misc/atlas_btns.c
@@ -81,7 +81,7 @@
 	input_dev->name = "Atlas ACPI button driver";
 	input_dev->phys = "ASIM0000/atlas/input0";
 	input_dev->id.bustype = BUS_HOST;
-	input_dev->evbit[LONG(EV_KEY)] = BIT(EV_KEY);
+	input_dev->evbit[BIT_WORD(EV_KEY)] = BIT_MASK(EV_KEY);
 
 	set_bit(KEY_F1, input_dev->keybit);
 	set_bit(KEY_F2, input_dev->keybit);
diff --git a/drivers/input/misc/cobalt_btns.c b/drivers/input/misc/cobalt_btns.c
index 064b079..1aef97e 100644
--- a/drivers/input/misc/cobalt_btns.c
+++ b/drivers/input/misc/cobalt_btns.c
@@ -104,7 +104,7 @@
 	input->id.bustype = BUS_HOST;
 	input->cdev.dev = &pdev->dev;
 
-	input->evbit[0] = BIT(EV_KEY);
+	input->evbit[0] = BIT_MASK(EV_KEY);
 	for (i = 0; i < ARRAY_SIZE(buttons_map); i++) {
 		set_bit(buttons_map[i].keycode, input->keybit);
 		buttons_map[i].count = 0;
diff --git a/drivers/input/misc/ixp4xx-beeper.c b/drivers/input/misc/ixp4xx-beeper.c
index e759944..d2ade74 100644
--- a/drivers/input/misc/ixp4xx-beeper.c
+++ b/drivers/input/misc/ixp4xx-beeper.c
@@ -109,8 +109,8 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &dev->dev;
 
-	input_dev->evbit[0] = BIT(EV_SND);
-	input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+	input_dev->evbit[0] = BIT_MASK(EV_SND);
+	input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
 	input_dev->event = ixp4xx_spkr_event;
 
 	err = request_irq(IRQ_IXP4XX_TIMER2, &ixp4xx_spkr_interrupt,
diff --git a/drivers/input/misc/keyspan_remote.c b/drivers/input/misc/keyspan_remote.c
index 1bffc9f..fd74347 100644
--- a/drivers/input/misc/keyspan_remote.c
+++ b/drivers/input/misc/keyspan_remote.c
@@ -497,7 +497,7 @@
 	usb_to_input_id(udev, &input_dev->id);
 	input_dev->dev.parent = &interface->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY);		/* We will only report KEY events. */
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);		/* We will only report KEY events. */
 	for (i = 0; i < ARRAY_SIZE(keyspan_key_table); i++)
 		if (keyspan_key_table[i] != KEY_RESERVED)
 			set_bit(keyspan_key_table[i], input_dev->keybit);
diff --git a/drivers/input/misc/m68kspkr.c b/drivers/input/misc/m68kspkr.c
index e9f26e7..0c64d9b 100644
--- a/drivers/input/misc/m68kspkr.c
+++ b/drivers/input/misc/m68kspkr.c
@@ -65,8 +65,8 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &dev->dev;
 
-	input_dev->evbit[0] = BIT(EV_SND);
-	input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+	input_dev->evbit[0] = BIT_MASK(EV_SND);
+	input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
 	input_dev->event = m68kspkr_event;
 
 	err = input_register_device(input_dev);
diff --git a/drivers/input/misc/pcspkr.c b/drivers/input/misc/pcspkr.c
index c19f77f..4941a9e 100644
--- a/drivers/input/misc/pcspkr.c
+++ b/drivers/input/misc/pcspkr.c
@@ -86,8 +86,8 @@
 	pcspkr_dev->id.version = 0x0100;
 	pcspkr_dev->dev.parent = &dev->dev;
 
-	pcspkr_dev->evbit[0] = BIT(EV_SND);
-	pcspkr_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+	pcspkr_dev->evbit[0] = BIT_MASK(EV_SND);
+	pcspkr_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
 	pcspkr_dev->event = pcspkr_event;
 
 	err = input_register_device(pcspkr_dev);
diff --git a/drivers/input/misc/powermate.c b/drivers/input/misc/powermate.c
index 448a470..7a7b8c7 100644
--- a/drivers/input/misc/powermate.c
+++ b/drivers/input/misc/powermate.c
@@ -363,10 +363,11 @@
 
 	input_dev->event = powermate_input_event;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_MSC);
-	input_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
-	input_dev->relbit[LONG(REL_DIAL)] = BIT(REL_DIAL);
-	input_dev->mscbit[LONG(MSC_PULSELED)] = BIT(MSC_PULSELED);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) |
+		BIT_MASK(EV_MSC);
+	input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
+	input_dev->relbit[BIT_WORD(REL_DIAL)] = BIT_MASK(REL_DIAL);
+	input_dev->mscbit[BIT_WORD(MSC_PULSELED)] = BIT_MASK(MSC_PULSELED);
 
 	/* get a handle to the interrupt data pipe */
 	pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress);
diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c
index e36ec1d..a3637d8 100644
--- a/drivers/input/misc/sparcspkr.c
+++ b/drivers/input/misc/sparcspkr.c
@@ -115,8 +115,8 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = dev;
 
-	input_dev->evbit[0] = BIT(EV_SND);
-	input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+	input_dev->evbit[0] = BIT_MASK(EV_SND);
+	input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
 
 	input_dev->event = state->event;
 
diff --git a/drivers/input/misc/yealink.c b/drivers/input/misc/yealink.c
index ab15880..46279ef 100644
--- a/drivers/input/misc/yealink.c
+++ b/drivers/input/misc/yealink.c
@@ -945,7 +945,7 @@
 	/* input_dev->event = input_ev;	TODO */
 
 	/* register available key events */
-	input_dev->evbit[0] = BIT(EV_KEY);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);
 	for (i = 0; i < 256; i++) {
 		int k = map_p1k_to_key(i);
 		if (k >= 0) {
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 64d70a9..2b5ed11 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -455,24 +455,25 @@
 	if (alps_hw_init(psmouse, &version))
 		goto init_fail;
 
-	dev1->evbit[LONG(EV_KEY)] |= BIT(EV_KEY);
-	dev1->keybit[LONG(BTN_TOUCH)] |= BIT(BTN_TOUCH);
-	dev1->keybit[LONG(BTN_TOOL_FINGER)] |= BIT(BTN_TOOL_FINGER);
-	dev1->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+	dev1->evbit[BIT_WORD(EV_KEY)] |= BIT_MASK(EV_KEY);
+	dev1->keybit[BIT_WORD(BTN_TOUCH)] |= BIT_MASK(BTN_TOUCH);
+	dev1->keybit[BIT_WORD(BTN_TOOL_FINGER)] |= BIT_MASK(BTN_TOOL_FINGER);
+	dev1->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 
-	dev1->evbit[LONG(EV_ABS)] |= BIT(EV_ABS);
+	dev1->evbit[BIT_WORD(EV_ABS)] |= BIT_MASK(EV_ABS);
 	input_set_abs_params(dev1, ABS_X, 0, 1023, 0, 0);
 	input_set_abs_params(dev1, ABS_Y, 0, 767, 0, 0);
 	input_set_abs_params(dev1, ABS_PRESSURE, 0, 127, 0, 0);
 
 	if (priv->i->flags & ALPS_WHEEL) {
-		dev1->evbit[LONG(EV_REL)] |= BIT(EV_REL);
-		dev1->relbit[LONG(REL_WHEEL)] |= BIT(REL_WHEEL);
+		dev1->evbit[BIT_WORD(EV_REL)] |= BIT_MASK(EV_REL);
+		dev1->relbit[BIT_WORD(REL_WHEEL)] |= BIT_MASK(REL_WHEEL);
 	}
 
 	if (priv->i->flags & (ALPS_FW_BK_1 | ALPS_FW_BK_2)) {
-		dev1->keybit[LONG(BTN_FORWARD)] |= BIT(BTN_FORWARD);
-		dev1->keybit[LONG(BTN_BACK)] |= BIT(BTN_BACK);
+		dev1->keybit[BIT_WORD(BTN_FORWARD)] |= BIT_MASK(BTN_FORWARD);
+		dev1->keybit[BIT_WORD(BTN_BACK)] |= BIT_MASK(BTN_BACK);
 	}
 
 	snprintf(priv->phys, sizeof(priv->phys), "%s/input1", psmouse->ps2dev.serio->phys);
@@ -483,9 +484,10 @@
 	dev2->id.product = PSMOUSE_ALPS;
 	dev2->id.version = 0x0000;
 
-	dev2->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	dev2->relbit[LONG(REL_X)] |= BIT(REL_X) | BIT(REL_Y);
-	dev2->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+	dev2->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	dev2->relbit[BIT_WORD(REL_X)] |= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+	dev2->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 
 	if (input_register_device(priv->dev2))
 		goto init_fail;
diff --git a/drivers/input/mouse/amimouse.c b/drivers/input/mouse/amimouse.c
index 239a0e1..a185ac7 100644
--- a/drivers/input/mouse/amimouse.c
+++ b/drivers/input/mouse/amimouse.c
@@ -111,9 +111,10 @@
 	amimouse_dev->id.product = 0x0002;
 	amimouse_dev->id.version = 0x0100;
 
-	amimouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	amimouse_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
-	amimouse_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+	amimouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	amimouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+	amimouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 	amimouse_dev->open = amimouse_open;
 	amimouse_dev->close = amimouse_close;
 
diff --git a/drivers/input/mouse/atarimouse.c b/drivers/input/mouse/atarimouse.c
index c8c7244..98a3561 100644
--- a/drivers/input/mouse/atarimouse.c
+++ b/drivers/input/mouse/atarimouse.c
@@ -137,9 +137,10 @@
 	atamouse_dev->id.product = 0x0002;
 	atamouse_dev->id.version = 0x0100;
 
-	atamouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	atamouse_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
-	atamouse_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+	atamouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	atamouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+	atamouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 	atamouse_dev->open = atamouse_open;
 	atamouse_dev->close = atamouse_close;
 
diff --git a/drivers/input/mouse/hil_ptr.c b/drivers/input/mouse/hil_ptr.c
index 449bf4d..27f88fb 100644
--- a/drivers/input/mouse/hil_ptr.c
+++ b/drivers/input/mouse/hil_ptr.c
@@ -298,12 +298,12 @@
 	idd = ptr->idd + 1;
 	txt = "unknown";
 	if ((did & HIL_IDD_DID_TYPE_MASK) == HIL_IDD_DID_TYPE_REL) {
-		ptr->dev->evbit[0] = BIT(EV_REL);
+		ptr->dev->evbit[0] = BIT_MASK(EV_REL);
 		txt = "relative";
 	}
 
 	if ((did & HIL_IDD_DID_TYPE_MASK) == HIL_IDD_DID_TYPE_ABS) {
-		ptr->dev->evbit[0] = BIT(EV_ABS);
+		ptr->dev->evbit[0] = BIT_MASK(EV_ABS);
 		txt = "absolute";
 	}
 	if (!ptr->dev->evbit[0])
@@ -311,7 +311,7 @@
 
 	ptr->nbtn = HIL_IDD_NUM_BUTTONS(idd);
 	if (ptr->nbtn)
-		ptr->dev->evbit[0] |= BIT(EV_KEY);
+		ptr->dev->evbit[0] |= BIT_MASK(EV_KEY);
 
 	naxsets = HIL_IDD_NUM_AXSETS(*idd);
 	ptr->naxes = HIL_IDD_NUM_AXES_PER_SET(*idd);
diff --git a/drivers/input/mouse/inport.c b/drivers/input/mouse/inport.c
index 79b624f..655a392 100644
--- a/drivers/input/mouse/inport.c
+++ b/drivers/input/mouse/inport.c
@@ -163,9 +163,10 @@
 	inport_dev->id.product = 0x0001;
 	inport_dev->id.version = 0x0100;
 
-	inport_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	inport_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-	inport_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	inport_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	inport_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+	inport_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
 	inport_dev->open  = inport_open;
 	inport_dev->close = inport_close;
diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c
index d7de4c5..9ec57d8 100644
--- a/drivers/input/mouse/lifebook.c
+++ b/drivers/input/mouse/lifebook.c
@@ -270,9 +270,10 @@
 	dev2->id.version = 0x0000;
 	dev2->dev.parent = &psmouse->ps2dev.serio->dev;
 
-	dev2->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	dev2->relbit[LONG(REL_X)] = BIT(REL_X) | BIT(REL_Y);
-	dev2->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT);
+	dev2->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	dev2->relbit[BIT_WORD(REL_X)] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+	dev2->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT);
 
 	error = input_register_device(priv->dev2);
 	if (error)
@@ -295,9 +296,9 @@
 	if (lifebook_absolute_mode(psmouse))
 		return -1;
 
-	dev1->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY);
+	dev1->evbit[0] = BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY);
 	dev1->relbit[0] = 0;
-	dev1->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	dev1->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(dev1, ABS_X, 0, max_coord, 0, 0);
 	input_set_abs_params(dev1, ABS_Y, 0, max_coord, 0, 0);
 
diff --git a/drivers/input/mouse/logibm.c b/drivers/input/mouse/logibm.c
index 26c3b2e..b23a4f3 100644
--- a/drivers/input/mouse/logibm.c
+++ b/drivers/input/mouse/logibm.c
@@ -156,9 +156,10 @@
 	logibm_dev->id.product = 0x0001;
 	logibm_dev->id.version = 0x0100;
 
-	logibm_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	logibm_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-	logibm_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	logibm_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	logibm_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+	logibm_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
 	logibm_dev->open  = logibm_open;
 	logibm_dev->close = logibm_close;
diff --git a/drivers/input/mouse/pc110pad.c b/drivers/input/mouse/pc110pad.c
index 05d992e..8991ab0 100644
--- a/drivers/input/mouse/pc110pad.c
+++ b/drivers/input/mouse/pc110pad.c
@@ -144,9 +144,9 @@
 	pc110pad_dev->id.product = 0x0001;
 	pc110pad_dev->id.version = 0x0100;
 
-	pc110pad_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	pc110pad_dev->absbit[0] = BIT(ABS_X) | BIT(ABS_Y);
-	pc110pad_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	pc110pad_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	pc110pad_dev->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y);
+	pc110pad_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 
 	pc110pad_dev->absmax[ABS_X] = 0x1ff;
 	pc110pad_dev->absmax[ABS_Y] = 0x0ff;
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 0735257..da316d1 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -1115,9 +1115,10 @@
 
 	input_dev->dev.parent = &psmouse->ps2dev.serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-	input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+	input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
 	psmouse->set_rate = psmouse_set_rate;
 	psmouse->set_resolution = psmouse_set_resolution;
diff --git a/drivers/input/mouse/rpcmouse.c b/drivers/input/mouse/rpcmouse.c
index 355efd0..18a4863 100644
--- a/drivers/input/mouse/rpcmouse.c
+++ b/drivers/input/mouse/rpcmouse.c
@@ -78,9 +78,10 @@
 	rpcmouse_dev->id.product = 0x0001;
 	rpcmouse_dev->id.version = 0x0100;
 
-	rpcmouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	rpcmouse_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-	rpcmouse_dev->relbit[0]	= BIT(REL_X) | BIT(REL_Y);
+	rpcmouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	rpcmouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+	rpcmouse_dev->relbit[0]	= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
 	rpcmouse_lastx = (short) iomd_readl(IOMD_MOUSEX);
 	rpcmouse_lasty = (short) iomd_readl(IOMD_MOUSEY);
diff --git a/drivers/input/mouse/sermouse.c b/drivers/input/mouse/sermouse.c
index 77b8ee2..ed917bf 100644
--- a/drivers/input/mouse/sermouse.c
+++ b/drivers/input/mouse/sermouse.c
@@ -268,9 +268,10 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT);
-	input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT);
+	input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
 	if (c & 0x01) set_bit(BTN_MIDDLE, input_dev->keybit);
 	if (c & 0x02) set_bit(BTN_SIDE, input_dev->keybit);
diff --git a/drivers/input/mouse/touchkit_ps2.c b/drivers/input/mouse/touchkit_ps2.c
index 7b977fd..3fadb2a 100644
--- a/drivers/input/mouse/touchkit_ps2.c
+++ b/drivers/input/mouse/touchkit_ps2.c
@@ -85,7 +85,7 @@
 		return -ENODEV;
 
 	if (set_properties) {
-		dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 		set_bit(BTN_TOUCH, dev->keybit);
 		input_set_abs_params(dev, ABS_X, 0, TOUCHKIT_MAX_XC, 0, 0);
 		input_set_abs_params(dev, ABS_Y, 0, TOUCHKIT_MAX_YC, 0, 0);
diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c
index 79146d6..78c3ea7 100644
--- a/drivers/input/mousedev.c
+++ b/drivers/input/mousedev.c
@@ -998,34 +998,36 @@
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_KEYBIT |
 				INPUT_DEVICE_ID_MATCH_RELBIT,
-		.evbit = { BIT(EV_KEY) | BIT(EV_REL) },
-		.keybit = { [LONG(BTN_LEFT)] = BIT(BTN_LEFT) },
-		.relbit = { BIT(REL_X) | BIT(REL_Y) },
+		.evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) },
+		.keybit = { [BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) },
+		.relbit = { BIT_MASK(REL_X) | BIT_MASK(REL_Y) },
 	},	/* A mouse like device, at least one button,
 		   two relative axes */
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_RELBIT,
-		.evbit = { BIT(EV_KEY) | BIT(EV_REL) },
-		.relbit = { BIT(REL_WHEEL) },
+		.evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) },
+		.relbit = { BIT_MASK(REL_WHEEL) },
 	},	/* A separate scrollwheel */
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_KEYBIT |
 				INPUT_DEVICE_ID_MATCH_ABSBIT,
-		.evbit = { BIT(EV_KEY) | BIT(EV_ABS) },
-		.keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) },
-		.absbit = { BIT(ABS_X) | BIT(ABS_Y) },
+		.evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) },
+		.keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
+		.absbit = { BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) },
 	},	/* A tablet like device, at least touch detection,
 		   two absolute axes */
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_KEYBIT |
 				INPUT_DEVICE_ID_MATCH_ABSBIT,
-		.evbit = { BIT(EV_KEY) | BIT(EV_ABS) },
-		.keybit = { [LONG(BTN_TOOL_FINGER)] = BIT(BTN_TOOL_FINGER) },
-		.absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) |
-				BIT(ABS_TOOL_WIDTH) },
+		.evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) },
+		.keybit = { [BIT_WORD(BTN_TOOL_FINGER)] =
+				BIT_MASK(BTN_TOOL_FINGER) },
+		.absbit = { BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
+				BIT_MASK(ABS_PRESSURE) |
+				BIT_MASK(ABS_TOOL_WIDTH) },
 	},	/* A touchpad */
 
 	{ },	/* Terminating entry */
diff --git a/drivers/input/tablet/acecad.c b/drivers/input/tablet/acecad.c
index dd231045..b973d0e 100644
--- a/drivers/input/tablet/acecad.c
+++ b/drivers/input/tablet/acecad.c
@@ -192,10 +192,14 @@
 	input_dev->open = usb_acecad_open;
 	input_dev->close = usb_acecad_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->absbit[0] = BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE);
-	input_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-	input_dev->keybit[LONG(BTN_DIGI)] = BIT(BTN_TOOL_PEN) |BIT(BTN_TOUCH) | BIT(BTN_STYLUS) | BIT(BTN_STYLUS2);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
+		BIT_MASK(ABS_PRESSURE);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] = BIT_MASK(BTN_TOOL_PEN) |
+		BIT_MASK(BTN_TOUCH) | BIT_MASK(BTN_STYLUS) |
+		BIT_MASK(BTN_STYLUS2);
 
 	switch (id->driver_info) {
 		case 0:
diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c
index b2ca10f2f..d2c6da2 100644
--- a/drivers/input/tablet/gtco.c
+++ b/drivers/input/tablet/gtco.c
@@ -573,10 +573,12 @@
 	struct gtco *device = input_get_drvdata(inputdev);
 
 	/* Which events */
-	inputdev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_MSC);
+	inputdev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
+		BIT_MASK(EV_MSC);
 
 	/* Misc event menu block */
-	inputdev->mscbit[0] = BIT(MSC_SCAN)|BIT(MSC_SERIAL)|BIT(MSC_RAW) ;
+	inputdev->mscbit[0] = BIT_MASK(MSC_SCAN) | BIT_MASK(MSC_SERIAL) |
+		BIT_MASK(MSC_RAW);
 
 	/* Absolute values based on HID report info */
 	input_set_abs_params(inputdev, ABS_X, device->min_X, device->max_X,
diff --git a/drivers/input/tablet/kbtab.c b/drivers/input/tablet/kbtab.c
index 91e6d00..1182fc1 100644
--- a/drivers/input/tablet/kbtab.c
+++ b/drivers/input/tablet/kbtab.c
@@ -153,10 +153,13 @@
 	input_dev->open = kbtab_open;
 	input_dev->close = kbtab_close;
 
-	input_dev->evbit[0] |= BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_MSC);
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_PEN) | BIT(BTN_TOUCH);
-	input_dev->mscbit[0] |= BIT(MSC_SERIAL);
+	input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
+		BIT_MASK(EV_MSC);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_PEN) |
+		BIT_MASK(BTN_TOUCH);
+	input_dev->mscbit[0] |= BIT_MASK(MSC_SERIAL);
 	input_set_abs_params(input_dev, ABS_X, 0, 0x2000, 4, 0);
 	input_set_abs_params(input_dev, ABS_Y, 0, 0x1750, 4, 0);
 	input_set_abs_params(input_dev, ABS_PRESSURE, 0, 0xff, 0, 0);
diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c
index 064e123..d64b1ea1 100644
--- a/drivers/input/tablet/wacom_sys.c
+++ b/drivers/input/tablet/wacom_sys.c
@@ -140,48 +140,58 @@
 
 void input_dev_mo(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_1) | BIT(BTN_5);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_1) |
+		BIT_MASK(BTN_5);
 	input_set_abs_params(input_dev, ABS_WHEEL, 0, 71, 0, 0);
 }
 
 void input_dev_g4(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->evbit[0] |= BIT(EV_MSC);
-	input_dev->mscbit[0] |= BIT(MSC_SERIAL);
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_FINGER);
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_0) | BIT(BTN_4);
+	input_dev->evbit[0] |= BIT_MASK(EV_MSC);
+	input_dev->mscbit[0] |= BIT_MASK(MSC_SERIAL);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_FINGER);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_0) |
+		BIT_MASK(BTN_4);
 }
 
 void input_dev_g(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->evbit[0] |= BIT(EV_REL);
-	input_dev->relbit[0] |= BIT(REL_WHEEL);
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_RUBBER) | BIT(BTN_TOOL_MOUSE) | BIT(BTN_STYLUS2);
+	input_dev->evbit[0] |= BIT_MASK(EV_REL);
+	input_dev->relbit[0] |= BIT_MASK(REL_WHEEL);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_RUBBER) |
+		BIT_MASK(BTN_TOOL_MOUSE) | BIT_MASK(BTN_STYLUS2);
 	input_set_abs_params(input_dev, ABS_DISTANCE, 0, wacom_wac->features->distance_max, 0, 0);
 }
 
 void input_dev_i3s(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_FINGER);
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_0) | BIT(BTN_1) | BIT(BTN_2) | BIT(BTN_3);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_FINGER);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_0) |
+		BIT_MASK(BTN_1) | BIT_MASK(BTN_2) | BIT_MASK(BTN_3);
 	input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
 }
 
 void input_dev_i3(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_4) | BIT(BTN_5) | BIT(BTN_6) | BIT(BTN_7);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_4) |
+		BIT_MASK(BTN_5) | BIT_MASK(BTN_6) | BIT_MASK(BTN_7);
 	input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0);
 }
 
 void input_dev_i(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->evbit[0] |= BIT(EV_MSC) | BIT(EV_REL);
-	input_dev->mscbit[0] |= BIT(MSC_SERIAL);
-	input_dev->relbit[0] |= BIT(REL_WHEEL);
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE) | BIT(BTN_SIDE) | BIT(BTN_EXTRA);
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_RUBBER) | BIT(BTN_TOOL_MOUSE)	| BIT(BTN_TOOL_BRUSH)
-		| BIT(BTN_TOOL_PENCIL) | BIT(BTN_TOOL_AIRBRUSH) | BIT(BTN_TOOL_LENS) | BIT(BTN_STYLUS2);
+	input_dev->evbit[0] |= BIT_MASK(EV_MSC) | BIT_MASK(EV_REL);
+	input_dev->mscbit[0] |= BIT_MASK(MSC_SERIAL);
+	input_dev->relbit[0] |= BIT_MASK(REL_WHEEL);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE) |
+		BIT_MASK(BTN_SIDE) | BIT_MASK(BTN_EXTRA);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_RUBBER) |
+		BIT_MASK(BTN_TOOL_MOUSE) | BIT_MASK(BTN_TOOL_BRUSH) |
+		BIT_MASK(BTN_TOOL_PENCIL) | BIT_MASK(BTN_TOOL_AIRBRUSH) |
+		BIT_MASK(BTN_TOOL_LENS) | BIT_MASK(BTN_STYLUS2);
 	input_set_abs_params(input_dev, ABS_DISTANCE, 0, wacom_wac->features->distance_max, 0, 0);
 	input_set_abs_params(input_dev, ABS_WHEEL, 0, 1023, 0, 0);
 	input_set_abs_params(input_dev, ABS_TILT_X, 0, 127, 0, 0);
@@ -192,12 +202,13 @@
 
 void input_dev_pl(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_STYLUS2) | BIT(BTN_TOOL_RUBBER);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_STYLUS2) |
+		BIT_MASK(BTN_TOOL_RUBBER);
 }
 
 void input_dev_pt(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_RUBBER);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_RUBBER);
 }
 
 static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *id)
@@ -243,12 +254,13 @@
 	input_dev->open = wacom_open;
 	input_dev->close = wacom_close;
 
-	input_dev->evbit[0] |= BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_PEN) | BIT(BTN_TOUCH) | BIT(BTN_STYLUS);
+	input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_PEN) |
+		BIT_MASK(BTN_TOUCH) | BIT_MASK(BTN_STYLUS);
 	input_set_abs_params(input_dev, ABS_X, 0, wacom_wac->features->x_max, 4, 0);
 	input_set_abs_params(input_dev, ABS_Y, 0, wacom_wac->features->y_max, 4, 0);
 	input_set_abs_params(input_dev, ABS_PRESSURE, 0, wacom_wac->features->pressure_max, 0, 0);
-	input_dev->absbit[LONG(ABS_MISC)] |= BIT(ABS_MISC);
+	input_dev->absbit[BIT_WORD(ABS_MISC)] |= BIT_MASK(ABS_MISC);
 
 	wacom_init_input_dev(input_dev, wacom_wac);
 
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index 51ae4fb..f59aecf 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -917,8 +917,8 @@
 	input_dev->phys = ts->phys;
 	input_dev->dev.parent = &spi->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(input_dev, ABS_X,
 			pdata->x_min ? : 0,
 			pdata->x_max ? : MAX_12BIT,
diff --git a/drivers/input/touchscreen/corgi_ts.c b/drivers/input/touchscreen/corgi_ts.c
index e6a31d1..b1b2e07 100644
--- a/drivers/input/touchscreen/corgi_ts.c
+++ b/drivers/input/touchscreen/corgi_ts.c
@@ -302,8 +302,8 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &pdev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(input_dev, ABS_X, X_AXIS_MIN, X_AXIS_MAX, 0, 0);
 	input_set_abs_params(input_dev, ABS_Y, Y_AXIS_MIN, Y_AXIS_MAX, 0, 0);
 	input_set_abs_params(input_dev, ABS_PRESSURE, PRESSURE_MIN, PRESSURE_MAX, 0, 0);
diff --git a/drivers/input/touchscreen/elo.c b/drivers/input/touchscreen/elo.c
index 557d781..d20689c 100644
--- a/drivers/input/touchscreen/elo.c
+++ b/drivers/input/touchscreen/elo.c
@@ -320,8 +320,8 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 
 	serio_set_drvdata(serio, elo);
 	err = serio_open(serio, drv);
diff --git a/drivers/input/touchscreen/fujitsu_ts.c b/drivers/input/touchscreen/fujitsu_ts.c
index daf7a4a..80b2180 100644
--- a/drivers/input/touchscreen/fujitsu_ts.c
+++ b/drivers/input/touchscreen/fujitsu_ts.c
@@ -122,8 +122,8 @@
 	input_dev->id.vendor = SERIO_FUJITSU;
 	input_dev->id.product = 0;
 	input_dev->id.version = 0x0100;
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 
 	input_set_abs_params(input_dev, ABS_X, 0, 4096, 0, 0);
 	input_set_abs_params(input_dev, ABS_Y, 0, 4096, 0, 0);
diff --git a/drivers/input/touchscreen/gunze.c b/drivers/input/touchscreen/gunze.c
index 39d6026..a48a158 100644
--- a/drivers/input/touchscreen/gunze.c
+++ b/drivers/input/touchscreen/gunze.c
@@ -137,8 +137,8 @@
 	input_dev->id.product = 0x0051;
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(input_dev, ABS_X, 24, 1000, 0, 0);
 	input_set_abs_params(input_dev, ABS_Y, 24, 1000, 0, 0);
 
diff --git a/drivers/input/touchscreen/h3600_ts_input.c b/drivers/input/touchscreen/h3600_ts_input.c
index 09ed780..2ae6c60 100644
--- a/drivers/input/touchscreen/h3600_ts_input.c
+++ b/drivers/input/touchscreen/h3600_ts_input.c
@@ -373,8 +373,9 @@
 
 	input_dev->event = h3600ts_event;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_LED) | BIT(EV_PWR);
-	input_dev->ledbit[0] = BIT(LED_SLEEP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
+		BIT_MASK(EV_LED) | BIT_MASK(EV_PWR);
+	input_dev->ledbit[0] = BIT_MASK(LED_SLEEP);
 	input_set_abs_params(input_dev, ABS_X, 60, 985, 0, 0);
 	input_set_abs_params(input_dev, ABS_Y, 35, 1024, 0, 0);
 
diff --git a/drivers/input/touchscreen/hp680_ts_input.c b/drivers/input/touchscreen/hp680_ts_input.c
index 1a15475..c38d4e0 100644
--- a/drivers/input/touchscreen/hp680_ts_input.c
+++ b/drivers/input/touchscreen/hp680_ts_input.c
@@ -81,8 +81,8 @@
 	if (!hp680_ts_dev)
 		return -ENOMEM;
 
-	hp680_ts_dev->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY);
-	hp680_ts_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	hp680_ts_dev->evbit[0] = BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY);
+	hp680_ts_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 
 	input_set_abs_params(hp680_ts_dev, ABS_X,
 		HP680_TS_ABS_X_MIN, HP680_TS_ABS_X_MAX, 0, 0);
diff --git a/drivers/input/touchscreen/mk712.c b/drivers/input/touchscreen/mk712.c
index 44140fe..80a6588 100644
--- a/drivers/input/touchscreen/mk712.c
+++ b/drivers/input/touchscreen/mk712.c
@@ -186,8 +186,8 @@
 	mk712_dev->open    = mk712_open;
 	mk712_dev->close   = mk712_close;
 
-	mk712_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	mk712_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	mk712_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	mk712_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(mk712_dev, ABS_X, 0, 0xfff, 88, 0);
 	input_set_abs_params(mk712_dev, ABS_Y, 0, 0xfff, 88, 0);
 
diff --git a/drivers/input/touchscreen/mtouch.c b/drivers/input/touchscreen/mtouch.c
index 4ec3b1f..9077228 100644
--- a/drivers/input/touchscreen/mtouch.c
+++ b/drivers/input/touchscreen/mtouch.c
@@ -151,8 +151,8 @@
 	input_dev->id.product = 0;
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(mtouch->dev, ABS_X, MTOUCH_MIN_XC, MTOUCH_MAX_XC, 0, 0);
 	input_set_abs_params(mtouch->dev, ABS_Y, MTOUCH_MIN_YC, MTOUCH_MAX_YC, 0, 0);
 
diff --git a/drivers/input/touchscreen/penmount.c b/drivers/input/touchscreen/penmount.c
index f2c0d3c..c7f9ceb 100644
--- a/drivers/input/touchscreen/penmount.c
+++ b/drivers/input/touchscreen/penmount.c
@@ -113,8 +113,8 @@
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-        input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-        input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+        input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+        input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
         input_set_abs_params(pm->dev, ABS_X, 0, 0x3ff, 0, 0);
         input_set_abs_params(pm->dev, ABS_Y, 0, 0x3ff, 0, 0);
 
diff --git a/drivers/input/touchscreen/touchright.c b/drivers/input/touchscreen/touchright.c
index 3def7bb..3a5c142 100644
--- a/drivers/input/touchscreen/touchright.c
+++ b/drivers/input/touchscreen/touchright.c
@@ -125,8 +125,8 @@
 	input_dev->id.product = 0;
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(tr->dev, ABS_X, TR_MIN_XC, TR_MAX_XC, 0, 0);
 	input_set_abs_params(tr->dev, ABS_Y, TR_MIN_YC, TR_MAX_YC, 0, 0);
 
diff --git a/drivers/input/touchscreen/touchwin.c b/drivers/input/touchscreen/touchwin.c
index ac4bdcf..763a656 100644
--- a/drivers/input/touchscreen/touchwin.c
+++ b/drivers/input/touchscreen/touchwin.c
@@ -132,8 +132,8 @@
 	input_dev->id.product = 0;
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(tw->dev, ABS_X, TW_MIN_XC, TW_MAX_XC, 0, 0);
 	input_set_abs_params(tw->dev, ABS_Y, TW_MIN_YC, TW_MAX_YC, 0, 0);
 
diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index 89373b0..7549939 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -517,7 +517,7 @@
 	idev->id.product	= id;
 	idev->open		= ucb1400_ts_open;
 	idev->close		= ucb1400_ts_close;
-	idev->evbit[0]		= BIT(EV_ABS);
+	idev->evbit[0]		= BIT_MASK(EV_ABS);
 
 	ucb1400_adc_enable(ucb);
 	x_res = ucb1400_ts_read_xres(ucb);
diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c
index 9fb3d5c3..5f34b78 100644
--- a/drivers/input/touchscreen/usbtouchscreen.c
+++ b/drivers/input/touchscreen/usbtouchscreen.c
@@ -868,8 +868,8 @@
 	input_dev->open = usbtouch_open;
 	input_dev->close = usbtouch_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(input_dev, ABS_X, type->min_xc, type->max_xc, 0, 0);
 	input_set_abs_params(input_dev, ABS_Y, type->min_yc, type->max_yc, 0, 0);
 	if (type->max_press)
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 428872b..669f6f6 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -486,11 +486,13 @@
 					card->name);
 		} else {
 			memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen);
-			if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_CONF)
+			if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_CONF) {
+				spin_lock(&card->lock);
 				capilib_data_b3_conf(&cinfo->ncci_head, ApplId,
-						     CAPIMSG_NCCI(skb->data),
-						     CAPIMSG_MSGID(skb->data));
-
+					CAPIMSG_NCCI(skb->data),
+					CAPIMSG_MSGID(skb->data));
+				spin_unlock(&card->lock);
+			}
 			capi_ctr_handle_message(ctrl, ApplId, skb);
 		}
 		break;
@@ -500,9 +502,9 @@
 		ApplId = _get_word(&p);
 		NCCI = _get_word(&p);
 		WindowSize = _get_word(&p);
-
+		spin_lock(&card->lock);
 		capilib_new_ncci(&cinfo->ncci_head, ApplId, NCCI, WindowSize);
-
+		spin_unlock(&card->lock);
 		break;
 
 	case RECEIVE_FREE_NCCI:
@@ -510,9 +512,11 @@
 		ApplId = _get_word(&p);
 		NCCI = _get_word(&p);
 
-		if (NCCI != 0xffffffff)
+		if (NCCI != 0xffffffff) {
+			spin_lock(&card->lock);
 			capilib_free_ncci(&cinfo->ncci_head, ApplId, NCCI);
-
+			spin_unlock(&card->lock);
+		}
 		break;
 
 	case RECEIVE_START:
@@ -751,10 +755,10 @@
 
 	spin_lock_irqsave(&card->lock, flags);
  	b1dma_reset(card);
-	spin_unlock_irqrestore(&card->lock, flags);
 
 	memset(cinfo->version, 0, sizeof(cinfo->version));
 	capilib_release(&cinfo->ncci_head);
+	spin_unlock_irqrestore(&card->lock, flags);
 	capi_ctr_reseted(ctrl);
 }
 
@@ -803,8 +807,11 @@
 	avmcard *card = cinfo->card;
 	struct sk_buff *skb;
 	void *p;
+	unsigned long flags;
 
+	spin_lock_irqsave(&card->lock, flags);
 	capilib_release_appl(&cinfo->ncci_head, appl);
+	spin_unlock_irqrestore(&card->lock, flags);
 
 	skb = alloc_skb(7, GFP_ATOMIC);
 	if (!skb) {
@@ -832,10 +839,13 @@
 	u16 retval = CAPI_NOERROR;
 
  	if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_REQ) {
+		unsigned long flags;
+		spin_lock_irqsave(&card->lock, flags);
 		retval = capilib_data_b3_req(&cinfo->ncci_head,
 					     CAPIMSG_APPID(skb->data),
 					     CAPIMSG_NCCI(skb->data),
 					     CAPIMSG_MSGID(skb->data));
+		spin_unlock_irqrestore(&card->lock, flags);
 	}
 	if (retval == CAPI_NOERROR) 
 		b1dma_queue_tx(card, skb);
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 8710cf6..4bbbbe6 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -678,7 +678,9 @@
                 for (i=0; i < card->nr_controllers; i++) {
 			avmctrl_info *cinfo = &card->ctrlinfo[i];
 			memset(cinfo->version, 0, sizeof(cinfo->version));
+			spin_lock_irqsave(&card->lock, flags);
 			capilib_release(&cinfo->ncci_head);
+			spin_unlock_irqrestore(&card->lock, flags);
 			capi_ctr_reseted(&cinfo->capi_ctrl);
 		}
 		card->nlogcontr = 0;
diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c
index c925020..6130724 100644
--- a/drivers/isdn/hardware/avm/t1isa.c
+++ b/drivers/isdn/hardware/avm/t1isa.c
@@ -180,8 +180,8 @@
 
 			ApplId = (unsigned) b1_get_word(card->port);
 			MsgLen = t1_get_slice(card->port, card->msgbuf);
-			spin_unlock_irqrestore(&card->lock, flags);
 			if (!(skb = alloc_skb(MsgLen, GFP_ATOMIC))) {
+				spin_unlock_irqrestore(&card->lock, flags);
 				printk(KERN_ERR "%s: incoming packet dropped\n",
 						card->name);
 			} else {
@@ -190,7 +190,7 @@
 					capilib_data_b3_conf(&cinfo->ncci_head, ApplId,
 							     CAPIMSG_NCCI(skb->data),
 							     CAPIMSG_MSGID(skb->data));
-
+				spin_unlock_irqrestore(&card->lock, flags);
 				capi_ctr_handle_message(ctrl, ApplId, skb);
 			}
 			break;
@@ -200,21 +200,17 @@
 			ApplId = b1_get_word(card->port);
 			NCCI = b1_get_word(card->port);
 			WindowSize = b1_get_word(card->port);
-			spin_unlock_irqrestore(&card->lock, flags);
-
 			capilib_new_ncci(&cinfo->ncci_head, ApplId, NCCI, WindowSize);
-
+			spin_unlock_irqrestore(&card->lock, flags);
 			break;
 
 		case RECEIVE_FREE_NCCI:
 
 			ApplId = b1_get_word(card->port);
 			NCCI = b1_get_word(card->port);
-			spin_unlock_irqrestore(&card->lock, flags);
-
 			if (NCCI != 0xffffffff)
 				capilib_free_ncci(&cinfo->ncci_head, ApplId, NCCI);
-
+			spin_unlock_irqrestore(&card->lock, flags);
 			break;
 
 		case RECEIVE_START:
@@ -333,13 +329,16 @@
 	avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata);
 	avmcard *card = cinfo->card;
 	unsigned int port = card->port;
+	unsigned long flags;
 
 	t1_disable_irq(port);
 	b1_reset(port);
 	b1_reset(port);
 
 	memset(cinfo->version, 0, sizeof(cinfo->version));
+	spin_lock_irqsave(&card->lock, flags);
 	capilib_release(&cinfo->ncci_head);
+	spin_unlock_irqrestore(&card->lock, flags);
 	capi_ctr_reseted(ctrl);
 }
 
@@ -466,29 +465,26 @@
 	u8 subcmd = CAPIMSG_SUBCOMMAND(skb->data);
 	u16 dlen, retval;
 
+	spin_lock_irqsave(&card->lock, flags);
 	if (CAPICMD(cmd, subcmd) == CAPI_DATA_B3_REQ) {
 		retval = capilib_data_b3_req(&cinfo->ncci_head,
 					     CAPIMSG_APPID(skb->data),
 					     CAPIMSG_NCCI(skb->data),
 					     CAPIMSG_MSGID(skb->data));
-		if (retval != CAPI_NOERROR) 
+		if (retval != CAPI_NOERROR) {
+			spin_unlock_irqrestore(&card->lock, flags);
 			return retval;
-
+		}
 		dlen = CAPIMSG_DATALEN(skb->data);
 
-		spin_lock_irqsave(&card->lock, flags);
 		b1_put_byte(port, SEND_DATA_B3_REQ);
 		t1_put_slice(port, skb->data, len);
 		t1_put_slice(port, skb->data + len, dlen);
-		spin_unlock_irqrestore(&card->lock, flags);
 	} else {
-
-		spin_lock_irqsave(&card->lock, flags);
 		b1_put_byte(port, SEND_MESSAGE);
 		t1_put_slice(port, skb->data, len);
-		spin_unlock_irqrestore(&card->lock, flags);
 	}
-
+	spin_unlock_irqrestore(&card->lock, flags);
 	dev_kfree_skb_any(skb);
 	return CAPI_NOERROR;
 }
diff --git a/drivers/isdn/sc/debug.h b/drivers/isdn/sc/debug.h
deleted file mode 100644
index e9db96e..0000000
--- a/drivers/isdn/sc/debug.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* $Id: debug.h,v 1.2.8.1 2001/09/23 22:24:59 kai Exp $
- *
- * Copyright (C) 1996  SpellCaster Telecommunications Inc.
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- * For more information, please contact gpl-info@spellcast.com or write:
- *
- *     SpellCaster Telecommunications Inc.
- *     5621 Finch Avenue East, Unit #3
- *     Scarborough, Ontario  Canada
- *     M1B 2T9
- *     +1 (416) 297-8565
- *     +1 (416) 297-6433 Facsimile
- */
-
-#define REQUEST_IRQ(a,b,c,d,e) request_irq(a,b,c,d,e)
-#define FREE_IRQ(a,b) free_irq(a,b)
diff --git a/drivers/isdn/sc/includes.h b/drivers/isdn/sc/includes.h
index 5286e0c..4766e5b 100644
--- a/drivers/isdn/sc/includes.h
+++ b/drivers/isdn/sc/includes.h
@@ -14,4 +14,3 @@
 #include <linux/timer.h>
 #include <linux/wait.h>
 #include <linux/isdnif.h>
-#include "debug.h"
diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c
index 0bf7634..d09c854 100644
--- a/drivers/isdn/sc/init.c
+++ b/drivers/isdn/sc/init.c
@@ -404,7 +404,7 @@
 		/*
 		 * Release the IRQ
 		 */
-		FREE_IRQ(sc_adapter[i]->interrupt, NULL);
+		free_irq(sc_adapter[i]->interrupt, NULL);
 
 		/*
 		 * Reset for a clean start
diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c
index 2766e4f..883da72 100644
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -791,8 +791,10 @@
 			if (hid->keycode[i])
 				set_bit(hid->keycode[i], input_dev->keybit);
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_LED) | BIT(EV_REP);
-		input_dev->ledbit[0] = BIT(LED_SCROLLL) | BIT(LED_CAPSL) | BIT(LED_NUML);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) |
+			BIT_MASK(EV_REP);
+		input_dev->ledbit[0] = BIT_MASK(LED_SCROLLL) |
+			BIT_MASK(LED_CAPSL) | BIT_MASK(LED_NUML);
 		input_dev->event = adbhid_kbd_event;
 		input_dev->keycodemax = KEY_FN;
 		input_dev->keycodesize = sizeof(hid->keycode[0]);
@@ -801,16 +803,18 @@
 	case ADB_MOUSE:
 		sprintf(hid->name, "ADB mouse");
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-		input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-		input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+		input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+			BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+		input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 		break;
 
 	case ADB_MISC:
 		switch (original_handler_id) {
 		case 0x02: /* Adjustable keyboard button device */
 			sprintf(hid->name, "ADB adjustable keyboard buttons");
-			input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+			input_dev->evbit[0] = BIT_MASK(EV_KEY) |
+				BIT_MASK(EV_REP);
 			set_bit(KEY_SOUND, input_dev->keybit);
 			set_bit(KEY_MUTE, input_dev->keybit);
 			set_bit(KEY_VOLUMEUP, input_dev->keybit);
@@ -818,7 +822,8 @@
 			break;
 		case 0x1f: /* Powerbook button device */
 			sprintf(hid->name, "ADB Powerbook buttons");
-			input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+			input_dev->evbit[0] = BIT_MASK(EV_KEY) |
+				BIT_MASK(EV_REP);
 			set_bit(KEY_MUTE, input_dev->keybit);
 			set_bit(KEY_VOLUMEUP, input_dev->keybit);
 			set_bit(KEY_VOLUMEDOWN, input_dev->keybit);
diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c
index 33dee3a..8930230 100644
--- a/drivers/macintosh/mac_hid.c
+++ b/drivers/macintosh/mac_hid.c
@@ -117,9 +117,10 @@
 	emumousebtn->id.product = 0x0001;
 	emumousebtn->id.version = 0x0100;
 
-	emumousebtn->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	emumousebtn->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-	emumousebtn->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	emumousebtn->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	emumousebtn->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+	emumousebtn->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
 	ret = input_register_device(emumousebtn);
 	if (ret)
diff --git a/drivers/macintosh/mediabay.c b/drivers/macintosh/mediabay.c
index c803d2b..48d647a 100644
--- a/drivers/macintosh/mediabay.c
+++ b/drivers/macintosh/mediabay.c
@@ -563,7 +563,7 @@
 				ide_init_hwif_ports(&hw, (unsigned long) bay->cd_base, (unsigned long) 0, NULL);
 				hw.irq = bay->cd_irq;
 				hw.chipset = ide_pmac;
-				bay->cd_index = ide_register_hw(&hw, 0, NULL);
+				bay->cd_index = ide_register_hw(&hw, NULL, 0, NULL);
 				pmu_resume();
 			}
 			if (bay->cd_index == -1) {
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 34a8c60..9b6fbf0 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -267,6 +267,12 @@
 	---help---
 	  Multipath support for LSI/Engenio RDAC.
 
+config DM_MULTIPATH_HP
+        tristate "HP MSA multipath support (EXPERIMENTAL)"
+        depends on DM_MULTIPATH && BLK_DEV_DM && EXPERIMENTAL
+        ---help---
+          Multipath support for HP MSA (Active/Passive) series hardware.
+
 config DM_DELAY
 	tristate "I/O delaying target (EXPERIMENTAL)"
 	depends on BLK_DEV_DM && EXPERIMENTAL
@@ -276,4 +282,10 @@
 
 	If unsure, say N.
 
+config DM_UEVENT
+	bool "DM uevents (EXPERIMENTAL)"
+	depends on BLK_DEV_DM && EXPERIMENTAL
+	---help---
+	Generate udev events for DM events.
+
 endif # MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index c49366c..d9aa7ed 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -8,6 +8,7 @@
 dm-snapshot-objs := dm-snap.o dm-exception-store.o
 dm-mirror-objs	:= dm-log.o dm-raid1.o
 dm-rdac-objs	:= dm-mpath-rdac.o
+dm-hp-sw-objs	:= dm-mpath-hp-sw.o
 md-mod-objs     := md.o bitmap.o
 raid456-objs	:= raid5.o raid6algos.o raid6recov.o raid6tables.o \
 		   raid6int1.o raid6int2.o raid6int4.o \
@@ -35,6 +36,7 @@
 obj-$(CONFIG_DM_DELAY)		+= dm-delay.o
 obj-$(CONFIG_DM_MULTIPATH)	+= dm-multipath.o dm-round-robin.o
 obj-$(CONFIG_DM_MULTIPATH_EMC)	+= dm-emc.o
+obj-$(CONFIG_DM_MULTIPATH_HP)	+= dm-hp-sw.o
 obj-$(CONFIG_DM_MULTIPATH_RDAC)	+= dm-rdac.o
 obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
 obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o
@@ -48,6 +50,10 @@
 altivec_flags := -maltivec -mabi=altivec
 endif
 
+ifeq ($(CONFIG_DM_UEVENT),y)
+dm-mod-objs			+= dm-uevent.o
+endif
+
 targets += raid6int1.c
 $(obj)/raid6int1.c:   UNROLL := 1
 $(obj)/raid6int1.c:   $(src)/raid6int.uc $(src)/unroll.pl FORCE
diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h
index 3f7b827..d4509be 100644
--- a/drivers/md/dm-bio-list.h
+++ b/drivers/md/dm-bio-list.h
@@ -21,11 +21,6 @@
 	return bl->head == NULL;
 }
 
-#define BIO_LIST_INIT { .head = NULL, .tail = NULL }
-
-#define BIO_LIST(bl) \
-	struct bio_list bl = BIO_LIST_INIT
-
 static inline void bio_list_init(struct bio_list *bl)
 {
 	bl->head = bl->tail = NULL;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 64fee90..b41f945 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -36,7 +36,6 @@
 	struct work_struct work;
 	atomic_t pending;
 	int error;
-	int post_process;
 };
 
 /*
@@ -57,7 +56,7 @@
 
 struct crypt_iv_operations {
 	int (*ctr)(struct crypt_config *cc, struct dm_target *ti,
-	           const char *opts);
+		   const char *opts);
 	void (*dtr)(struct crypt_config *cc);
 	const char *(*status)(struct crypt_config *cc);
 	int (*generator)(struct crypt_config *cc, u8 *iv, sector_t sector);
@@ -80,6 +79,8 @@
 	mempool_t *page_pool;
 	struct bio_set *bs;
 
+	struct workqueue_struct *io_queue;
+	struct workqueue_struct *crypt_queue;
 	/*
 	 * crypto related data
 	 */
@@ -137,7 +138,7 @@
 }
 
 static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
-	                      const char *opts)
+			      const char *opts)
 {
 	struct crypto_cipher *essiv_tfm;
 	struct crypto_hash *hash_tfm;
@@ -175,6 +176,7 @@
 
 	if (err) {
 		ti->error = "Error calculating hash in ESSIV";
+		kfree(salt);
 		return err;
 	}
 
@@ -188,7 +190,7 @@
 	if (crypto_cipher_blocksize(essiv_tfm) !=
 	    crypto_blkcipher_ivsize(cc->tfm)) {
 		ti->error = "Block size of ESSIV cipher does "
-			        "not match IV size of block cipher";
+			    "not match IV size of block cipher";
 		crypto_free_cipher(essiv_tfm);
 		kfree(salt);
 		return -EINVAL;
@@ -319,10 +321,10 @@
 	return r;
 }
 
-static void
-crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx,
-                   struct bio *bio_out, struct bio *bio_in,
-                   sector_t sector, int write)
+static void crypt_convert_init(struct crypt_config *cc,
+			       struct convert_context *ctx,
+			       struct bio *bio_out, struct bio *bio_in,
+			       sector_t sector, int write)
 {
 	ctx->bio_in = bio_in;
 	ctx->bio_out = bio_out;
@@ -338,7 +340,7 @@
  * Encrypt / decrypt data from one bio to another one (can be the same one)
  */
 static int crypt_convert(struct crypt_config *cc,
-                         struct convert_context *ctx)
+			 struct convert_context *ctx)
 {
 	int r = 0;
 
@@ -370,7 +372,7 @@
 		}
 
 		r = crypt_convert_scatterlist(cc, &sg_out, &sg_in, sg_in.length,
-		                              ctx->write, ctx->sector);
+					      ctx->write, ctx->sector);
 		if (r < 0)
 			break;
 
@@ -380,13 +382,13 @@
 	return r;
 }
 
- static void dm_crypt_bio_destructor(struct bio *bio)
- {
+static void dm_crypt_bio_destructor(struct bio *bio)
+{
 	struct dm_crypt_io *io = bio->bi_private;
 	struct crypt_config *cc = io->target->private;
 
 	bio_free(bio, cc->bs);
- }
+}
 
 /*
  * Generate a new unfragmented bio with the given size
@@ -458,7 +460,7 @@
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
  */
-static void dec_pending(struct dm_crypt_io *io, int error)
+static void crypt_dec_pending(struct dm_crypt_io *io, int error)
 {
 	struct crypt_config *cc = (struct crypt_config *) io->target->private;
 
@@ -474,18 +476,36 @@
 }
 
 /*
- * kcryptd:
+ * kcryptd/kcryptd_io:
  *
  * Needed because it would be very unwise to do decryption in an
  * interrupt context.
+ *
+ * kcryptd performs the actual encryption or decryption.
+ *
+ * kcryptd_io performs the IO submission.
+ *
+ * They must be separated as otherwise the final stages could be
+ * starved by new requests which can block in the first stages due
+ * to memory allocation.
  */
-static struct workqueue_struct *_kcryptd_workqueue;
 static void kcryptd_do_work(struct work_struct *work);
+static void kcryptd_do_crypt(struct work_struct *work);
 
 static void kcryptd_queue_io(struct dm_crypt_io *io)
 {
+	struct crypt_config *cc = io->target->private;
+
 	INIT_WORK(&io->work, kcryptd_do_work);
-	queue_work(_kcryptd_workqueue, &io->work);
+	queue_work(cc->io_queue, &io->work);
+}
+
+static void kcryptd_queue_crypt(struct dm_crypt_io *io)
+{
+	struct crypt_config *cc = io->target->private;
+
+	INIT_WORK(&io->work, kcryptd_do_crypt);
+	queue_work(cc->crypt_queue, &io->work);
 }
 
 static void crypt_endio(struct bio *clone, int error)
@@ -508,13 +528,12 @@
 	}
 
 	bio_put(clone);
-	io->post_process = 1;
-	kcryptd_queue_io(io);
+	kcryptd_queue_crypt(io);
 	return;
 
 out:
 	bio_put(clone);
-	dec_pending(io, error);
+	crypt_dec_pending(io, error);
 }
 
 static void clone_init(struct dm_crypt_io *io, struct bio *clone)
@@ -544,7 +563,7 @@
 	 */
 	clone = bio_alloc_bioset(GFP_NOIO, bio_segments(base_bio), cc->bs);
 	if (unlikely(!clone)) {
-		dec_pending(io, -ENOMEM);
+		crypt_dec_pending(io, -ENOMEM);
 		return;
 	}
 
@@ -579,7 +598,7 @@
 	while (remaining) {
 		clone = crypt_alloc_buffer(io, remaining);
 		if (unlikely(!clone)) {
-			dec_pending(io, -ENOMEM);
+			crypt_dec_pending(io, -ENOMEM);
 			return;
 		}
 
@@ -589,7 +608,7 @@
 		if (unlikely(crypt_convert(cc, &ctx) < 0)) {
 			crypt_free_buffer_pages(cc, clone);
 			bio_put(clone);
-			dec_pending(io, -EIO);
+			crypt_dec_pending(io, -EIO);
 			return;
 		}
 
@@ -624,17 +643,23 @@
 	crypt_convert_init(cc, &ctx, io->base_bio, io->base_bio,
 			   io->base_bio->bi_sector - io->target->begin, 0);
 
-	dec_pending(io, crypt_convert(cc, &ctx));
+	crypt_dec_pending(io, crypt_convert(cc, &ctx));
 }
 
 static void kcryptd_do_work(struct work_struct *work)
 {
 	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
 
-	if (io->post_process)
-		process_read_endio(io);
-	else if (bio_data_dir(io->base_bio) == READ)
+	if (bio_data_dir(io->base_bio) == READ)
 		process_read(io);
+}
+
+static void kcryptd_do_crypt(struct work_struct *work)
+{
+	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+
+	if (bio_data_dir(io->base_bio) == READ)
+		process_read_endio(io);
 	else
 		process_write(io);
 }
@@ -690,7 +715,7 @@
 	cc->key_size = key_size; /* initial settings */
 
 	if ((!key_size && strcmp(key, "-")) ||
-	    (key_size && crypt_decode_key(cc->key, key, key_size) < 0))
+	   (key_size && crypt_decode_key(cc->key, key, key_size) < 0))
 		return -EINVAL;
 
 	set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
@@ -746,7 +771,7 @@
 
  	if (crypt_set_key(cc, argv[1])) {
 		ti->error = "Error decoding key";
-		goto bad1;
+		goto bad_cipher;
 	}
 
 	/* Compatiblity mode for old dm-crypt cipher strings */
@@ -757,19 +782,19 @@
 
 	if (strcmp(chainmode, "ecb") && !ivmode) {
 		ti->error = "This chaining mode requires an IV mechanism";
-		goto bad1;
+		goto bad_cipher;
 	}
 
-	if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", chainmode, 
-		     cipher) >= CRYPTO_MAX_ALG_NAME) {
+	if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)",
+		     chainmode, cipher) >= CRYPTO_MAX_ALG_NAME) {
 		ti->error = "Chain mode + cipher name is too long";
-		goto bad1;
+		goto bad_cipher;
 	}
 
 	tfm = crypto_alloc_blkcipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm)) {
 		ti->error = "Error allocating crypto tfm";
-		goto bad1;
+		goto bad_cipher;
 	}
 
 	strcpy(cc->cipher, cipher);
@@ -793,18 +818,18 @@
 		cc->iv_gen_ops = &crypt_iv_null_ops;
 	else {
 		ti->error = "Invalid IV mode";
-		goto bad2;
+		goto bad_ivmode;
 	}
 
 	if (cc->iv_gen_ops && cc->iv_gen_ops->ctr &&
 	    cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0)
-		goto bad2;
+		goto bad_ivmode;
 
 	cc->iv_size = crypto_blkcipher_ivsize(tfm);
 	if (cc->iv_size)
 		/* at least a 64 bit sector number should fit in our buffer */
 		cc->iv_size = max(cc->iv_size,
-		                  (unsigned int)(sizeof(u64) / sizeof(u8)));
+				  (unsigned int)(sizeof(u64) / sizeof(u8)));
 	else {
 		if (cc->iv_gen_ops) {
 			DMWARN("Selected cipher does not support IVs");
@@ -817,13 +842,13 @@
 	cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
 	if (!cc->io_pool) {
 		ti->error = "Cannot allocate crypt io mempool";
-		goto bad3;
+		goto bad_slab_pool;
 	}
 
 	cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
 	if (!cc->page_pool) {
 		ti->error = "Cannot allocate page mempool";
-		goto bad4;
+		goto bad_page_pool;
 	}
 
 	cc->bs = bioset_create(MIN_IOS, MIN_IOS);
@@ -834,25 +859,25 @@
 
 	if (crypto_blkcipher_setkey(tfm, cc->key, key_size) < 0) {
 		ti->error = "Error setting key";
-		goto bad5;
+		goto bad_device;
 	}
 
 	if (sscanf(argv[2], "%llu", &tmpll) != 1) {
 		ti->error = "Invalid iv_offset sector";
-		goto bad5;
+		goto bad_device;
 	}
 	cc->iv_offset = tmpll;
 
 	if (sscanf(argv[4], "%llu", &tmpll) != 1) {
 		ti->error = "Invalid device sector";
-		goto bad5;
+		goto bad_device;
 	}
 	cc->start = tmpll;
 
 	if (dm_get_device(ti, argv[3], cc->start, ti->len,
-	                  dm_table_get_mode(ti->table), &cc->dev)) {
+			  dm_table_get_mode(ti->table), &cc->dev)) {
 		ti->error = "Device lookup failed";
-		goto bad5;
+		goto bad_device;
 	}
 
 	if (ivmode && cc->iv_gen_ops) {
@@ -861,27 +886,45 @@
 		cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL);
 		if (!cc->iv_mode) {
 			ti->error = "Error kmallocing iv_mode string";
-			goto bad5;
+			goto bad_ivmode_string;
 		}
 		strcpy(cc->iv_mode, ivmode);
 	} else
 		cc->iv_mode = NULL;
 
+	cc->io_queue = create_singlethread_workqueue("kcryptd_io");
+	if (!cc->io_queue) {
+		ti->error = "Couldn't create kcryptd io queue";
+		goto bad_io_queue;
+	}
+
+	cc->crypt_queue = create_singlethread_workqueue("kcryptd");
+	if (!cc->crypt_queue) {
+		ti->error = "Couldn't create kcryptd queue";
+		goto bad_crypt_queue;
+	}
+
 	ti->private = cc;
 	return 0;
 
-bad5:
+bad_crypt_queue:
+	destroy_workqueue(cc->io_queue);
+bad_io_queue:
+	kfree(cc->iv_mode);
+bad_ivmode_string:
+	dm_put_device(ti, cc->dev);
+bad_device:
 	bioset_free(cc->bs);
 bad_bs:
 	mempool_destroy(cc->page_pool);
-bad4:
+bad_page_pool:
 	mempool_destroy(cc->io_pool);
-bad3:
+bad_slab_pool:
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
 		cc->iv_gen_ops->dtr(cc);
-bad2:
+bad_ivmode:
 	crypto_free_blkcipher(tfm);
-bad1:
+bad_cipher:
 	/* Must zero key material before freeing */
 	memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
 	kfree(cc);
@@ -892,7 +935,8 @@
 {
 	struct crypt_config *cc = (struct crypt_config *) ti->private;
 
-	flush_workqueue(_kcryptd_workqueue);
+	destroy_workqueue(cc->io_queue);
+	destroy_workqueue(cc->crypt_queue);
 
 	bioset_free(cc->bs);
 	mempool_destroy(cc->page_pool);
@@ -918,9 +962,13 @@
 	io = mempool_alloc(cc->io_pool, GFP_NOIO);
 	io->target = ti;
 	io->base_bio = bio;
-	io->error = io->post_process = 0;
+	io->error = 0;
 	atomic_set(&io->pending, 0);
-	kcryptd_queue_io(io);
+
+	if (bio_data_dir(io->base_bio) == READ)
+		kcryptd_queue_io(io);
+	else
+		kcryptd_queue_crypt(io);
 
 	return DM_MAPIO_SUBMITTED;
 }
@@ -1037,25 +1085,12 @@
 	if (!_crypt_io_pool)
 		return -ENOMEM;
 
-	_kcryptd_workqueue = create_workqueue("kcryptd");
-	if (!_kcryptd_workqueue) {
-		r = -ENOMEM;
-		DMERR("couldn't create kcryptd");
-		goto bad1;
-	}
-
 	r = dm_register_target(&crypt_target);
 	if (r < 0) {
 		DMERR("register failed %d", r);
-		goto bad2;
+		kmem_cache_destroy(_crypt_io_pool);
 	}
 
-	return 0;
-
-bad2:
-	destroy_workqueue(_kcryptd_workqueue);
-bad1:
-	kmem_cache_destroy(_crypt_io_pool);
 	return r;
 }
 
@@ -1066,7 +1101,6 @@
 	if (r < 0)
 		DMERR("unregister failed %d", r);
 
-	destroy_workqueue(_kcryptd_workqueue);
 	kmem_cache_destroy(_crypt_io_pool);
 }
 
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 6928c13..bdd37f8 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -83,7 +83,7 @@
 	struct dm_delay_info *delayed, *next;
 	unsigned long next_expires = 0;
 	int start_timer = 0;
-	BIO_LIST(flush_bios);
+	struct bio_list flush_bios = { };
 
 	mutex_lock(&delayed_bios_lock);
 	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
@@ -163,34 +163,32 @@
 		goto bad;
 	}
 
-	if (argc == 3) {
-		dc->dev_write = NULL;
+	dc->dev_write = NULL;
+	if (argc == 3)
 		goto out;
-	}
 
 	if (sscanf(argv[4], "%llu", &tmpll) != 1) {
 		ti->error = "Invalid write device sector";
-		goto bad;
+		goto bad_dev_read;
 	}
 	dc->start_write = tmpll;
 
 	if (sscanf(argv[5], "%u", &dc->write_delay) != 1) {
 		ti->error = "Invalid write delay";
-		goto bad;
+		goto bad_dev_read;
 	}
 
 	if (dm_get_device(ti, argv[3], dc->start_write, ti->len,
 			  dm_table_get_mode(ti->table), &dc->dev_write)) {
 		ti->error = "Write device lookup failed";
-		dm_put_device(ti, dc->dev_read);
-		goto bad;
+		goto bad_dev_read;
 	}
 
 out:
 	dc->delayed_pool = mempool_create_slab_pool(128, delayed_cache);
 	if (!dc->delayed_pool) {
 		DMERR("Couldn't create delayed bio pool.");
-		goto bad;
+		goto bad_dev_write;
 	}
 
 	setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc);
@@ -203,6 +201,11 @@
 	ti->private = dc;
 	return 0;
 
+bad_dev_write:
+	if (dc->dev_write)
+		dm_put_device(ti, dc->dev_write);
+bad_dev_read:
+	dm_put_device(ti, dc->dev_read);
 bad:
 	kfree(dc);
 	return -EINVAL;
@@ -305,7 +308,7 @@
 		       (unsigned long long) dc->start_read,
 		       dc->read_delay);
 		if (dc->dev_write)
-			DMEMIT("%s %llu %u", dc->dev_write->name,
+			DMEMIT(" %s %llu %u", dc->dev_write->name,
 			       (unsigned long long) dc->start_write,
 			       dc->write_delay);
 		break;
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index 3425172..6b91b9a 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -81,7 +81,7 @@
 	}
 
 	if (bio_add_page(bio, page, data_size, 0) != data_size) {
-		DMERR("get_failover_bio: alloc_page() failed.");
+		DMERR("get_failover_bio: bio_add_page() failed.");
 		__free_page(page);
 		bio_put(bio);
 		return NULL;
@@ -211,12 +211,10 @@
 
 static struct emc_handler *alloc_emc_handler(void)
 {
-	struct emc_handler *h = kmalloc(sizeof(*h), GFP_KERNEL);
+	struct emc_handler *h = kzalloc(sizeof(*h), GFP_KERNEL);
 
-	if (h) {
-		memset(h, 0, sizeof(*h));
+	if (h)
 		spin_lock_init(&h->lock);
-	}
 
 	return h;
 }
diff --git a/drivers/md/dm-hw-handler.c b/drivers/md/dm-hw-handler.c
index baafaab..2ee84d8 100644
--- a/drivers/md/dm-hw-handler.c
+++ b/drivers/md/dm-hw-handler.c
@@ -91,12 +91,10 @@
 
 static struct hwh_internal *_alloc_hw_handler(struct hw_handler_type *hwht)
 {
-	struct hwh_internal *hwhi = kmalloc(sizeof(*hwhi), GFP_KERNEL);
+	struct hwh_internal *hwhi = kzalloc(sizeof(*hwhi), GFP_KERNEL);
 
-	if (hwhi) {
-		memset(hwhi, 0, sizeof(*hwhi));
+	if (hwhi)
 		hwhi->hwht = *hwht;
-	}
 
 	return hwhi;
 }
diff --git a/drivers/md/dm-hw-handler.h b/drivers/md/dm-hw-handler.h
index e0832e6..46809dc 100644
--- a/drivers/md/dm-hw-handler.h
+++ b/drivers/md/dm-hw-handler.h
@@ -58,5 +58,6 @@
 #define MP_FAIL_PATH 1
 #define MP_BYPASS_PG 2
 #define MP_ERROR_IO  4	/* Don't retry this I/O */
+#define MP_RETRY 8
 
 #endif
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index b441d82..138200b 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -700,7 +700,7 @@
 	int r;
 	char *new_name = (char *) param + param->data_start;
 
-	if (new_name < (char *) (param + 1) ||
+	if (new_name < (char *) param->data ||
 	    invalid_str(new_name, (void *) param + param_size)) {
 		DMWARN("Invalid new logical volume name supplied.");
 		return -EINVAL;
@@ -726,7 +726,7 @@
 	if (!md)
 		return -ENXIO;
 
-	if (geostr < (char *) (param + 1) ||
+	if (geostr < (char *) param->data ||
 	    invalid_str(geostr, (void *) param + param_size)) {
 		DMWARN("Invalid geometry supplied.");
 		goto out;
@@ -1233,7 +1233,7 @@
 	if (r)
 		goto out;
 
-	if (tmsg < (struct dm_target_msg *) (param + 1) ||
+	if (tmsg < (struct dm_target_msg *) param->data ||
 	    invalid_str(tmsg->message, (void *) param + param_size)) {
 		DMWARN("Invalid target message parameters.");
 		r = -EINVAL;
@@ -1358,7 +1358,7 @@
 	if (tmp.data_size < sizeof(tmp))
 		return -EINVAL;
 
-	dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
+	dmi = vmalloc(tmp.data_size);
 	if (!dmi)
 		return -ENOMEM;
 
@@ -1515,3 +1515,35 @@
 
 	dm_hash_exit();
 }
+
+/**
+ * dm_copy_name_and_uuid - Copy mapped device name & uuid into supplied buffers
+ * @md: Pointer to mapped_device
+ * @name: Buffer (size DM_NAME_LEN) for name
+ * @uuid: Buffer (size DM_UUID_LEN) for uuid or empty string if uuid not defined
+ */
+int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
+{
+	int r = 0;
+	struct hash_cell *hc;
+
+	if (!md)
+		return -ENXIO;
+
+	dm_get(md);
+	down_read(&_hash_lock);
+	hc = dm_get_mdptr(md);
+	if (!hc || hc->md != md) {
+		r = -ENXIO;
+		goto out;
+	}
+
+	strcpy(name, hc->name);
+	strcpy(uuid, hc->uuid ? : "");
+
+out:
+	up_read(&_hash_lock);
+	dm_put(md);
+
+	return r;
+}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index a66428d8..072ee43 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -696,7 +696,7 @@
 	.module = THIS_MODULE,
 	.ctr = disk_ctr,
 	.dtr = disk_dtr,
-	.suspend = disk_flush,
+	.postsuspend = disk_flush,
 	.resume = disk_resume,
 	.get_region_size = core_get_region_size,
 	.is_clean = core_is_clean,
diff --git a/drivers/md/dm-log.h b/drivers/md/dm-log.h
index 86a301c..3fae87e 100644
--- a/drivers/md/dm-log.h
+++ b/drivers/md/dm-log.h
@@ -32,7 +32,8 @@
 	 * There are times when we don't want the log to touch
 	 * the disk.
 	 */
-	int (*suspend)(struct dirty_log *log);
+	int (*presuspend)(struct dirty_log *log);
+	int (*postsuspend)(struct dirty_log *log);
 	int (*resume)(struct dirty_log *log);
 
 	/*
diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c
new file mode 100644
index 0000000..204bf42
--- /dev/null
+++ b/drivers/md/dm-mpath-hp-sw.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2005 Mike Christie, All rights reserved.
+ * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
+ * Authors: Mike Christie
+ *          Dave Wysochanski
+ *
+ * This file is released under the GPL.
+ *
+ * This module implements the specific path activation code for
+ * HP StorageWorks and FSC FibreCat Asymmetric (Active/Passive)
+ * storage arrays.
+ * These storage arrays have controller-based failover, not
+ * LUN-based failover.  However, LUN-based failover is the design
+ * of dm-multipath. Thus, this module is written for LUN-based failover.
+ */
+#include <linux/blkdev.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_dbg.h>
+
+#include "dm.h"
+#include "dm-hw-handler.h"
+
+#define DM_MSG_PREFIX "multipath hp-sw"
+#define DM_HP_HWH_NAME "hp-sw"
+#define DM_HP_HWH_VER "1.0.0"
+
+struct hp_sw_context {
+	unsigned char sense[SCSI_SENSE_BUFFERSIZE];
+};
+
+/*
+ * hp_sw_error_is_retryable - Is an HP-specific check condition retryable?
+ * @req: path activation request
+ *
+ * Examine error codes of request and determine whether the error is retryable.
+ * Some error codes are already retried by scsi-ml (see
+ * scsi_decide_disposition), but some HP specific codes are not.
+ * The intent of this routine is to supply the logic for the HP specific
+ * check conditions.
+ *
+ * Returns:
+ *  1 - command completed with retryable error
+ *  0 - command completed with non-retryable error
+ *
+ * Possible optimizations
+ * 1. More hardware-specific error codes
+ */
+static int hp_sw_error_is_retryable(struct request *req)
+{
+	/*
+	 * NOT_READY is known to be retryable
+	 * For now we just dump out the sense data and call it retryable
+	 */
+	if (status_byte(req->errors) == CHECK_CONDITION)
+		__scsi_print_sense(DM_HP_HWH_NAME, req->sense, req->sense_len);
+
+	/*
+	 * At this point we don't have complete information about all the error
+	 * codes from this hardware, so we are just conservative and retry
+	 * when in doubt.
+	 */
+	return 1;
+}
+
+/*
+ * hp_sw_end_io - Completion handler for HP path activation.
+ * @req: path activation request
+ * @error: scsi-ml error
+ *
+ *  Check sense data, free request structure, and notify dm that
+ *  pg initialization has completed.
+ *
+ * Context: scsi-ml softirq
+ *
+ */
+static void hp_sw_end_io(struct request *req, int error)
+{
+	struct dm_path *path = req->end_io_data;
+	unsigned err_flags = 0;
+
+	if (!error) {
+		DMDEBUG("%s path activation command - success",
+			path->dev->name);
+		goto out;
+	}
+
+	if (hp_sw_error_is_retryable(req)) {
+		DMDEBUG("%s path activation command - retry",
+			path->dev->name);
+		err_flags = MP_RETRY;
+		goto out;
+	}
+
+	DMWARN("%s path activation fail - error=0x%x",
+	       path->dev->name, error);
+	err_flags = MP_FAIL_PATH;
+
+out:
+	req->end_io_data = NULL;
+	__blk_put_request(req->q, req);
+	dm_pg_init_complete(path, err_flags);
+}
+
+/*
+ * hp_sw_get_request - Allocate an HP specific path activation request
+ * @path: path on which request will be sent (needed for request queue)
+ *
+ * The START command is used for path activation request.
+ * These arrays are controller-based failover, not LUN based.
+ * One START command issued to a single path will fail over all
+ * LUNs for the same controller.
+ *
+ * Possible optimizations
+ * 1. Make timeout configurable
+ * 2. Preallocate request
+ */
+static struct request *hp_sw_get_request(struct dm_path *path)
+{
+	struct request *req;
+	struct block_device *bdev = path->dev->bdev;
+	struct request_queue *q = bdev_get_queue(bdev);
+	struct hp_sw_context *h = path->hwhcontext;
+
+	req = blk_get_request(q, WRITE, GFP_NOIO);
+	if (!req)
+		goto out;
+
+	req->timeout = 60 * HZ;
+
+	req->errors = 0;
+	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
+	req->end_io_data = path;
+	req->sense = h->sense;
+	memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
+
+	memset(&req->cmd, 0, BLK_MAX_CDB);
+	req->cmd[0] = START_STOP;
+	req->cmd[4] = 1;
+	req->cmd_len = COMMAND_SIZE(req->cmd[0]);
+
+out:
+	return req;
+}
+
+/*
+ * hp_sw_pg_init - HP path activation implementation.
+ * @hwh: hardware handler specific data
+ * @bypassed: unused; is the path group bypassed? (see dm-mpath.c)
+ * @path: path to send initialization command
+ *
+ * Send an HP-specific path activation command on 'path'.
+ * Do not try to optimize in any way, just send the activation command.
+ * More than one path activation command may be sent to the same controller.
+ * This seems to work fine for basic failover support.
+ *
+ * Possible optimizations
+ * 1. Detect an in-progress activation request and avoid submitting another one
+ * 2. Model the controller and only send a single activation request at a time
+ * 3. Determine the state of a path before sending an activation request
+ *
+ * Context: kmpathd (see process_queued_ios() in dm-mpath.c)
+ */
+static void hp_sw_pg_init(struct hw_handler *hwh, unsigned bypassed,
+			  struct dm_path *path)
+{
+	struct request *req;
+	struct hp_sw_context *h;
+
+	path->hwhcontext = hwh->context;
+	h = hwh->context;
+
+	req = hp_sw_get_request(path);
+	if (!req) {
+		DMERR("%s path activation command - allocation fail",
+		      path->dev->name);
+		goto retry;
+	}
+
+	DMDEBUG("%s path activation command - sent", path->dev->name);
+
+	blk_execute_rq_nowait(req->q, NULL, req, 1, hp_sw_end_io);
+	return;
+
+retry:
+	dm_pg_init_complete(path, MP_RETRY);
+}
+
+static int hp_sw_create(struct hw_handler *hwh, unsigned argc, char **argv)
+{
+	struct hp_sw_context *h;
+
+	h = kmalloc(sizeof(*h), GFP_KERNEL);
+	if (!h)
+		return -ENOMEM;
+
+	hwh->context = h;
+
+	return 0;
+}
+
+static void hp_sw_destroy(struct hw_handler *hwh)
+{
+	struct hp_sw_context *h = hwh->context;
+
+	kfree(h);
+}
+
+static struct hw_handler_type hp_sw_hwh = {
+	.name = DM_HP_HWH_NAME,
+	.module = THIS_MODULE,
+	.create = hp_sw_create,
+	.destroy = hp_sw_destroy,
+	.pg_init = hp_sw_pg_init,
+};
+
+static int __init hp_sw_init(void)
+{
+	int r;
+
+	r = dm_register_hw_handler(&hp_sw_hwh);
+	if (r < 0)
+		DMERR("register failed %d", r);
+	else
+		DMINFO("version " DM_HP_HWH_VER " loaded");
+
+	return r;
+}
+
+static void __exit hp_sw_exit(void)
+{
+	int r;
+
+	r = dm_unregister_hw_handler(&hp_sw_hwh);
+	if (r < 0)
+		DMERR("unregister failed %d", r);
+}
+
+module_init(hp_sw_init);
+module_exit(hp_sw_exit);
+
+MODULE_DESCRIPTION("DM Multipath HP StorageWorks / FSC FibreCat (A/P) support");
+MODULE_AUTHOR("Mike Christie, Dave Wysochanski <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DM_HP_HWH_VER);
diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c
index 16b1613..e04eb5c 100644
--- a/drivers/md/dm-mpath-rdac.c
+++ b/drivers/md/dm-mpath-rdac.c
@@ -664,20 +664,21 @@
 
 static int __init rdac_init(void)
 {
-	int r = dm_register_hw_handler(&rdac_handler);
-
-	if (r < 0) {
-		DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r);
-		return r;
-	}
+	int r;
 
 	rdac_wkqd = create_singlethread_workqueue("rdac_wkqd");
 	if (!rdac_wkqd) {
 		DMERR("Failed to create workqueue rdac_wkqd.");
-		dm_unregister_hw_handler(&rdac_handler);
 		return -ENOMEM;
 	}
 
+	r = dm_register_hw_handler(&rdac_handler);
+	if (r < 0) {
+		DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r);
+		destroy_workqueue(rdac_wkqd);
+		return r;
+	}
+
 	DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER);
 	return 0;
 }
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 31056ab..24b2b1e 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -10,6 +10,7 @@
 #include "dm-hw-handler.h"
 #include "dm-bio-list.h"
 #include "dm-bio-record.h"
+#include "dm-uevent.h"
 
 #include <linux/ctype.h>
 #include <linux/init.h>
@@ -75,6 +76,8 @@
 	unsigned queue_io;		/* Must we queue all I/O? */
 	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
 	unsigned saved_queue_if_no_path;/* Saved state during suspension */
+	unsigned pg_init_retries;	/* Number of times to retry pg_init */
+	unsigned pg_init_count;		/* Number of times pg_init called */
 
 	struct work_struct process_queued_ios;
 	struct bio_list queued_ios;
@@ -225,6 +228,8 @@
 		m->pg_init_required = 0;
 		m->queue_io = 0;
 	}
+
+	m->pg_init_count = 0;
 }
 
 static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
@@ -424,6 +429,7 @@
 		must_queue = 0;
 
 	if (m->pg_init_required && !m->pg_init_in_progress) {
+		m->pg_init_count++;
 		m->pg_init_required = 0;
 		m->pg_init_in_progress = 1;
 		init_required = 1;
@@ -689,9 +695,11 @@
 	int r;
 	unsigned argc;
 	struct dm_target *ti = m->ti;
+	const char *param_name;
 
 	static struct param _params[] = {
-		{0, 1, "invalid number of feature args"},
+		{0, 3, "invalid number of feature args"},
+		{1, 50, "pg_init_retries must be between 1 and 50"},
 	};
 
 	r = read_param(_params, shift(as), &argc, &ti->error);
@@ -701,12 +709,28 @@
 	if (!argc)
 		return 0;
 
-	if (!strnicmp(shift(as), MESG_STR("queue_if_no_path")))
-		return queue_if_no_path(m, 1, 0);
-	else {
+	do {
+		param_name = shift(as);
+		argc--;
+
+		if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
+			r = queue_if_no_path(m, 1, 0);
+			continue;
+		}
+
+		if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
+		    (argc >= 1)) {
+			r = read_param(_params + 1, shift(as),
+				       &m->pg_init_retries, &ti->error);
+			argc--;
+			continue;
+		}
+
 		ti->error = "Unrecognised multipath feature request";
-		return -EINVAL;
-	}
+		r = -EINVAL;
+	} while (argc && !r);
+
+	return r;
 }
 
 static int multipath_ctr(struct dm_target *ti, unsigned int argc,
@@ -834,6 +858,9 @@
 	if (pgpath == m->current_pgpath)
 		m->current_pgpath = NULL;
 
+	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
+		      pgpath->path.dev->name, m->nr_valid_paths);
+
 	queue_work(kmultipathd, &m->trigger_event);
 
 out:
@@ -873,6 +900,9 @@
 	if (!m->nr_valid_paths++ && m->queue_size)
 		queue_work(kmultipathd, &m->process_queued_ios);
 
+	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
+		      pgpath->path.dev->name, m->nr_valid_paths);
+
 	queue_work(kmultipathd, &m->trigger_event);
 
 out:
@@ -976,6 +1006,26 @@
 }
 
 /*
+ * Should we retry pg_init immediately?
+ */
+static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
+{
+	unsigned long flags;
+	int limit_reached = 0;
+
+	spin_lock_irqsave(&m->lock, flags);
+
+	if (m->pg_init_count <= m->pg_init_retries)
+		m->pg_init_required = 1;
+	else
+		limit_reached = 1;
+
+	spin_unlock_irqrestore(&m->lock, flags);
+
+	return limit_reached;
+}
+
+/*
  * pg_init must call this when it has completed its initialisation
  */
 void dm_pg_init_complete(struct dm_path *path, unsigned err_flags)
@@ -985,8 +1035,14 @@
 	struct multipath *m = pg->m;
 	unsigned long flags;
 
-	/* We insist on failing the path if the PG is already bypassed. */
-	if (err_flags && pg->bypassed)
+	/*
+	 * If requested, retry pg_init until maximum number of retries exceeded.
+	 * If retry not requested and PG already bypassed, always fail the path.
+	 */
+	if (err_flags & MP_RETRY) {
+		if (pg_init_limit_reached(m, pgpath))
+			err_flags |= MP_FAIL_PATH;
+	} else if (err_flags && pg->bypassed)
 		err_flags |= MP_FAIL_PATH;
 
 	if (err_flags & MP_FAIL_PATH)
@@ -996,7 +1052,7 @@
 		bypass_pg(m, pg, 1);
 
 	spin_lock_irqsave(&m->lock, flags);
-	if (err_flags) {
+	if (err_flags & ~MP_RETRY) {
 		m->current_pgpath = NULL;
 		m->current_pg = NULL;
 	} else if (!m->pg_init_required)
@@ -1148,11 +1204,15 @@
 
 	/* Features */
 	if (type == STATUSTYPE_INFO)
-		DMEMIT("1 %u ", m->queue_size);
-	else if (m->queue_if_no_path)
-		DMEMIT("1 queue_if_no_path ");
-	else
-		DMEMIT("0 ");
+		DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
+	else {
+		DMEMIT("%u ", m->queue_if_no_path +
+			      (m->pg_init_retries > 0) * 2);
+		if (m->queue_if_no_path)
+			DMEMIT("queue_if_no_path ");
+		if (m->pg_init_retries)
+			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
+	}
 
 	if (hwh->type && hwh->type->status)
 		sz += hwh->type->status(hwh, type, result + sz, maxlen - sz);
diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c
index f10a0c8..ca1bb63 100644
--- a/drivers/md/dm-path-selector.c
+++ b/drivers/md/dm-path-selector.c
@@ -94,12 +94,10 @@
 
 static struct ps_internal *_alloc_path_selector(struct path_selector_type *pst)
 {
-	struct ps_internal *psi = kmalloc(sizeof(*psi), GFP_KERNEL);
+	struct ps_internal *psi = kzalloc(sizeof(*psi), GFP_KERNEL);
 
-	if (psi) {
-		memset(psi, 0, sizeof(*psi));
+	if (psi)
 		psi->pst = *pst;
-	}
 
 	return psi;
 }
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d09ff15..31123d4a 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -19,6 +19,7 @@
 #include <linux/time.h>
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
+#include <linux/log2.h>
 
 #define DM_MSG_PREFIX "raid1"
 #define DM_IO_PAGES 64
@@ -113,6 +114,7 @@
  * Mirror set structures.
  *---------------------------------------------------------------*/
 struct mirror {
+	struct mirror_set *ms;
 	atomic_t error_count;
 	struct dm_dev *dev;
 	sector_t offset;
@@ -974,6 +976,7 @@
 
 	if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
 		ti->error = "Error creating dirty region hash";
+		dm_io_client_destroy(ms->io_client);
 		kfree(ms);
 		return NULL;
 	}
@@ -994,7 +997,7 @@
 
 static inline int _check_region_size(struct dm_target *ti, uint32_t size)
 {
-	return !(size % (PAGE_SIZE >> 9) || (size & (size - 1)) ||
+	return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) ||
 		 size > ti->len);
 }
 
@@ -1015,6 +1018,7 @@
 		return -ENXIO;
 	}
 
+	ms->mirror[mirror].ms = ms;
 	ms->mirror[mirror].offset = offset;
 
 	return 0;
@@ -1163,16 +1167,14 @@
 	ms->kmirrord_wq = create_singlethread_workqueue("kmirrord");
 	if (!ms->kmirrord_wq) {
 		DMERR("couldn't start kmirrord");
-		free_context(ms, ti, m);
-		return -ENOMEM;
+		r = -ENOMEM;
+		goto err_free_context;
 	}
 	INIT_WORK(&ms->kmirrord_work, do_mirror);
 
 	r = parse_features(ms, argc, argv, &args_used);
-	if (r) {
-		free_context(ms, ti, ms->nr_mirrors);
-		return r;
-	}
+	if (r)
+		goto err_destroy_wq;
 
 	argv += args_used;
 	argc -= args_used;
@@ -1188,19 +1190,22 @@
 
 	if (argc) {
 		ti->error = "Too many mirror arguments";
-		free_context(ms, ti, ms->nr_mirrors);
-		return -EINVAL;
+		r = -EINVAL;
+		goto err_destroy_wq;
 	}
 
 	r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client);
-	if (r) {
-		destroy_workqueue(ms->kmirrord_wq);
-		free_context(ms, ti, ms->nr_mirrors);
-		return r;
-	}
+	if (r)
+		goto err_destroy_wq;
 
 	wake(ms);
 	return 0;
+
+err_destroy_wq:
+	destroy_workqueue(ms->kmirrord_wq);
+err_free_context:
+	free_context(ms, ti, ms->nr_mirrors);
+	return r;
 }
 
 static void mirror_dtr(struct dm_target *ti)
@@ -1302,7 +1307,7 @@
 	wait_event(_kmirrord_recovery_stopped,
 		   !atomic_read(&ms->rh.recovery_in_flight));
 
-	if (log->type->suspend && log->type->suspend(log))
+	if (log->type->postsuspend && log->type->postsuspend(log))
 		/* FIXME: need better error handling */
 		DMWARN("log suspend failed");
 }
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 98a633f..cee16fa 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/log2.h>
 
 #include "dm-snap.h"
 #include "dm-bio-list.h"
@@ -415,7 +416,7 @@
 	chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
 
 	/* Check chunk_size is a power of 2 */
-	if (chunk_size & (chunk_size - 1)) {
+	if (!is_power_of_2(chunk_size)) {
 		*error = "Chunk size is not a power of 2";
 		return -EINVAL;
 	}
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 51f5e07..969944a 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -11,6 +11,7 @@
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/slab.h>
+#include <linux/log2.h>
 
 #define DM_MSG_PREFIX "striped"
 
@@ -99,7 +100,7 @@
 	/*
 	 * chunk_size is a power of two
 	 */
-	if (!chunk_size || (chunk_size & (chunk_size - 1)) ||
+	if (!is_power_of_2(chunk_size) ||
 	    (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) {
 		ti->error = "Invalid chunk size";
 		return -EINVAL;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index fbe477b..8939e61 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -213,12 +213,11 @@
 int dm_table_create(struct dm_table **result, int mode,
 		    unsigned num_targets, struct mapped_device *md)
 {
-	struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);
 
 	if (!t)
 		return -ENOMEM;
 
-	memset(t, 0, sizeof(*t));
 	INIT_LIST_HEAD(&t->devices);
 	atomic_set(&t->holders, 1);
 
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 477a041..835cf95 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -88,12 +88,10 @@
 
 static struct tt_internal *alloc_target(struct target_type *t)
 {
-	struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+	struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL);
 
-	if (ti) {
-		memset(ti, 0, sizeof(*ti));
+	if (ti)
 		ti->tt = *t;
-	}
 
 	return ti;
 }
diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c
new file mode 100644
index 0000000..50377e5
--- /dev/null
+++ b/drivers/md/dm-uevent.c
@@ -0,0 +1,222 @@
+/*
+ * Device Mapper Uevent Support (dm-uevent)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2007
+ * 	Author: Mike Anderson <andmike@linux.vnet.ibm.com>
+ */
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/kobject.h>
+#include <linux/dm-ioctl.h>
+
+#include "dm.h"
+#include "dm-uevent.h"
+
+#define DM_MSG_PREFIX "uevent"
+
+static const struct {
+	enum dm_uevent_type type;
+	enum kobject_action action;
+	char *name;
+} _dm_uevent_type_names[] = {
+	{DM_UEVENT_PATH_FAILED, KOBJ_CHANGE, "PATH_FAILED"},
+	{DM_UEVENT_PATH_REINSTATED, KOBJ_CHANGE, "PATH_REINSTATED"},
+};
+
+static struct kmem_cache *_dm_event_cache;
+
+struct dm_uevent {
+	struct mapped_device *md;
+	enum kobject_action action;
+	struct kobj_uevent_env ku_env;
+	struct list_head elist;
+	char name[DM_NAME_LEN];
+	char uuid[DM_UUID_LEN];
+};
+
+static void dm_uevent_free(struct dm_uevent *event)
+{
+	kmem_cache_free(_dm_event_cache, event);
+}
+
+static struct dm_uevent *dm_uevent_alloc(struct mapped_device *md)
+{
+	struct dm_uevent *event;
+
+	event = kmem_cache_zalloc(_dm_event_cache, GFP_ATOMIC);
+	if (!event)
+		return NULL;
+
+	INIT_LIST_HEAD(&event->elist);
+	event->md = md;
+
+	return event;
+}
+
+static struct dm_uevent *dm_build_path_uevent(struct mapped_device *md,
+					      struct dm_target *ti,
+					      enum kobject_action action,
+					      const char *dm_action,
+					      const char *path,
+					      unsigned nr_valid_paths)
+{
+	struct dm_uevent *event;
+
+	event = dm_uevent_alloc(md);
+	if (!event) {
+		DMERR("%s: dm_uevent_alloc() failed", __FUNCTION__);
+		goto err_nomem;
+	}
+
+	event->action = action;
+
+	if (add_uevent_var(&event->ku_env, "DM_TARGET=%s", ti->type->name)) {
+		DMERR("%s: add_uevent_var() for DM_TARGET failed",
+		      __FUNCTION__);
+		goto err_add;
+	}
+
+	if (add_uevent_var(&event->ku_env, "DM_ACTION=%s", dm_action)) {
+		DMERR("%s: add_uevent_var() for DM_ACTION failed",
+		      __FUNCTION__);
+		goto err_add;
+	}
+
+	if (add_uevent_var(&event->ku_env, "DM_SEQNUM=%u",
+			   dm_next_uevent_seq(md))) {
+		DMERR("%s: add_uevent_var() for DM_SEQNUM failed",
+		      __FUNCTION__);
+		goto err_add;
+	}
+
+	if (add_uevent_var(&event->ku_env, "DM_PATH=%s", path)) {
+		DMERR("%s: add_uevent_var() for DM_PATH failed", __FUNCTION__);
+		goto err_add;
+	}
+
+	if (add_uevent_var(&event->ku_env, "DM_NR_VALID_PATHS=%d",
+			   nr_valid_paths)) {
+		DMERR("%s: add_uevent_var() for DM_NR_VALID_PATHS failed",
+		      __FUNCTION__);
+		goto err_add;
+	}
+
+	return event;
+
+err_add:
+	dm_uevent_free(event);
+err_nomem:
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * dm_send_uevents - send uevents for given list
+ *
+ * @events:	list of events to send
+ * @kobj:	kobject generating event
+ *
+ */
+void dm_send_uevents(struct list_head *events, struct kobject *kobj)
+{
+	int r;
+	struct dm_uevent *event, *next;
+
+	list_for_each_entry_safe(event, next, events, elist) {
+		list_del_init(&event->elist);
+
+		/*
+		 * Need to call dm_copy_name_and_uuid from here for now.
+		 * Context of previous var adds and locking used for
+		 * hash_cell not compatable.
+		 */
+		if (dm_copy_name_and_uuid(event->md, event->name,
+					  event->uuid)) {
+			DMERR("%s: dm_copy_name_and_uuid() failed",
+			      __FUNCTION__);
+			goto uevent_free;
+		}
+
+		if (add_uevent_var(&event->ku_env, "DM_NAME=%s", event->name)) {
+			DMERR("%s: add_uevent_var() for DM_NAME failed",
+			      __FUNCTION__);
+			goto uevent_free;
+		}
+
+		if (add_uevent_var(&event->ku_env, "DM_UUID=%s", event->uuid)) {
+			DMERR("%s: add_uevent_var() for DM_UUID failed",
+			      __FUNCTION__);
+			goto uevent_free;
+		}
+
+		r = kobject_uevent_env(kobj, event->action, event->ku_env.envp);
+		if (r)
+			DMERR("%s: kobject_uevent_env failed", __FUNCTION__);
+uevent_free:
+		dm_uevent_free(event);
+	}
+}
+EXPORT_SYMBOL_GPL(dm_send_uevents);
+
+/**
+ * dm_path_uevent - called to create a new path event and queue it
+ *
+ * @event_type:	path event type enum
+ * @ti:			pointer to a dm_target
+ * @path:		string containing pathname
+ * @nr_valid_paths:	number of valid paths remaining
+ *
+ */
+void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
+		   const char *path, unsigned nr_valid_paths)
+{
+	struct mapped_device *md = dm_table_get_md(ti->table);
+	struct dm_uevent *event;
+
+	if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) {
+		DMERR("%s: Invalid event_type %d", __FUNCTION__, event_type);
+		goto out;
+	}
+
+	event = dm_build_path_uevent(md, ti,
+				     _dm_uevent_type_names[event_type].action,
+				     _dm_uevent_type_names[event_type].name,
+				     path, nr_valid_paths);
+	if (IS_ERR(event))
+		goto out;
+
+	dm_uevent_add(md, &event->elist);
+
+out:
+	dm_put(md);
+}
+EXPORT_SYMBOL_GPL(dm_path_uevent);
+
+int dm_uevent_init(void)
+{
+	_dm_event_cache = KMEM_CACHE(dm_uevent, 0);
+	if (!_dm_event_cache)
+		return -ENOMEM;
+
+	DMINFO("version 1.0.3");
+
+	return 0;
+}
+
+void dm_uevent_exit(void)
+{
+	kmem_cache_destroy(_dm_event_cache);
+}
diff --git a/drivers/md/dm-uevent.h b/drivers/md/dm-uevent.h
new file mode 100644
index 0000000..2eccc8b
--- /dev/null
+++ b/drivers/md/dm-uevent.h
@@ -0,0 +1,59 @@
+/*
+ * Device Mapper Uevent Support
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2007
+ * 	Author: Mike Anderson <andmike@linux.vnet.ibm.com>
+ */
+#ifndef DM_UEVENT_H
+#define DM_UEVENT_H
+
+enum dm_uevent_type {
+	DM_UEVENT_PATH_FAILED,
+	DM_UEVENT_PATH_REINSTATED,
+};
+
+#ifdef CONFIG_DM_UEVENT
+
+extern int dm_uevent_init(void);
+extern void dm_uevent_exit(void);
+extern void dm_send_uevents(struct list_head *events, struct kobject *kobj);
+extern void dm_path_uevent(enum dm_uevent_type event_type,
+			   struct dm_target *ti, const char *path,
+			   unsigned nr_valid_paths);
+
+#else
+
+static inline int dm_uevent_init(void)
+{
+	return 0;
+}
+static inline void dm_uevent_exit(void)
+{
+}
+static inline void dm_send_uevents(struct list_head *events,
+				   struct kobject *kobj)
+{
+}
+static inline void dm_path_uevent(enum dm_uevent_type event_type,
+				  struct dm_target *ti, const char *path,
+				  unsigned nr_valid_paths)
+{
+}
+
+#endif	/* CONFIG_DM_UEVENT */
+
+#endif	/* DM_UEVENT_H */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d837d37..07cbbb8 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -7,6 +7,7 @@
 
 #include "dm.h"
 #include "dm-bio-list.h"
+#include "dm-uevent.h"
 
 #include <linux/init.h>
 #include <linux/module.h>
@@ -112,6 +113,9 @@
 	 */
 	atomic_t event_nr;
 	wait_queue_head_t eventq;
+	atomic_t uevent_seq;
+	struct list_head uevent_list;
+	spinlock_t uevent_lock; /* Protect access to uevent_list */
 
 	/*
 	 * freeze/thaw support require holding onto a super block
@@ -143,11 +147,19 @@
 		return -ENOMEM;
 	}
 
+	r = dm_uevent_init();
+	if (r) {
+		kmem_cache_destroy(_tio_cache);
+		kmem_cache_destroy(_io_cache);
+		return r;
+	}
+
 	_major = major;
 	r = register_blkdev(_major, _name);
 	if (r < 0) {
 		kmem_cache_destroy(_tio_cache);
 		kmem_cache_destroy(_io_cache);
+		dm_uevent_exit();
 		return r;
 	}
 
@@ -162,6 +174,7 @@
 	kmem_cache_destroy(_tio_cache);
 	kmem_cache_destroy(_io_cache);
 	unregister_blkdev(_major, _name);
+	dm_uevent_exit();
 
 	_major = 0;
 
@@ -751,15 +764,13 @@
 /*
  * Split the bio into several clones.
  */
-static void __split_bio(struct mapped_device *md, struct bio *bio)
+static int __split_bio(struct mapped_device *md, struct bio *bio)
 {
 	struct clone_info ci;
 
 	ci.map = dm_get_table(md);
-	if (!ci.map) {
-		bio_io_error(bio);
-		return;
-	}
+	if (unlikely(!ci.map))
+		return -EIO;
 
 	ci.md = md;
 	ci.bio = bio;
@@ -779,6 +790,8 @@
 	/* drop the extra reference count */
 	dec_pending(ci.io, 0);
 	dm_table_put(ci.map);
+
+	return 0;
 }
 /*-----------------------------------------------------------------
  * CRUD END
@@ -790,7 +803,7 @@
  */
 static int dm_request(struct request_queue *q, struct bio *bio)
 {
-	int r;
+	int r = -EIO;
 	int rw = bio_data_dir(bio);
 	struct mapped_device *md = q->queuedata;
 
@@ -815,18 +828,11 @@
 	while (test_bit(DMF_BLOCK_IO, &md->flags)) {
 		up_read(&md->io_lock);
 
-		if (bio_rw(bio) == READA) {
-			bio_io_error(bio);
-			return 0;
-		}
+		if (bio_rw(bio) != READA)
+			r = queue_io(md, bio);
 
-		r = queue_io(md, bio);
-		if (r < 0) {
-			bio_io_error(bio);
-			return 0;
-
-		} else if (r == 0)
-			return 0;	/* deferred successfully */
+		if (r <= 0)
+			goto out_req;
 
 		/*
 		 * We're in a while loop, because someone could suspend
@@ -835,8 +841,13 @@
 		down_read(&md->io_lock);
 	}
 
-	__split_bio(md, bio);
+	r = __split_bio(md, bio);
 	up_read(&md->io_lock);
+
+out_req:
+	if (r < 0)
+		bio_io_error(bio);
+
 	return 0;
 }
 
@@ -977,6 +988,9 @@
 	atomic_set(&md->holders, 1);
 	atomic_set(&md->open_count, 0);
 	atomic_set(&md->event_nr, 0);
+	atomic_set(&md->uevent_seq, 0);
+	INIT_LIST_HEAD(&md->uevent_list);
+	spin_lock_init(&md->uevent_lock);
 
 	md->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!md->queue)
@@ -1044,12 +1058,14 @@
 	return NULL;
 }
 
+static void unlock_fs(struct mapped_device *md);
+
 static void free_dev(struct mapped_device *md)
 {
 	int minor = md->disk->first_minor;
 
 	if (md->suspended_bdev) {
-		thaw_bdev(md->suspended_bdev, NULL);
+		unlock_fs(md);
 		bdput(md->suspended_bdev);
 	}
 	mempool_destroy(md->tio_pool);
@@ -1073,8 +1089,16 @@
  */
 static void event_callback(void *context)
 {
+	unsigned long flags;
+	LIST_HEAD(uevents);
 	struct mapped_device *md = (struct mapped_device *) context;
 
+	spin_lock_irqsave(&md->uevent_lock, flags);
+	list_splice_init(&md->uevent_list, &uevents);
+	spin_unlock_irqrestore(&md->uevent_lock, flags);
+
+	dm_send_uevents(&uevents, &md->disk->kobj);
+
 	atomic_inc(&md->event_nr);
 	wake_up(&md->eventq);
 }
@@ -1233,7 +1257,8 @@
 	while (c) {
 		n = c->bi_next;
 		c->bi_next = NULL;
-		__split_bio(md, c);
+		if (__split_bio(md, c))
+			bio_io_error(c);
 		c = n;
 	}
 }
@@ -1491,6 +1516,11 @@
 /*-----------------------------------------------------------------
  * Event notification.
  *---------------------------------------------------------------*/
+uint32_t dm_next_uevent_seq(struct mapped_device *md)
+{
+	return atomic_add_return(1, &md->uevent_seq);
+}
+
 uint32_t dm_get_event_nr(struct mapped_device *md)
 {
 	return atomic_read(&md->event_nr);
@@ -1502,6 +1532,15 @@
 			(event_nr != atomic_read(&md->event_nr)));
 }
 
+void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&md->uevent_lock, flags);
+	list_add(elist, &md->uevent_list);
+	spin_unlock_irqrestore(&md->uevent_lock, flags);
+}
+
 /*
  * The gendisk is only valid as long as you have a reference
  * count on 'md'.
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index 7e05237..f3831f3 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -198,7 +198,7 @@
 	 * These fields are only used if the job has been split
 	 * into more manageable parts.
 	 */
-	struct semaphore lock;
+	struct mutex lock;
 	atomic_t sub_jobs;
 	sector_t progress;
 };
@@ -456,7 +456,7 @@
 	sector_t count = 0;
 	struct kcopyd_job *job = (struct kcopyd_job *) context;
 
-	down(&job->lock);
+	mutex_lock(&job->lock);
 
 	/* update the error */
 	if (read_err)
@@ -480,7 +480,7 @@
 			job->progress += count;
 		}
 	}
-	up(&job->lock);
+	mutex_unlock(&job->lock);
 
 	if (count) {
 		int i;
@@ -562,7 +562,7 @@
 		dispatch_job(job);
 
 	else {
-		init_MUTEX(&job->lock);
+		mutex_init(&job->lock);
 		job->progress = 0;
 		split_job(job);
 	}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c059ae6..808cd95 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4717,7 +4717,7 @@
 
 void md_unregister_thread(mdk_thread_t *thread)
 {
-	dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid);
+	dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
 
 	kthread_stop(thread->tsk);
 	kfree(thread);
diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c
index 5a12b56..154a7ce 100644
--- a/drivers/media/dvb/cinergyT2/cinergyT2.c
+++ b/drivers/media/dvb/cinergyT2/cinergyT2.c
@@ -820,7 +820,7 @@
 
 	input_dev->name = DRIVER_NAME " remote control";
 	input_dev->phys = cinergyt2->phys;
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	for (i = 0; i < ARRAY_SIZE(rc_keys); i += 3)
 		set_bit(rc_keys[i + 2], input_dev->keybit);
 	input_dev->keycodesize = 0;
diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
index 7b9f35b..c0c2c22 100644
--- a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
+++ b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
@@ -106,7 +106,7 @@
 	if (!input_dev)
 		return -ENOMEM;
 
-	input_dev->evbit[0] = BIT(EV_KEY);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);
 	input_dev->name = "IR-receiver inside an USB DVB receiver";
 	input_dev->phys = d->rc_phys;
 	usb_to_input_id(d->udev, &input_dev->id);
diff --git a/drivers/media/dvb/ttpci/av7110_ir.c b/drivers/media/dvb/ttpci/av7110_ir.c
index 5d19c40..a283e1d 100644
--- a/drivers/media/dvb/ttpci/av7110_ir.c
+++ b/drivers/media/dvb/ttpci/av7110_ir.c
@@ -27,7 +27,7 @@
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/kernel.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 #include "av7110.h"
 #include "av7110_hw.h"
diff --git a/drivers/media/dvb/ttusb-dec/ttusb_dec.c b/drivers/media/dvb/ttusb-dec/ttusb_dec.c
index 5e691fd..1ec981d 100644
--- a/drivers/media/dvb/ttusb-dec/ttusb_dec.c
+++ b/drivers/media/dvb/ttusb-dec/ttusb_dec.c
@@ -1198,7 +1198,7 @@
 
 	input_dev->name = "ttusb_dec remote control";
 	input_dev->phys = dec->rc_phys;
-	input_dev->evbit[0] = BIT(EV_KEY);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);
 	input_dev->keycodesize = sizeof(u16);
 	input_dev->keycodemax = 0x1a;
 	input_dev->keycode = rc_keys;
diff --git a/drivers/media/video/usbvideo/konicawc.c b/drivers/media/video/usbvideo/konicawc.c
index 491505d..3e93f80 100644
--- a/drivers/media/video/usbvideo/konicawc.c
+++ b/drivers/media/video/usbvideo/konicawc.c
@@ -238,8 +238,8 @@
 	usb_to_input_id(dev, &input_dev->id);
 	input_dev->dev.parent = &dev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY);
-	input_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);
+	input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
 
 	input_dev->private = cam;
 
diff --git a/drivers/media/video/usbvideo/quickcam_messenger.c b/drivers/media/video/usbvideo/quickcam_messenger.c
index dd1a6d6..d847273 100644
--- a/drivers/media/video/usbvideo/quickcam_messenger.c
+++ b/drivers/media/video/usbvideo/quickcam_messenger.c
@@ -102,8 +102,8 @@
 	usb_to_input_id(dev, &input_dev->id);
 	input_dev->dev.parent = &dev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY);
-	input_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);
+	input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
 
 	input_dev->private = cam;
 
diff --git a/drivers/media/video/zoran_driver.c b/drivers/media/video/zoran_driver.c
index 1c14fa2..419e5af 100644
--- a/drivers/media/video/zoran_driver.c
+++ b/drivers/media/video/zoran_driver.c
@@ -1285,7 +1285,7 @@
 	}
 
 	dprintk(1, KERN_INFO "%s: zoran_open(%s, pid=[%d]), users(-)=%d\n",
-		ZR_DEVNAME(zr), current->comm, current->pid, zr->user);
+		ZR_DEVNAME(zr), current->comm, task_pid_nr(current), zr->user);
 
 	/* now, create the open()-specific file_ops struct */
 	fh = kzalloc(sizeof(struct zoran_fh), GFP_KERNEL);
@@ -1358,7 +1358,7 @@
 	struct zoran *zr = fh->zr;
 
 	dprintk(1, KERN_INFO "%s: zoran_close(%s, pid=[%d]), users(+)=%d\n",
-		ZR_DEVNAME(zr), current->comm, current->pid, zr->user);
+		ZR_DEVNAME(zr), current->comm, task_pid_nr(current), zr->user);
 
 	/* kernel locks (fs/device.c), so don't do that ourselves
 	 * (prevents deadlocks) */
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 346c44e..cf02ddc 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -111,6 +111,21 @@
 
 	  If you have an ACPI-compatible ASUS laptop, say Y or M here.
 
+config FUJITSU_LAPTOP
+        tristate "Fujitsu Laptop Extras"
+        depends on X86
+        depends on ACPI
+        depends on BACKLIGHT_CLASS_DEVICE
+        ---help---
+	  This is a driver for laptops built by Fujitsu:
+
+	    * P2xxx/P5xxx/S6xxx/S7xxx series Lifebooks
+	    * Possibly other Fujitsu laptop models
+
+	  It adds support for LCD brightness control.
+
+	  If you have a Fujitsu laptop, say Y or M here.
+
 config MSI_LAPTOP
         tristate "MSI Laptop Extras"
         depends on X86
@@ -134,6 +149,7 @@
 	tristate "Sony Laptop Extras"
 	depends on X86 && ACPI
 	select BACKLIGHT_CLASS_DEVICE
+	depends on INPUT
 	  ---help---
 	  This mini-driver drives the SNC and SPIC devices present in the ACPI
 	  BIOS of the Sony Vaio laptops.
@@ -156,6 +172,7 @@
 	select BACKLIGHT_CLASS_DEVICE
 	select HWMON
 	select NVRAM
+	depends on INPUT
 	---help---
 	  This is a driver for the IBM and Lenovo ThinkPad laptops. It adds
 	  support for Fn-Fx key combinations, Bluetooth control, video
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index a24c614..87f2685 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -15,4 +15,5 @@
 obj-$(CONFIG_SGI_IOC4)		+= ioc4.o
 obj-$(CONFIG_SONY_LAPTOP)	+= sony-laptop.o
 obj-$(CONFIG_THINKPAD_ACPI)	+= thinkpad_acpi.o
+obj-$(CONFIG_FUJITSU_LAPTOP)	+= fujitsu-laptop.o
 obj-$(CONFIG_EEPROM_93CX6)	+= eeprom_93cx6.o
diff --git a/drivers/misc/fujitsu-laptop.c b/drivers/misc/fujitsu-laptop.c
new file mode 100644
index 0000000..d366a6cc
--- /dev/null
+++ b/drivers/misc/fujitsu-laptop.c
@@ -0,0 +1,358 @@
+/*-*-linux-c-*-*/
+
+/*
+  Copyright (C) 2007 Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
+  Based on earlier work:
+    Copyright (C) 2003 Shane Spencer <shane@bogomip.com>
+    Adrian Yee <brewt-fujitsu@brewt.org>
+
+  Templated from msi-laptop.c which is copyright by its respective authors.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+  02110-1301, USA.
+ */
+
+/*
+ * fujitsu-laptop.c - Fujitsu laptop support, providing access to additional
+ * features made available on a range of Fujitsu laptops including the
+ * P2xxx/P5xxx/S6xxx/S7xxx series.
+ *
+ * This driver exports a few files in /sys/devices/platform/fujitsu-laptop/;
+ * others may be added at a later date.
+ *
+ *   lcd_level - Screen brightness: contains a single integer in the
+ *   range 0..7. (rw)
+ *
+ * In addition to these platform device attributes the driver
+ * registers itself in the Linux backlight control subsystem and is
+ * available to userspace under /sys/class/backlight/fujitsu-laptop/.
+ *
+ * This driver has been tested on a Fujitsu Lifebook S7020.  It should
+ * work on most P-series and S-series Lifebooks, but YMMV.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+#include <linux/backlight.h>
+#include <linux/platform_device.h>
+#include <linux/autoconf.h>
+
+#define FUJITSU_DRIVER_VERSION "0.3"
+
+#define FUJITSU_LCD_N_LEVELS 8
+
+#define ACPI_FUJITSU_CLASS              "fujitsu"
+#define ACPI_FUJITSU_HID                "FUJ02B1"
+#define ACPI_FUJITSU_DRIVER_NAME        "Fujitsu laptop FUJ02B1 ACPI extras driver"
+#define ACPI_FUJITSU_DEVICE_NAME        "Fujitsu FUJ02B1"
+
+struct fujitsu_t {
+	acpi_handle acpi_handle;
+	struct backlight_device *bl_device;
+	struct platform_device *pf_device;
+
+	unsigned long fuj02b1_state;
+	unsigned int brightness_changed;
+	unsigned int brightness_level;
+};
+
+static struct fujitsu_t *fujitsu;
+
+/* Hardware access */
+
+static int set_lcd_level(int level)
+{
+	acpi_status status = AE_OK;
+	union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+	struct acpi_object_list arg_list = { 1, &arg0 };
+	acpi_handle handle = NULL;
+
+	if (level < 0 || level >= FUJITSU_LCD_N_LEVELS)
+		return -EINVAL;
+
+	if (!fujitsu)
+		return -EINVAL;
+
+	status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle);
+	if (ACPI_FAILURE(status)) {
+		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "SBLL not present\n"));
+		return -ENODEV;
+	}
+
+	arg0.integer.value = level;
+
+	status = acpi_evaluate_object(handle, NULL, &arg_list, NULL);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	return 0;
+}
+
+static int get_lcd_level(void)
+{
+	unsigned long state = 0;
+	acpi_status status = AE_OK;
+
+	// Get the Brightness
+	status =
+	    acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state);
+	if (status < 0)
+		return status;
+
+	fujitsu->fuj02b1_state = state;
+	fujitsu->brightness_level = state & 0x0fffffff;
+
+	if (state & 0x80000000)
+		fujitsu->brightness_changed = 1;
+	else
+		fujitsu->brightness_changed = 0;
+
+	if (status < 0)
+		return status;
+
+	return fujitsu->brightness_level;
+}
+
+/* Backlight device stuff */
+
+static int bl_get_brightness(struct backlight_device *b)
+{
+	return get_lcd_level();
+}
+
+static int bl_update_status(struct backlight_device *b)
+{
+	return set_lcd_level(b->props.brightness);
+}
+
+static struct backlight_ops fujitsubl_ops = {
+	.get_brightness = bl_get_brightness,
+	.update_status = bl_update_status,
+};
+
+/* Platform device */
+
+static ssize_t show_lcd_level(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+
+	int ret;
+
+	ret = get_lcd_level();
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%i\n", ret);
+}
+
+static ssize_t store_lcd_level(struct device *dev,
+			       struct device_attribute *attr, const char *buf,
+			       size_t count)
+{
+
+	int level, ret;
+
+	if (sscanf(buf, "%i", &level) != 1
+	    || (level < 0 || level >= FUJITSU_LCD_N_LEVELS))
+		return -EINVAL;
+
+	ret = set_lcd_level(level);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level);
+
+static struct attribute *fujitsupf_attributes[] = {
+	&dev_attr_lcd_level.attr,
+	NULL
+};
+
+static struct attribute_group fujitsupf_attribute_group = {
+	.attrs = fujitsupf_attributes
+};
+
+static struct platform_driver fujitsupf_driver = {
+	.driver = {
+		   .name = "fujitsu-laptop",
+		   .owner = THIS_MODULE,
+		   }
+};
+
+/* ACPI device */
+
+int acpi_fujitsu_add(struct acpi_device *device)
+{
+	int result = 0;
+	int state = 0;
+
+	ACPI_FUNCTION_TRACE("acpi_fujitsu_add");
+
+	if (!device)
+		return -EINVAL;
+
+	fujitsu->acpi_handle = device->handle;
+	sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_DEVICE_NAME);
+	sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
+	acpi_driver_data(device) = fujitsu;
+
+	result = acpi_bus_get_power(fujitsu->acpi_handle, &state);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "Error reading power state\n"));
+		goto end;
+	}
+
+	printk(KERN_INFO PREFIX "%s [%s] (%s)\n",
+	       acpi_device_name(device), acpi_device_bid(device),
+	       !device->power.state ? "on" : "off");
+
+      end:
+
+	return result;
+}
+
+int acpi_fujitsu_remove(struct acpi_device *device, int type)
+{
+	ACPI_FUNCTION_TRACE("acpi_fujitsu_remove");
+
+	if (!device || !acpi_driver_data(device))
+		return -EINVAL;
+	fujitsu->acpi_handle = 0;
+
+	return 0;
+}
+
+static const struct acpi_device_id fujitsu_device_ids[] = {
+	{ACPI_FUJITSU_HID, 0},
+	{"", 0},
+};
+
+static struct acpi_driver acpi_fujitsu_driver = {
+	.name = ACPI_FUJITSU_DRIVER_NAME,
+	.class = ACPI_FUJITSU_CLASS,
+	.ids = fujitsu_device_ids,
+	.ops = {
+		.add = acpi_fujitsu_add,
+		.remove = acpi_fujitsu_remove,
+		},
+};
+
+/* Initialization */
+
+static int __init fujitsu_init(void)
+{
+	int ret, result;
+
+	if (acpi_disabled)
+		return -ENODEV;
+
+	fujitsu = kmalloc(sizeof(struct fujitsu_t), GFP_KERNEL);
+	if (!fujitsu)
+		return -ENOMEM;
+	memset(fujitsu, 0, sizeof(struct fujitsu_t));
+
+	result = acpi_bus_register_driver(&acpi_fujitsu_driver);
+	if (result < 0) {
+		ret = -ENODEV;
+		goto fail_acpi;
+	}
+
+	/* Register backlight stuff */
+
+	fujitsu->bl_device =
+	    backlight_device_register("fujitsu-laptop", NULL, NULL,
+				      &fujitsubl_ops);
+	if (IS_ERR(fujitsu->bl_device))
+		return PTR_ERR(fujitsu->bl_device);
+
+	fujitsu->bl_device->props.max_brightness = FUJITSU_LCD_N_LEVELS - 1;
+	ret = platform_driver_register(&fujitsupf_driver);
+	if (ret)
+		goto fail_backlight;
+
+	/* Register platform stuff */
+
+	fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1);
+	if (!fujitsu->pf_device) {
+		ret = -ENOMEM;
+		goto fail_platform_driver;
+	}
+
+	ret = platform_device_add(fujitsu->pf_device);
+	if (ret)
+		goto fail_platform_device1;
+
+	ret =
+	    sysfs_create_group(&fujitsu->pf_device->dev.kobj,
+			       &fujitsupf_attribute_group);
+	if (ret)
+		goto fail_platform_device2;
+
+	printk(KERN_INFO "fujitsu-laptop: driver " FUJITSU_DRIVER_VERSION
+	       " successfully loaded.\n");
+
+	return 0;
+
+      fail_platform_device2:
+
+	platform_device_del(fujitsu->pf_device);
+
+      fail_platform_device1:
+
+	platform_device_put(fujitsu->pf_device);
+
+      fail_platform_driver:
+
+	platform_driver_unregister(&fujitsupf_driver);
+
+      fail_backlight:
+
+	backlight_device_unregister(fujitsu->bl_device);
+
+      fail_acpi:
+
+	kfree(fujitsu);
+
+	return ret;
+}
+
+static void __exit fujitsu_cleanup(void)
+{
+	sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
+			   &fujitsupf_attribute_group);
+	platform_device_unregister(fujitsu->pf_device);
+	platform_driver_unregister(&fujitsupf_driver);
+	backlight_device_unregister(fujitsu->bl_device);
+
+	acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+
+	kfree(fujitsu);
+
+	printk(KERN_INFO "fujitsu-laptop: driver unloaded.\n");
+}
+
+module_init(fujitsu_init);
+module_exit(fujitsu_cleanup);
+
+MODULE_AUTHOR("Jonathan Woithe");
+MODULE_DESCRIPTION("Fujitsu laptop extras support");
+MODULE_VERSION(FUJITSU_DRIVER_VERSION);
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ibmasm/remote.c b/drivers/misc/ibmasm/remote.c
index 0550ce0..1d9defb 100644
--- a/drivers/misc/ibmasm/remote.c
+++ b/drivers/misc/ibmasm/remote.c
@@ -226,9 +226,9 @@
 	mouse_dev->id.product = pdev->device;
 	mouse_dev->id.version = 1;
 	mouse_dev->dev.parent = sp->dev;
-	mouse_dev->evbit[0]  = BIT(EV_KEY) | BIT(EV_ABS);
-	mouse_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) |
-		BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
+	mouse_dev->evbit[0]  = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	mouse_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
 	set_bit(BTN_TOUCH, mouse_dev->keybit);
 	mouse_dev->name = "ibmasm RSA I remote mouse";
 	input_set_abs_params(mouse_dev, ABS_X, 0, MOUSE_X_MAX, 0, 0);
@@ -239,7 +239,7 @@
 	keybd_dev->id.product = pdev->device;
 	keybd_dev->id.version = 2;
 	keybd_dev->dev.parent = sp->dev;
-	keybd_dev->evbit[0]  = BIT(EV_KEY);
+	keybd_dev->evbit[0]  = BIT_MASK(EV_KEY);
 	keybd_dev->name = "ibmasm RSA I remote keyboard";
 
 	for (i = 0; i < XLATE_SIZE; i++) {
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
index 5108b7c..cd221fd 100644
--- a/drivers/misc/phantom.c
+++ b/drivers/misc/phantom.c
@@ -9,6 +9,7 @@
  *  You need an userspace library to cooperate with this driver. It (and other
  *  info) may be obtained here:
  *  http://www.fi.muni.cz/~xslaby/phantom.html
+ *  or alternatively, you might use OpenHaptics provided by Sensable.
  */
 
 #include <linux/kernel.h>
@@ -24,13 +25,14 @@
 #include <asm/atomic.h>
 #include <asm/io.h>
 
-#define PHANTOM_VERSION		"n0.9.5"
+#define PHANTOM_VERSION		"n0.9.7"
 
 #define PHANTOM_MAX_MINORS	8
 
 #define PHN_IRQCTL		0x4c    /* irq control in caddr space */
 
 #define PHB_RUNNING		1
+#define PHB_NOT_OH		2
 
 static struct class *phantom_class;
 static int phantom_major;
@@ -47,7 +49,11 @@
 	struct cdev cdev;
 
 	struct mutex open_lock;
-	spinlock_t ioctl_lock;
+	spinlock_t regs_lock;
+
+	/* used in NOT_OH mode */
+	struct phm_regs oregs;
+	u32 ctl_reg;
 };
 
 static unsigned char phantom_devices[PHANTOM_MAX_MINORS];
@@ -82,6 +88,7 @@
 	struct phm_regs rs;
 	struct phm_reg r;
 	void __user *argp = (void __user *)arg;
+	unsigned long flags;
 	unsigned int i;
 
 	if (_IOC_TYPE(cmd) != PH_IOC_MAGIC ||
@@ -96,32 +103,45 @@
 		if (r.reg > 7)
 			return -EINVAL;
 
-		spin_lock(&dev->ioctl_lock);
+		spin_lock_irqsave(&dev->regs_lock, flags);
 		if (r.reg == PHN_CONTROL && (r.value & PHN_CTL_IRQ) &&
 				phantom_status(dev, dev->status | PHB_RUNNING)){
-			spin_unlock(&dev->ioctl_lock);
+			spin_unlock_irqrestore(&dev->regs_lock, flags);
 			return -ENODEV;
 		}
 
 		pr_debug("phantom: writing %x to %u\n", r.value, r.reg);
+
+		/* preserve amp bit (don't allow to change it when in NOT_OH) */
+		if (r.reg == PHN_CONTROL && (dev->status & PHB_NOT_OH)) {
+			r.value &= ~PHN_CTL_AMP;
+			r.value |= dev->ctl_reg & PHN_CTL_AMP;
+			dev->ctl_reg = r.value;
+		}
+
 		iowrite32(r.value, dev->iaddr + r.reg);
 		ioread32(dev->iaddr); /* PCI posting */
 
 		if (r.reg == PHN_CONTROL && !(r.value & PHN_CTL_IRQ))
 			phantom_status(dev, dev->status & ~PHB_RUNNING);
-		spin_unlock(&dev->ioctl_lock);
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
 		break;
 	case PHN_SET_REGS:
 		if (copy_from_user(&rs, argp, sizeof(rs)))
 			return -EFAULT;
 
 		pr_debug("phantom: SRS %u regs %x\n", rs.count, rs.mask);
-		spin_lock(&dev->ioctl_lock);
-		for (i = 0; i < min(rs.count, 8U); i++)
-			if ((1 << i) & rs.mask)
-				iowrite32(rs.values[i], dev->oaddr + i);
-		ioread32(dev->iaddr); /* PCI posting */
-		spin_unlock(&dev->ioctl_lock);
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		if (dev->status & PHB_NOT_OH)
+			memcpy(&dev->oregs, &rs, sizeof(rs));
+		else {
+			u32 m = min(rs.count, 8U);
+			for (i = 0; i < m; i++)
+				if (rs.mask & BIT(i))
+					iowrite32(rs.values[i], dev->oaddr + i);
+			ioread32(dev->iaddr); /* PCI posting */
+		}
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
 		break;
 	case PHN_GET_REG:
 		if (copy_from_user(&r, argp, sizeof(r)))
@@ -135,20 +155,35 @@
 		if (copy_to_user(argp, &r, sizeof(r)))
 			return -EFAULT;
 		break;
-	case PHN_GET_REGS:
+	case PHN_GET_REGS: {
+		u32 m;
+
 		if (copy_from_user(&rs, argp, sizeof(rs)))
 			return -EFAULT;
 
+		m = min(rs.count, 8U);
+
 		pr_debug("phantom: GRS %u regs %x\n", rs.count, rs.mask);
-		spin_lock(&dev->ioctl_lock);
-		for (i = 0; i < min(rs.count, 8U); i++)
-			if ((1 << i) & rs.mask)
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		for (i = 0; i < m; i++)
+			if (rs.mask & BIT(i))
 				rs.values[i] = ioread32(dev->iaddr + i);
-		spin_unlock(&dev->ioctl_lock);
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
 
 		if (copy_to_user(argp, &rs, sizeof(rs)))
 			return -EFAULT;
 		break;
+	} case PHN_NOT_OH:
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		if (dev->status & PHB_RUNNING) {
+			printk(KERN_ERR "phantom: you need to set NOT_OH "
+					"before you start the device!\n");
+			spin_unlock_irqrestore(&dev->regs_lock, flags);
+			return -EINVAL;
+		}
+		dev->status |= PHB_NOT_OH;
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
+		break;
 	default:
 		return -ENOTTY;
 	}
@@ -171,8 +206,11 @@
 		return -EINVAL;
 	}
 
+	WARN_ON(dev->status & PHB_NOT_OH);
+
 	file->private_data = dev;
 
+	atomic_set(&dev->counter, 0);
 	dev->opened++;
 	mutex_unlock(&dev->open_lock);
 
@@ -187,6 +225,7 @@
 
 	dev->opened = 0;
 	phantom_status(dev, dev->status & ~PHB_RUNNING);
+	dev->status &= ~PHB_NOT_OH;
 
 	mutex_unlock(&dev->open_lock);
 
@@ -220,12 +259,32 @@
 static irqreturn_t phantom_isr(int irq, void *data)
 {
 	struct phantom_device *dev = data;
+	unsigned int i;
+	u32 ctl;
 
-	if (!(ioread32(dev->iaddr + PHN_CONTROL) & PHN_CTL_IRQ))
+	spin_lock(&dev->regs_lock);
+	ctl = ioread32(dev->iaddr + PHN_CONTROL);
+	if (!(ctl & PHN_CTL_IRQ)) {
+		spin_unlock(&dev->regs_lock);
 		return IRQ_NONE;
+	}
 
 	iowrite32(0, dev->iaddr);
 	iowrite32(0xc0, dev->iaddr);
+
+	if (dev->status & PHB_NOT_OH) {
+		struct phm_regs *r = &dev->oregs;
+		u32 m = min(r->count, 8U);
+
+		for (i = 0; i < m; i++)
+			if (r->mask & BIT(i))
+				iowrite32(r->values[i], dev->oaddr + i);
+
+		dev->ctl_reg ^= PHN_CTL_AMP;
+		iowrite32(dev->ctl_reg, dev->iaddr + PHN_CONTROL);
+	}
+	spin_unlock(&dev->regs_lock);
+
 	ioread32(dev->iaddr); /* PCI posting */
 
 	atomic_inc(&dev->counter);
@@ -297,7 +356,7 @@
 	}
 
 	mutex_init(&pht->open_lock);
-	spin_lock_init(&pht->ioctl_lock);
+	spin_lock_init(&pht->regs_lock);
 	init_waitqueue_head(&pht->wait);
 	cdev_init(&pht->cdev, &phantom_file_ops);
 	pht->cdev.owner = THIS_MODULE;
@@ -378,6 +437,8 @@
 	iowrite32(0, dev->caddr + PHN_IRQCTL);
 	ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
 
+	synchronize_irq(pdev->irq);
+
 	return 0;
 }
 
diff --git a/drivers/misc/sony-laptop.c b/drivers/misc/sony-laptop.c
index e73a71f0..1bfbb87 100644
--- a/drivers/misc/sony-laptop.c
+++ b/drivers/misc/sony-laptop.c
@@ -411,9 +411,9 @@
 	jog_dev->id.bustype = BUS_ISA;
 	jog_dev->id.vendor = PCI_VENDOR_ID_SONY;
 
-	jog_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	jog_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_MIDDLE);
-	jog_dev->relbit[0] = BIT(REL_WHEEL);
+	jog_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	jog_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_MIDDLE);
+	jog_dev->relbit[0] = BIT_MASK(REL_WHEEL);
 
 	error = input_register_device(jog_dev);
 	if (error)
@@ -1173,7 +1173,8 @@
 #define SONYPI_TYPE3_OFFSET	0x12
 
 struct sony_pic_ioport {
-	struct acpi_resource_io	io;
+	struct acpi_resource_io	io1;
+	struct acpi_resource_io	io2;
 	struct list_head	list;
 };
 
@@ -1443,11 +1444,11 @@
 {
 	u8 v1, v2;
 
-	wait_on_command(inb_p(spic_dev.cur_ioport->io.minimum + 4) & 2,
+	wait_on_command(inb_p(spic_dev.cur_ioport->io1.minimum + 4) & 2,
 			ITERATIONS_LONG);
-	outb(dev, spic_dev.cur_ioport->io.minimum + 4);
-	v1 = inb_p(spic_dev.cur_ioport->io.minimum + 4);
-	v2 = inb_p(spic_dev.cur_ioport->io.minimum);
+	outb(dev, spic_dev.cur_ioport->io1.minimum + 4);
+	v1 = inb_p(spic_dev.cur_ioport->io1.minimum + 4);
+	v2 = inb_p(spic_dev.cur_ioport->io1.minimum);
 	dprintk("sony_pic_call1: 0x%.4x\n", (v2 << 8) | v1);
 	return v2;
 }
@@ -1456,13 +1457,13 @@
 {
 	u8 v1;
 
-	wait_on_command(inb_p(spic_dev.cur_ioport->io.minimum + 4) & 2,
+	wait_on_command(inb_p(spic_dev.cur_ioport->io1.minimum + 4) & 2,
 			ITERATIONS_LONG);
-	outb(dev, spic_dev.cur_ioport->io.minimum + 4);
-	wait_on_command(inb_p(spic_dev.cur_ioport->io.minimum + 4) & 2,
+	outb(dev, spic_dev.cur_ioport->io1.minimum + 4);
+	wait_on_command(inb_p(spic_dev.cur_ioport->io1.minimum + 4) & 2,
 			ITERATIONS_LONG);
-	outb(fn, spic_dev.cur_ioport->io.minimum);
-	v1 = inb_p(spic_dev.cur_ioport->io.minimum);
+	outb(fn, spic_dev.cur_ioport->io1.minimum);
+	v1 = inb_p(spic_dev.cur_ioport->io1.minimum);
 	dprintk("sony_pic_call2: 0x%.4x\n", v1);
 	return v1;
 }
@@ -1471,13 +1472,13 @@
 {
 	u8 v1;
 
-	wait_on_command(inb_p(spic_dev.cur_ioport->io.minimum + 4) & 2, ITERATIONS_LONG);
-	outb(dev, spic_dev.cur_ioport->io.minimum + 4);
-	wait_on_command(inb_p(spic_dev.cur_ioport->io.minimum + 4) & 2, ITERATIONS_LONG);
-	outb(fn, spic_dev.cur_ioport->io.minimum);
-	wait_on_command(inb_p(spic_dev.cur_ioport->io.minimum + 4) & 2, ITERATIONS_LONG);
-	outb(v, spic_dev.cur_ioport->io.minimum);
-	v1 = inb_p(spic_dev.cur_ioport->io.minimum);
+	wait_on_command(inb_p(spic_dev.cur_ioport->io1.minimum + 4) & 2, ITERATIONS_LONG);
+	outb(dev, spic_dev.cur_ioport->io1.minimum + 4);
+	wait_on_command(inb_p(spic_dev.cur_ioport->io1.minimum + 4) & 2, ITERATIONS_LONG);
+	outb(fn, spic_dev.cur_ioport->io1.minimum);
+	wait_on_command(inb_p(spic_dev.cur_ioport->io1.minimum + 4) & 2, ITERATIONS_LONG);
+	outb(v, spic_dev.cur_ioport->io1.minimum);
+	v1 = inb_p(spic_dev.cur_ioport->io1.minimum);
 	dprintk("sony_pic_call3: 0x%.4x\n", v1);
 	return v1;
 }
@@ -2074,7 +2075,18 @@
 
 	switch (resource->type) {
 	case ACPI_RESOURCE_TYPE_START_DEPENDENT:
+		{
+			/* start IO enumeration */
+			struct sony_pic_ioport *ioport = kzalloc(sizeof(*ioport), GFP_KERNEL);
+			if (!ioport)
+				return AE_ERROR;
+
+			list_add(&ioport->list, &dev->ioports);
+			return AE_OK;
+		}
+
 	case ACPI_RESOURCE_TYPE_END_DEPENDENT:
+		/* end IO enumeration */
 		return AE_OK;
 
 	case ACPI_RESOURCE_TYPE_IRQ:
@@ -2101,7 +2113,7 @@
 				if (!interrupt)
 					return AE_ERROR;
 
-				list_add_tail(&interrupt->list, &dev->interrupts);
+				list_add(&interrupt->list, &dev->interrupts);
 				interrupt->irq.triggering = p->triggering;
 				interrupt->irq.polarity = p->polarity;
 				interrupt->irq.sharable = p->sharable;
@@ -2113,18 +2125,27 @@
 	case ACPI_RESOURCE_TYPE_IO:
 		{
 			struct acpi_resource_io *io = &resource->data.io;
-			struct sony_pic_ioport *ioport = NULL;
+			struct sony_pic_ioport *ioport =
+				list_first_entry(&dev->ioports, struct sony_pic_ioport, list);
 			if (!io) {
 				dprintk("Blank IO resource\n");
 				return AE_OK;
 			}
 
-			ioport = kzalloc(sizeof(*ioport), GFP_KERNEL);
-			if (!ioport)
+			if (!ioport->io1.minimum) {
+				memcpy(&ioport->io1, io, sizeof(*io));
+				dprintk("IO1 at 0x%.4x (0x%.2x)\n", ioport->io1.minimum,
+						ioport->io1.address_length);
+			}
+			else if (!ioport->io2.minimum) {
+				memcpy(&ioport->io2, io, sizeof(*io));
+				dprintk("IO2 at 0x%.4x (0x%.2x)\n", ioport->io2.minimum,
+						ioport->io2.address_length);
+			}
+			else {
+				printk(KERN_ERR DRV_PFX "Unknown SPIC Type, more than 2 IO Ports\n");
 				return AE_ERROR;
-
-			list_add_tail(&ioport->list, &dev->ioports);
-			memcpy(&ioport->io, io, sizeof(*io));
+			}
 			return AE_OK;
 		}
 	default:
@@ -2199,10 +2220,22 @@
 {
 	acpi_status status;
 	int result = 0;
+	/* Type 1 resource layout is:
+	 *    IO
+	 *    IO
+	 *    IRQNoFlags
+	 *    End
+	 *
+	 * Type 2 and 3 resource layout is:
+	 *    IO
+	 *    IRQNoFlags
+	 *    End
+	 */
 	struct {
-		struct acpi_resource io_res;
-		struct acpi_resource irq_res;
-		struct acpi_resource end;
+		struct acpi_resource res1;
+		struct acpi_resource res2;
+		struct acpi_resource res3;
+		struct acpi_resource res4;
 	} *resource;
 	struct acpi_buffer buffer = { 0, NULL };
 
@@ -2217,21 +2250,49 @@
 	buffer.length = sizeof(*resource) + 1;
 	buffer.pointer = resource;
 
-	/* setup io resource */
-	resource->io_res.type = ACPI_RESOURCE_TYPE_IO;
-	resource->io_res.length = sizeof(struct acpi_resource);
-	memcpy(&resource->io_res.data.io, &ioport->io,
-			sizeof(struct acpi_resource_io));
+	/* setup Type 1 resources */
+	if (spic_dev.model == SONYPI_DEVICE_TYPE1) {
 
-	/* setup irq resource */
-	resource->irq_res.type = ACPI_RESOURCE_TYPE_IRQ;
-	resource->irq_res.length = sizeof(struct acpi_resource);
-	memcpy(&resource->irq_res.data.irq, &irq->irq,
-			sizeof(struct acpi_resource_irq));
-	/* we requested a shared irq */
-	resource->irq_res.data.irq.sharable = ACPI_SHARED;
+		/* setup io resources */
+		resource->res1.type = ACPI_RESOURCE_TYPE_IO;
+		resource->res1.length = sizeof(struct acpi_resource);
+		memcpy(&resource->res1.data.io, &ioport->io1,
+				sizeof(struct acpi_resource_io));
 
-	resource->end.type = ACPI_RESOURCE_TYPE_END_TAG;
+		resource->res2.type = ACPI_RESOURCE_TYPE_IO;
+		resource->res2.length = sizeof(struct acpi_resource);
+		memcpy(&resource->res2.data.io, &ioport->io2,
+				sizeof(struct acpi_resource_io));
+
+		/* setup irq resource */
+		resource->res3.type = ACPI_RESOURCE_TYPE_IRQ;
+		resource->res3.length = sizeof(struct acpi_resource);
+		memcpy(&resource->res3.data.irq, &irq->irq,
+				sizeof(struct acpi_resource_irq));
+		/* we requested a shared irq */
+		resource->res3.data.irq.sharable = ACPI_SHARED;
+
+		resource->res4.type = ACPI_RESOURCE_TYPE_END_TAG;
+
+	}
+	/* setup Type 2/3 resources */
+	else {
+		/* setup io resource */
+		resource->res1.type = ACPI_RESOURCE_TYPE_IO;
+		resource->res1.length = sizeof(struct acpi_resource);
+		memcpy(&resource->res1.data.io, &ioport->io1,
+				sizeof(struct acpi_resource_io));
+
+		/* setup irq resource */
+		resource->res2.type = ACPI_RESOURCE_TYPE_IRQ;
+		resource->res2.length = sizeof(struct acpi_resource);
+		memcpy(&resource->res2.data.irq, &irq->irq,
+				sizeof(struct acpi_resource_irq));
+		/* we requested a shared irq */
+		resource->res2.data.irq.sharable = ACPI_SHARED;
+
+		resource->res3.type = ACPI_RESOURCE_TYPE_END_TAG;
+	}
 
 	/* Attempt to set the resource */
 	dprintk("Evaluating _SRS\n");
@@ -2239,7 +2300,7 @@
 
 	/* check for total failure */
 	if (ACPI_FAILURE(status)) {
-		printk(KERN_ERR DRV_PFX "Error evaluating _SRS");
+		printk(KERN_ERR DRV_PFX "Error evaluating _SRS\n");
 		result = -ENODEV;
 		goto end;
 	}
@@ -2268,11 +2329,14 @@
 
 	struct sony_pic_dev *dev = (struct sony_pic_dev *) dev_id;
 
-	ev = inb_p(dev->cur_ioport->io.minimum);
-	data_mask = inb_p(dev->cur_ioport->io.minimum + dev->evport_offset);
+	ev = inb_p(dev->cur_ioport->io1.minimum);
+	if (dev->cur_ioport->io2.minimum)
+		data_mask = inb_p(dev->cur_ioport->io2.minimum);
+	else
+		data_mask = inb_p(dev->cur_ioport->io1.minimum + dev->evport_offset);
 
 	dprintk("event ([%.2x] [%.2x]) at port 0x%.4x(+0x%.2x)\n",
-			ev, data_mask, dev->cur_ioport->io.minimum, dev->evport_offset);
+			ev, data_mask, dev->cur_ioport->io1.minimum, dev->evport_offset);
 
 	if (ev == 0x00 || ev == 0xff)
 		return IRQ_HANDLED;
@@ -2323,8 +2387,11 @@
 	}
 
 	free_irq(spic_dev.cur_irq->irq.interrupts[0], &spic_dev);
-	release_region(spic_dev.cur_ioport->io.minimum,
-			spic_dev.cur_ioport->io.address_length);
+	release_region(spic_dev.cur_ioport->io1.minimum,
+			spic_dev.cur_ioport->io1.address_length);
+	if (spic_dev.cur_ioport->io2.minimum)
+		release_region(spic_dev.cur_ioport->io2.minimum,
+				spic_dev.cur_ioport->io2.address_length);
 
 	sonypi_compat_exit();
 
@@ -2397,14 +2464,36 @@
 		goto err_remove_input;
 
 	/* request io port */
-	list_for_each_entry(io, &spic_dev.ioports, list) {
-		if (request_region(io->io.minimum, io->io.address_length,
+	list_for_each_entry_reverse(io, &spic_dev.ioports, list) {
+		if (request_region(io->io1.minimum, io->io1.address_length,
 					"Sony Programable I/O Device")) {
-			dprintk("I/O port: 0x%.4x (0x%.4x) + 0x%.2x\n",
-					io->io.minimum, io->io.maximum,
-					io->io.address_length);
-			spic_dev.cur_ioport = io;
-			break;
+			dprintk("I/O port1: 0x%.4x (0x%.4x) + 0x%.2x\n",
+					io->io1.minimum, io->io1.maximum,
+					io->io1.address_length);
+			/* Type 1 have 2 ioports */
+			if (io->io2.minimum) {
+				if (request_region(io->io2.minimum,
+						io->io2.address_length,
+						"Sony Programable I/O Device")) {
+					dprintk("I/O port2: 0x%.4x (0x%.4x) + 0x%.2x\n",
+							io->io2.minimum, io->io2.maximum,
+							io->io2.address_length);
+					spic_dev.cur_ioport = io;
+					break;
+				}
+				else {
+					dprintk("Unable to get I/O port2: "
+							"0x%.4x (0x%.4x) + 0x%.2x\n",
+							io->io2.minimum, io->io2.maximum,
+							io->io2.address_length);
+					release_region(io->io1.minimum,
+							io->io1.address_length);
+				}
+			}
+			else {
+				spic_dev.cur_ioport = io;
+				break;
+			}
 		}
 	}
 	if (!spic_dev.cur_ioport) {
@@ -2414,7 +2503,7 @@
 	}
 
 	/* request IRQ */
-	list_for_each_entry(irq, &spic_dev.interrupts, list) {
+	list_for_each_entry_reverse(irq, &spic_dev.interrupts, list) {
 		if (!request_irq(irq->irq.interrupts[0], sony_pic_irq,
 					IRQF_SHARED, "sony-laptop", &spic_dev)) {
 			dprintk("IRQ: %d - triggering: %d - "
@@ -2462,8 +2551,11 @@
 	free_irq(spic_dev.cur_irq->irq.interrupts[0], &spic_dev);
 
 err_release_region:
-	release_region(spic_dev.cur_ioport->io.minimum,
-			spic_dev.cur_ioport->io.address_length);
+	release_region(spic_dev.cur_ioport->io1.minimum,
+			spic_dev.cur_ioport->io1.address_length);
+	if (spic_dev.cur_ioport->io2.minimum)
+		release_region(spic_dev.cur_ioport->io2.minimum,
+				spic_dev.cur_ioport->io2.address_length);
 
 err_remove_compat:
 	sonypi_compat_exit();
diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index 81e068f..e953276 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -22,7 +22,7 @@
  */
 
 #define IBM_VERSION "0.16"
-#define TPACPI_SYSFS_VERSION 0x010000
+#define TPACPI_SYSFS_VERSION 0x020000
 
 /*
  *  Changelog:
@@ -117,6 +117,12 @@
 
 #define __unused __attribute__ ((unused))
 
+static enum {
+	TPACPI_LIFE_INIT = 0,
+	TPACPI_LIFE_RUNNING,
+	TPACPI_LIFE_EXITING,
+} tpacpi_lifecycle;
+
 /****************************************************************************
  ****************************************************************************
  *
@@ -342,6 +348,9 @@
 {
 	struct ibm_struct *ibm = data;
 
+	if (tpacpi_lifecycle != TPACPI_LIFE_RUNNING)
+		return;
+
 	if (!ibm || !ibm->acpi || !ibm->acpi->notify)
 		return;
 
@@ -517,8 +526,10 @@
  ****************************************************************************/
 
 static struct platform_device *tpacpi_pdev;
+static struct platform_device *tpacpi_sensors_pdev;
 static struct device *tpacpi_hwmon;
 static struct input_dev *tpacpi_inputdev;
+static struct mutex tpacpi_inputdev_send_mutex;
 
 
 static int tpacpi_resume_handler(struct platform_device *pdev)
@@ -543,6 +554,12 @@
 	.resume = tpacpi_resume_handler,
 };
 
+static struct platform_driver tpacpi_hwmon_pdriver = {
+	.driver = {
+		.name = IBM_HWMON_DRVR_NAME,
+		.owner = THIS_MODULE,
+	},
+};
 
 /*************************************************************************
  * thinkpad-acpi driver attributes
@@ -692,6 +709,8 @@
 {
 	char *endp;
 
+	while (*buf && isspace(*buf))
+		buf++;
 	*value = simple_strtoul(buf, &endp, 0);
 	while (*endp && isspace(*endp))
 		endp++;
@@ -989,6 +1008,7 @@
 
 	int res, i;
 	int status;
+	int hkeyv;
 
 	vdbg_printk(TPACPI_DBG_INIT, "initializing hotkey subdriver\n");
 
@@ -1014,18 +1034,35 @@
 			return res;
 
 		/* mask not supported on 570, 600e/x, 770e, 770x, A21e, A2xm/p,
-		   A30, R30, R31, T20-22, X20-21, X22-24 */
-		tp_features.hotkey_mask =
-			acpi_evalf(hkey_handle, NULL, "DHKN", "qv");
+		   A30, R30, R31, T20-22, X20-21, X22-24.  Detected by checking
+		   for HKEY interface version 0x100 */
+		if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
+			if ((hkeyv >> 8) != 1) {
+				printk(IBM_ERR "unknown version of the "
+				       "HKEY interface: 0x%x\n", hkeyv);
+				printk(IBM_ERR "please report this to %s\n",
+				       IBM_MAIL);
+			} else {
+				/*
+				 * MHKV 0x100 in A31, R40, R40e,
+				 * T4x, X31, and later
+				 * */
+				tp_features.hotkey_mask = 1;
+			}
+		}
 
 		vdbg_printk(TPACPI_DBG_INIT, "hotkey masks are %s\n",
 			str_supported(tp_features.hotkey_mask));
 
 		if (tp_features.hotkey_mask) {
-			/* MHKA available in A31, R40, R40e, T4x, X31, and later */
 			if (!acpi_evalf(hkey_handle, &hotkey_all_mask,
-					"MHKA", "qd"))
+					"MHKA", "qd")) {
+				printk(IBM_ERR
+				       "missing MHKA handler, "
+				       "please report this to %s\n",
+				       IBM_MAIL);
 				hotkey_all_mask = 0x080cU; /* FN+F12, FN+F4, FN+F3 */
+			}
 		}
 
 		res = hotkey_get(&hotkey_orig_status, &hotkey_orig_mask);
@@ -1131,6 +1168,8 @@
 				  unsigned int keycode)
 {
 	if (keycode != KEY_RESERVED) {
+		mutex_lock(&tpacpi_inputdev_send_mutex);
+
 		input_report_key(tpacpi_inputdev, keycode, 1);
 		if (keycode == KEY_UNKNOWN)
 			input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN,
@@ -1142,6 +1181,8 @@
 			input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN,
 				    scancode);
 		input_sync(tpacpi_inputdev);
+
+		mutex_unlock(&tpacpi_inputdev_send_mutex);
 	}
 }
 
@@ -1149,18 +1190,47 @@
 {
 	int wlsw;
 
-	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&wlsw))
+	mutex_lock(&tpacpi_inputdev_send_mutex);
+
+	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&wlsw)) {
 		input_report_switch(tpacpi_inputdev,
 				    SW_RADIO, !!wlsw);
+		input_sync(tpacpi_inputdev);
+	}
+
+	mutex_unlock(&tpacpi_inputdev_send_mutex);
 }
 
 static void hotkey_notify(struct ibm_struct *ibm, u32 event)
 {
 	u32 hkey;
 	unsigned int keycode, scancode;
-	int send_acpi_ev = 0;
+	int send_acpi_ev;
+	int ignore_acpi_ev;
 
-	if (event == 0x80 && acpi_evalf(hkey_handle, &hkey, "MHKP", "d")) {
+	if (event != 0x80) {
+		printk(IBM_ERR "unknown HKEY notification event %d\n", event);
+		/* forward it to userspace, maybe it knows how to handle it */
+		acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class,
+						ibm->acpi->device->dev.bus_id,
+						event, 0);
+		return;
+	}
+
+	while (1) {
+		if (!acpi_evalf(hkey_handle, &hkey, "MHKP", "d")) {
+			printk(IBM_ERR "failed to retrieve HKEY event\n");
+			return;
+		}
+
+		if (hkey == 0) {
+			/* queue empty */
+			return;
+		}
+
+		send_acpi_ev = 0;
+		ignore_acpi_ev = 0;
+
 		switch (hkey >> 12) {
 		case 1:
 			/* 0x1000-0x1FFF: key presses */
@@ -1182,9 +1252,11 @@
 			 * eat up known LID events */
 			if (hkey != 0x5001 && hkey != 0x5002) {
 				printk(IBM_ERR
-					"unknown LID-related hotkey event: 0x%04x\n",
-					hkey);
+				       "unknown LID-related HKEY event: 0x%04x\n",
+				       hkey);
 				send_acpi_ev = 1;
+			} else {
+				ignore_acpi_ev = 1;
 			}
 			break;
 		case 7:
@@ -1202,21 +1274,18 @@
 			printk(IBM_NOTICE "unhandled HKEY event 0x%04x\n", hkey);
 			send_acpi_ev = 1;
 		}
-	} else {
-		printk(IBM_ERR "unknown hotkey notification event %d\n", event);
-		hkey = 0;
-		send_acpi_ev = 1;
-	}
 
-	/* Legacy events */
-	if (send_acpi_ev || hotkey_report_mode < 2)
-		acpi_bus_generate_proc_event(ibm->acpi->device, event, hkey);
+		/* Legacy events */
+		if (!ignore_acpi_ev && (send_acpi_ev || hotkey_report_mode < 2)) {
+			acpi_bus_generate_proc_event(ibm->acpi->device, event, hkey);
+		}
 
-	/* netlink events */
-	if (send_acpi_ev) {
-		acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class,
-						ibm->acpi->device->dev.bus_id,
-						event, hkey);
+		/* netlink events */
+		if (!ignore_acpi_ev && send_acpi_ev) {
+			acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class,
+							ibm->acpi->device->dev.bus_id,
+							event, hkey);
+		}
 	}
 }
 
@@ -2812,7 +2881,7 @@
 
 	switch(thermal_read_mode) {
 	case TPACPI_THERMAL_TPEC_16:
-		res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+		res = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
 				&thermal_temp_input16_group);
 		if (res)
 			return res;
@@ -2820,7 +2889,7 @@
 	case TPACPI_THERMAL_TPEC_8:
 	case TPACPI_THERMAL_ACPI_TMP07:
 	case TPACPI_THERMAL_ACPI_UPDT:
-		res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+		res = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
 				&thermal_temp_input8_group);
 		if (res)
 			return res;
@@ -2837,13 +2906,13 @@
 {
 	switch(thermal_read_mode) {
 	case TPACPI_THERMAL_TPEC_16:
-		sysfs_remove_group(&tpacpi_pdev->dev.kobj,
+		sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj,
 				   &thermal_temp_input16_group);
 		break;
 	case TPACPI_THERMAL_TPEC_8:
 	case TPACPI_THERMAL_ACPI_TMP07:
 	case TPACPI_THERMAL_ACPI_UPDT:
-		sysfs_remove_group(&tpacpi_pdev->dev.kobj,
+		sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj,
 				   &thermal_temp_input16_group);
 		break;
 	case TPACPI_THERMAL_NONE:
@@ -3626,7 +3695,7 @@
 	__ATTR(fan1_input, S_IRUGO,
 		fan_fan1_input_show, NULL);
 
-/* sysfs fan fan_watchdog (driver) ------------------------------------- */
+/* sysfs fan fan_watchdog (hwmon driver) ------------------------------- */
 static ssize_t fan_fan_watchdog_show(struct device_driver *drv,
 				     char *buf)
 {
@@ -3768,10 +3837,10 @@
 
 	if (fan_status_access_mode != TPACPI_FAN_NONE ||
 	    fan_control_access_mode != TPACPI_FAN_WR_NONE) {
-		rc = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+		rc = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
 					 &fan_attr_group);
 		if (!(rc < 0))
-			rc = driver_create_file(&tpacpi_pdriver.driver,
+			rc = driver_create_file(&tpacpi_hwmon_pdriver.driver,
 					&driver_attr_fan_watchdog);
 		if (rc < 0)
 			return rc;
@@ -3854,8 +3923,8 @@
 	vdbg_printk(TPACPI_DBG_EXIT, "cancelling any pending fan watchdog tasks\n");
 
 	/* FIXME: can we really do this unconditionally? */
-	sysfs_remove_group(&tpacpi_pdev->dev.kobj, &fan_attr_group);
-	driver_remove_file(&tpacpi_pdriver.driver, &driver_attr_fan_watchdog);
+	sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj, &fan_attr_group);
+	driver_remove_file(&tpacpi_hwmon_pdriver.driver, &driver_attr_fan_watchdog);
 
 	cancel_delayed_work(&fan_watchdog_task);
 	flush_scheduled_work();
@@ -3888,6 +3957,9 @@
 {
 	int rc;
 
+	if (tpacpi_lifecycle != TPACPI_LIFE_RUNNING)
+		return;
+
 	printk(IBM_NOTICE "fan watchdog: enabling fan\n");
 	rc = fan_set_enable();
 	if (rc < 0) {
@@ -3908,7 +3980,8 @@
 	if (fan_watchdog_active)
 		cancel_delayed_work(&fan_watchdog_task);
 
-	if (fan_watchdog_maxinterval > 0) {
+	if (fan_watchdog_maxinterval > 0 &&
+	    tpacpi_lifecycle != TPACPI_LIFE_EXITING) {
 		fan_watchdog_active = 1;
 		if (!schedule_delayed_work(&fan_watchdog_task,
 				msecs_to_jiffies(fan_watchdog_maxinterval
@@ -4302,6 +4375,19 @@
  ****************************************************************************
  ****************************************************************************/
 
+/* sysfs name ---------------------------------------------------------- */
+static ssize_t thinkpad_acpi_pdev_name_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s\n", IBM_NAME);
+}
+
+static struct device_attribute dev_attr_thinkpad_acpi_pdev_name =
+	__ATTR(name, S_IRUGO, thinkpad_acpi_pdev_name_show, NULL);
+
+/* --------------------------------------------------------------------- */
+
 /* /proc support */
 static struct proc_dir_entry *proc_dir;
 
@@ -4674,6 +4760,8 @@
 {
 	int ret, i;
 
+	tpacpi_lifecycle = TPACPI_LIFE_INIT;
+
 	/* Parameter checking */
 	if (hotkey_report_mode > 2)
 		return -EINVAL;
@@ -4702,19 +4790,31 @@
 
 	ret = platform_driver_register(&tpacpi_pdriver);
 	if (ret) {
-		printk(IBM_ERR "unable to register platform driver\n");
+		printk(IBM_ERR "unable to register main platform driver\n");
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
 	tp_features.platform_drv_registered = 1;
 
+	ret = platform_driver_register(&tpacpi_hwmon_pdriver);
+	if (ret) {
+		printk(IBM_ERR "unable to register hwmon platform driver\n");
+		thinkpad_acpi_module_exit();
+		return ret;
+	}
+	tp_features.sensors_pdrv_registered = 1;
+
 	ret = tpacpi_create_driver_attributes(&tpacpi_pdriver.driver);
+	if (!ret) {
+		tp_features.platform_drv_attrs_registered = 1;
+		ret = tpacpi_create_driver_attributes(&tpacpi_hwmon_pdriver.driver);
+	}
 	if (ret) {
 		printk(IBM_ERR "unable to create sysfs driver attributes\n");
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
-	tp_features.platform_drv_attrs_registered = 1;
+	tp_features.sensors_pdrv_attrs_registered = 1;
 
 
 	/* Device initialization */
@@ -4727,7 +4827,26 @@
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
-	tpacpi_hwmon = hwmon_device_register(&tpacpi_pdev->dev);
+	tpacpi_sensors_pdev = platform_device_register_simple(
+							IBM_HWMON_DRVR_NAME,
+							-1, NULL, 0);
+	if (IS_ERR(tpacpi_sensors_pdev)) {
+		ret = PTR_ERR(tpacpi_sensors_pdev);
+		tpacpi_sensors_pdev = NULL;
+		printk(IBM_ERR "unable to register hwmon platform device\n");
+		thinkpad_acpi_module_exit();
+		return ret;
+	}
+	ret = device_create_file(&tpacpi_sensors_pdev->dev,
+				 &dev_attr_thinkpad_acpi_pdev_name);
+	if (ret) {
+		printk(IBM_ERR
+			"unable to create sysfs hwmon device attributes\n");
+		thinkpad_acpi_module_exit();
+		return ret;
+	}
+	tp_features.sensors_pdev_attrs_registered = 1;
+	tpacpi_hwmon = hwmon_device_register(&tpacpi_sensors_pdev->dev);
 	if (IS_ERR(tpacpi_hwmon)) {
 		ret = PTR_ERR(tpacpi_hwmon);
 		tpacpi_hwmon = NULL;
@@ -4735,6 +4854,7 @@
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
+	mutex_init(&tpacpi_inputdev_send_mutex);
 	tpacpi_inputdev = input_allocate_device();
 	if (!tpacpi_inputdev) {
 		printk(IBM_ERR "unable to allocate input device\n");
@@ -4769,6 +4889,7 @@
 		tp_features.input_device_registered = 1;
 	}
 
+	tpacpi_lifecycle = TPACPI_LIFE_RUNNING;
 	return 0;
 }
 
@@ -4776,6 +4897,8 @@
 {
 	struct ibm_struct *ibm, *itmp;
 
+	tpacpi_lifecycle = TPACPI_LIFE_EXITING;
+
 	list_for_each_entry_safe_reverse(ibm, itmp,
 					 &tpacpi_all_drivers,
 					 all_drivers) {
@@ -4794,12 +4917,22 @@
 	if (tpacpi_hwmon)
 		hwmon_device_unregister(tpacpi_hwmon);
 
+	if (tp_features.sensors_pdev_attrs_registered)
+		device_remove_file(&tpacpi_sensors_pdev->dev,
+				   &dev_attr_thinkpad_acpi_pdev_name);
+	if (tpacpi_sensors_pdev)
+		platform_device_unregister(tpacpi_sensors_pdev);
 	if (tpacpi_pdev)
 		platform_device_unregister(tpacpi_pdev);
 
+	if (tp_features.sensors_pdrv_attrs_registered)
+		tpacpi_remove_driver_attributes(&tpacpi_hwmon_pdriver.driver);
 	if (tp_features.platform_drv_attrs_registered)
 		tpacpi_remove_driver_attributes(&tpacpi_pdriver.driver);
 
+	if (tp_features.sensors_pdrv_registered)
+		platform_driver_unregister(&tpacpi_hwmon_pdriver);
+
 	if (tp_features.platform_drv_registered)
 		platform_driver_unregister(&tpacpi_pdriver);
 
diff --git a/drivers/misc/thinkpad_acpi.h b/drivers/misc/thinkpad_acpi.h
index acd5835..3abcc81 100644
--- a/drivers/misc/thinkpad_acpi.h
+++ b/drivers/misc/thinkpad_acpi.h
@@ -58,13 +58,14 @@
 
 #define IBM_NAME "thinkpad"
 #define IBM_DESC "ThinkPad ACPI Extras"
-#define IBM_FILE "thinkpad_acpi"
+#define IBM_FILE IBM_NAME "_acpi"
 #define IBM_URL "http://ibm-acpi.sf.net/"
 #define IBM_MAIL "ibm-acpi-devel@lists.sourceforge.net"
 
 #define IBM_PROC_DIR "ibm"
 #define IBM_ACPI_EVENT_PREFIX "ibm"
 #define IBM_DRVR_NAME IBM_FILE
+#define IBM_HWMON_DRVR_NAME IBM_NAME "_hwmon"
 
 #define IBM_LOG IBM_FILE ": "
 #define IBM_ERR	   KERN_ERR    IBM_LOG
@@ -171,6 +172,7 @@
 
 /* Device model */
 static struct platform_device *tpacpi_pdev;
+static struct platform_device *tpacpi_sensors_pdev;
 static struct device *tpacpi_hwmon;
 static struct platform_driver tpacpi_pdriver;
 static struct input_dev *tpacpi_inputdev;
@@ -233,22 +235,25 @@
 
 static struct {
 #ifdef CONFIG_THINKPAD_ACPI_BAY
-	u16 bay_status:1;
-	u16 bay_eject:1;
-	u16 bay_status2:1;
-	u16 bay_eject2:1;
+	u32 bay_status:1;
+	u32 bay_eject:1;
+	u32 bay_status2:1;
+	u32 bay_eject2:1;
 #endif
-	u16 bluetooth:1;
-	u16 hotkey:1;
-	u16 hotkey_mask:1;
-	u16 hotkey_wlsw:1;
-	u16 light:1;
-	u16 light_status:1;
-	u16 wan:1;
-	u16 fan_ctrl_status_undef:1;
-	u16 input_device_registered:1;
-	u16 platform_drv_registered:1;
-	u16 platform_drv_attrs_registered:1;
+	u32 bluetooth:1;
+	u32 hotkey:1;
+	u32 hotkey_mask:1;
+	u32 hotkey_wlsw:1;
+	u32 light:1;
+	u32 light_status:1;
+	u32 wan:1;
+	u32 fan_ctrl_status_undef:1;
+	u32 input_device_registered:1;
+	u32 platform_drv_registered:1;
+	u32 platform_drv_attrs_registered:1;
+	u32 sensors_pdrv_registered:1;
+	u32 sensors_pdrv_attrs_registered:1;
+	u32 sensors_pdev_attrs_registered:1;
 } tp_features;
 
 struct thinkpad_id_data {
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index a4f1bf3..6330c8c 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1309,7 +1309,7 @@
 	struct ubi_device *ubi = u;
 
 	ubi_msg("background thread \"%s\" started, PID %d",
-		ubi->bgt_name, current->pid);
+		ubi->bgt_name, task_pid_nr(current));
 
 	set_freezable();
 	for (;;) {
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 96cee4b..da767d3 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -26,7 +26,7 @@
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/dma-mapping.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <linux/delay.h>
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 7a045a3..084f029 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -126,7 +126,7 @@
 
 // ================= main 802.3ad protocol functions ==================
 static int ad_lacpdu_send(struct port *port);
-static int ad_marker_send(struct port *port, struct marker *marker);
+static int ad_marker_send(struct port *port, struct bond_marker *marker);
 static void ad_mux_machine(struct port *port);
 static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
 static void ad_tx_machine(struct port *port);
@@ -139,8 +139,8 @@
 static void ad_initialize_lacpdu(struct lacpdu *Lacpdu);
 static void ad_enable_collecting_distributing(struct port *port);
 static void ad_disable_collecting_distributing(struct port *port);
-static void ad_marker_info_received(struct marker *marker_info, struct port *port);
-static void ad_marker_response_received(struct marker *marker, struct port *port);
+static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port);
+static void ad_marker_response_received(struct bond_marker *marker, struct port *port);
 
 
 /////////////////////////////////////////////////////////////////////////////////
@@ -889,12 +889,12 @@
  * Returns:   0 on success
  *          < 0 on error
  */
-static int ad_marker_send(struct port *port, struct marker *marker)
+static int ad_marker_send(struct port *port, struct bond_marker *marker)
 {
 	struct slave *slave = port->slave;
 	struct sk_buff *skb;
-	struct marker_header *marker_header;
-	int length = sizeof(struct marker_header);
+	struct bond_marker_header *marker_header;
+	int length = sizeof(struct bond_marker_header);
 	struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR;
 
 	skb = dev_alloc_skb(length + 16);
@@ -909,7 +909,7 @@
 	skb->network_header = skb->mac_header + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 
-	marker_header = (struct marker_header *)skb_put(skb, length);
+	marker_header = (struct bond_marker_header *)skb_put(skb, length);
 
 	marker_header->ad_header.destination_address = lacpdu_multicast_address;
 	/* Note: source addres is set to be the member's PERMANENT address, because we use it
@@ -1709,7 +1709,7 @@
  */
 static void ad_marker_info_send(struct port *port)
 {
-	struct marker marker;
+	struct bond_marker marker;
 	u16 index;
 
 	// fill the marker PDU with the appropriate values
@@ -1742,13 +1742,14 @@
  * @port: the port we're looking at
  *
  */
-static void ad_marker_info_received(struct marker *marker_info,struct port *port)
+static void ad_marker_info_received(struct bond_marker *marker_info,
+	struct port *port)
 {
-	struct marker marker;
+	struct bond_marker marker;
 
 	// copy the received marker data to the response marker
 	//marker = *marker_info;
-	memcpy(&marker, marker_info, sizeof(struct marker));
+	memcpy(&marker, marker_info, sizeof(struct bond_marker));
 	// change the marker subtype to marker response
 	marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE;
 	// send the marker response
@@ -1767,7 +1768,8 @@
  * response for marker PDU's, in this stage, but only to respond to marker
  * information.
  */
-static void ad_marker_response_received(struct marker *marker, struct port *port)
+static void ad_marker_response_received(struct bond_marker *marker,
+	struct port *port)
 {
 	marker=NULL; // just to satisfy the compiler
 	port=NULL;  // just to satisfy the compiler
@@ -2164,15 +2166,15 @@
 		case AD_TYPE_MARKER:
 			// No need to convert fields to Little Endian since we don't use the marker's fields.
 
-			switch (((struct marker *)lacpdu)->tlv_type) {
+			switch (((struct bond_marker *)lacpdu)->tlv_type) {
 			case AD_MARKER_INFORMATION_SUBTYPE:
 				dprintk("Received Marker Information on port %d\n", port->actor_port_number);
-				ad_marker_info_received((struct marker *)lacpdu, port);
+				ad_marker_info_received((struct bond_marker *)lacpdu, port);
 				break;
 
 			case AD_MARKER_RESPONSE_SUBTYPE:
 				dprintk("Received Marker Response on port %d\n", port->actor_port_number);
-				ad_marker_response_received((struct marker *)lacpdu, port);
+				ad_marker_response_received((struct bond_marker *)lacpdu, port);
 				break;
 
 			default:
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
index 862952f..f165572 100644
--- a/drivers/net/bonding/bond_3ad.h
+++ b/drivers/net/bonding/bond_3ad.h
@@ -92,7 +92,7 @@
 typedef enum {
 	AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype
 	AD_MARKER_RESPONSE_SUBTYPE     // marker response subtype
-} marker_subtype_t;
+} bond_marker_subtype_t;
 
 // timers types(43.4.9 in the 802.3ad standard)
 typedef enum {
@@ -148,7 +148,7 @@
 } lacpdu_header_t;
 
 // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard)
-typedef struct marker {
+typedef struct bond_marker {
 	u8 subtype;		 //  = 0x02  (marker PDU)
 	u8 version_number;	 //  = 0x01
 	u8 tlv_type;		 //  = 0x01  (marker information)
@@ -161,12 +161,12 @@
 	u8 tlv_type_terminator;	     //  = 0x00
 	u8 terminator_length;	     //  = 0x00
 	u8 reserved_90[90];	     //  = 0
-} marker_t;
+} bond_marker_t;
 
-typedef struct marker_header {
+typedef struct bond_marker_header {
 	struct ad_header ad_header;
-	struct marker marker;
-} marker_header_t;
+	struct bond_marker marker;
+} bond_marker_header_t;
 
 #pragma pack()
 
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index 314b2f6..edd6828 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -234,6 +234,7 @@
 #include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/bitops.h>
 
 #include <linux/if.h>
 #include <linux/mii.h>
@@ -247,7 +248,6 @@
 #include <asm/irq.h>
 #include <asm/dma.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/ethernet.h>
 #include <asm/cache.h>
 
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 0442617..2a3df14 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -41,9 +41,9 @@
 #include <linux/timer.h>
 #include <linux/cache.h>
 #include <linux/mutex.h>
+#include <linux/bitops.h>
 #include "t3cdev.h"
 #include <asm/semaphore.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 
 typedef irqreturn_t(*intr_handler_t) (int, void *);
diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c
index 243fc6b..e3dd8b1 100644
--- a/drivers/net/eth16i.c
+++ b/drivers/net/eth16i.c
@@ -170,7 +170,6 @@
 
 
 /* Few macros */
-#define BIT(a)		       ( (1 << (a)) )
 #define BITSET(ioaddr, bnum)   ((outb(((inb(ioaddr)) | (bnum)), ioaddr)))
 #define BITCLR(ioaddr, bnum)   ((outb(((inb(ioaddr)) & (~(bnum))), ioaddr)))
 
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index bc02e46..11b83da 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -21,6 +21,7 @@
 
 
 #include <linux/module.h>
+#include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/if_arp.h>
@@ -35,7 +36,6 @@
 #include <linux/sockios.h>
 #include <linux/workqueue.h>
 #include <asm/atomic.h>
-#include <asm/bitops.h>
 #include <asm/dma.h>
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/drivers/net/mac89x0.c b/drivers/net/mac89x0.c
index 30854f0..a19b595 100644
--- a/drivers/net/mac89x0.c
+++ b/drivers/net/mac89x0.c
@@ -99,9 +99,9 @@
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/hwtest.h>
 #include <asm/macints.h>
diff --git a/drivers/net/meth.h b/drivers/net/meth.h
index ea3b8fc..a78dc1c 100644
--- a/drivers/net/meth.h
+++ b/drivers/net/meth.h
@@ -28,9 +28,6 @@
 #define RX_BUFFER_OFFSET (sizeof(rx_status_vector)+2) /* staus vector + 2 bytes of padding */
 #define RX_BUCKET_SIZE 256
 
-#undef BIT
-#define BIT(x)	(1UL << (x))
-
 /* For more detailed explanations of what each field menas,
    see Nick's great comments to #defines below (or docs, if
    you are lucky enough toget hold of them :)*/
diff --git a/drivers/net/s2io-regs.h b/drivers/net/s2io-regs.h
index aef66e2..01f08d7 100644
--- a/drivers/net/s2io-regs.h
+++ b/drivers/net/s2io-regs.h
@@ -20,17 +20,17 @@
 
 /* General Control-Status Registers */
 	u64 general_int_status;
-#define GEN_INTR_TXPIC             BIT(0)
-#define GEN_INTR_TXDMA             BIT(1)
-#define GEN_INTR_TXMAC             BIT(2)
-#define GEN_INTR_TXXGXS            BIT(3)
-#define GEN_INTR_TXTRAFFIC         BIT(8)
-#define GEN_INTR_RXPIC             BIT(32)
-#define GEN_INTR_RXDMA             BIT(33)
-#define GEN_INTR_RXMAC             BIT(34)
-#define GEN_INTR_MC                BIT(35)
-#define GEN_INTR_RXXGXS            BIT(36)
-#define GEN_INTR_RXTRAFFIC         BIT(40)
+#define GEN_INTR_TXPIC             s2BIT(0)
+#define GEN_INTR_TXDMA             s2BIT(1)
+#define GEN_INTR_TXMAC             s2BIT(2)
+#define GEN_INTR_TXXGXS            s2BIT(3)
+#define GEN_INTR_TXTRAFFIC         s2BIT(8)
+#define GEN_INTR_RXPIC             s2BIT(32)
+#define GEN_INTR_RXDMA             s2BIT(33)
+#define GEN_INTR_RXMAC             s2BIT(34)
+#define GEN_INTR_MC                s2BIT(35)
+#define GEN_INTR_RXXGXS            s2BIT(36)
+#define GEN_INTR_RXTRAFFIC         s2BIT(40)
 #define GEN_ERROR_INTR             GEN_INTR_TXPIC | GEN_INTR_RXPIC | \
                                    GEN_INTR_TXDMA | GEN_INTR_RXDMA | \
                                    GEN_INTR_TXMAC | GEN_INTR_RXMAC | \
@@ -54,36 +54,36 @@
 
 
 	u64 adapter_status;
-#define ADAPTER_STATUS_TDMA_READY          BIT(0)
-#define ADAPTER_STATUS_RDMA_READY          BIT(1)
-#define ADAPTER_STATUS_PFC_READY           BIT(2)
-#define ADAPTER_STATUS_TMAC_BUF_EMPTY      BIT(3)
-#define ADAPTER_STATUS_PIC_QUIESCENT       BIT(5)
-#define ADAPTER_STATUS_RMAC_REMOTE_FAULT   BIT(6)
-#define ADAPTER_STATUS_RMAC_LOCAL_FAULT    BIT(7)
+#define ADAPTER_STATUS_TDMA_READY          s2BIT(0)
+#define ADAPTER_STATUS_RDMA_READY          s2BIT(1)
+#define ADAPTER_STATUS_PFC_READY           s2BIT(2)
+#define ADAPTER_STATUS_TMAC_BUF_EMPTY      s2BIT(3)
+#define ADAPTER_STATUS_PIC_QUIESCENT       s2BIT(5)
+#define ADAPTER_STATUS_RMAC_REMOTE_FAULT   s2BIT(6)
+#define ADAPTER_STATUS_RMAC_LOCAL_FAULT    s2BIT(7)
 #define ADAPTER_STATUS_RMAC_PCC_IDLE       vBIT(0xFF,8,8)
 #define ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE  vBIT(0x0F,8,8)
 #define ADAPTER_STATUS_RC_PRC_QUIESCENT    vBIT(0xFF,16,8)
-#define ADAPTER_STATUS_MC_DRAM_READY       BIT(24)
-#define ADAPTER_STATUS_MC_QUEUES_READY     BIT(25)
-#define ADAPTER_STATUS_M_PLL_LOCK          BIT(30)
-#define ADAPTER_STATUS_P_PLL_LOCK          BIT(31)
+#define ADAPTER_STATUS_MC_DRAM_READY       s2BIT(24)
+#define ADAPTER_STATUS_MC_QUEUES_READY     s2BIT(25)
+#define ADAPTER_STATUS_M_PLL_LOCK          s2BIT(30)
+#define ADAPTER_STATUS_P_PLL_LOCK          s2BIT(31)
 
 	u64 adapter_control;
-#define ADAPTER_CNTL_EN                    BIT(7)
-#define ADAPTER_EOI_TX_ON                  BIT(15)
-#define ADAPTER_LED_ON                     BIT(23)
+#define ADAPTER_CNTL_EN                    s2BIT(7)
+#define ADAPTER_EOI_TX_ON                  s2BIT(15)
+#define ADAPTER_LED_ON                     s2BIT(23)
 #define ADAPTER_UDPI(val)                  vBIT(val,36,4)
-#define ADAPTER_WAIT_INT                   BIT(48)
-#define ADAPTER_ECC_EN                     BIT(55)
+#define ADAPTER_WAIT_INT                   s2BIT(48)
+#define ADAPTER_ECC_EN                     s2BIT(55)
 
 	u64 serr_source;
-#define SERR_SOURCE_PIC			BIT(0)
-#define SERR_SOURCE_TXDMA		BIT(1)
-#define SERR_SOURCE_RXDMA		BIT(2)
-#define SERR_SOURCE_MAC                 BIT(3)
-#define SERR_SOURCE_MC                  BIT(4)
-#define SERR_SOURCE_XGXS                BIT(5)
+#define SERR_SOURCE_PIC			s2BIT(0)
+#define SERR_SOURCE_TXDMA		s2BIT(1)
+#define SERR_SOURCE_RXDMA		s2BIT(2)
+#define SERR_SOURCE_MAC                 s2BIT(3)
+#define SERR_SOURCE_MC                  s2BIT(4)
+#define SERR_SOURCE_XGXS                s2BIT(5)
 #define	SERR_SOURCE_ANY			(SERR_SOURCE_PIC	| \
 					SERR_SOURCE_TXDMA	| \
 					SERR_SOURCE_RXDMA	| \
@@ -101,41 +101,41 @@
 #define	PCI_MODE_PCIX_M2_66		0x5
 #define	PCI_MODE_PCIX_M2_100		0x6
 #define	PCI_MODE_PCIX_M2_133		0x7
-#define	PCI_MODE_UNSUPPORTED		BIT(0)
-#define	PCI_MODE_32_BITS		BIT(8)
-#define	PCI_MODE_UNKNOWN_MODE		BIT(9)
+#define	PCI_MODE_UNSUPPORTED		s2BIT(0)
+#define	PCI_MODE_32_BITS		s2BIT(8)
+#define	PCI_MODE_UNKNOWN_MODE		s2BIT(9)
 
 	u8 unused_0[0x800 - 0x128];
 
 /* PCI-X Controller registers */
 	u64 pic_int_status;
 	u64 pic_int_mask;
-#define PIC_INT_TX                     BIT(0)
-#define PIC_INT_FLSH                   BIT(1)
-#define PIC_INT_MDIO                   BIT(2)
-#define PIC_INT_IIC                    BIT(3)
-#define PIC_INT_GPIO                   BIT(4)
-#define PIC_INT_RX                     BIT(32)
+#define PIC_INT_TX                     s2BIT(0)
+#define PIC_INT_FLSH                   s2BIT(1)
+#define PIC_INT_MDIO                   s2BIT(2)
+#define PIC_INT_IIC                    s2BIT(3)
+#define PIC_INT_GPIO                   s2BIT(4)
+#define PIC_INT_RX                     s2BIT(32)
 
 	u64 txpic_int_reg;
 	u64 txpic_int_mask;
-#define PCIX_INT_REG_ECC_SG_ERR                BIT(0)
-#define PCIX_INT_REG_ECC_DB_ERR                BIT(1)
-#define PCIX_INT_REG_FLASHR_R_FSM_ERR          BIT(8)
-#define PCIX_INT_REG_FLASHR_W_FSM_ERR          BIT(9)
-#define PCIX_INT_REG_INI_TX_FSM_SERR           BIT(10)
-#define PCIX_INT_REG_INI_TXO_FSM_ERR           BIT(11)
-#define PCIX_INT_REG_TRT_FSM_SERR              BIT(13)
-#define PCIX_INT_REG_SRT_FSM_SERR              BIT(14)
-#define PCIX_INT_REG_PIFR_FSM_SERR             BIT(15)
-#define PCIX_INT_REG_WRC_TX_SEND_FSM_SERR      BIT(21)
-#define PCIX_INT_REG_RRC_TX_REQ_FSM_SERR       BIT(23)
-#define PCIX_INT_REG_INI_RX_FSM_SERR           BIT(48)
-#define PCIX_INT_REG_RA_RX_FSM_SERR            BIT(50)
+#define PCIX_INT_REG_ECC_SG_ERR                s2BIT(0)
+#define PCIX_INT_REG_ECC_DB_ERR                s2BIT(1)
+#define PCIX_INT_REG_FLASHR_R_FSM_ERR          s2BIT(8)
+#define PCIX_INT_REG_FLASHR_W_FSM_ERR          s2BIT(9)
+#define PCIX_INT_REG_INI_TX_FSM_SERR           s2BIT(10)
+#define PCIX_INT_REG_INI_TXO_FSM_ERR           s2BIT(11)
+#define PCIX_INT_REG_TRT_FSM_SERR              s2BIT(13)
+#define PCIX_INT_REG_SRT_FSM_SERR              s2BIT(14)
+#define PCIX_INT_REG_PIFR_FSM_SERR             s2BIT(15)
+#define PCIX_INT_REG_WRC_TX_SEND_FSM_SERR      s2BIT(21)
+#define PCIX_INT_REG_RRC_TX_REQ_FSM_SERR       s2BIT(23)
+#define PCIX_INT_REG_INI_RX_FSM_SERR           s2BIT(48)
+#define PCIX_INT_REG_RA_RX_FSM_SERR            s2BIT(50)
 /*
-#define PCIX_INT_REG_WRC_RX_SEND_FSM_SERR      BIT(52)
-#define PCIX_INT_REG_RRC_RX_REQ_FSM_SERR       BIT(54)
-#define PCIX_INT_REG_RRC_RX_SPLIT_FSM_SERR     BIT(58)
+#define PCIX_INT_REG_WRC_RX_SEND_FSM_SERR      s2BIT(52)
+#define PCIX_INT_REG_RRC_RX_REQ_FSM_SERR       s2BIT(54)
+#define PCIX_INT_REG_RRC_RX_SPLIT_FSM_SERR     s2BIT(58)
 */
 	u64 txpic_alarms;
 	u64 rxpic_int_reg;
@@ -144,92 +144,92 @@
 
 	u64 flsh_int_reg;
 	u64 flsh_int_mask;
-#define PIC_FLSH_INT_REG_CYCLE_FSM_ERR         BIT(63)
-#define PIC_FLSH_INT_REG_ERR                   BIT(62)
+#define PIC_FLSH_INT_REG_CYCLE_FSM_ERR         s2BIT(63)
+#define PIC_FLSH_INT_REG_ERR                   s2BIT(62)
 	u64 flash_alarms;
 
 	u64 mdio_int_reg;
 	u64 mdio_int_mask;
-#define MDIO_INT_REG_MDIO_BUS_ERR              BIT(0)
-#define MDIO_INT_REG_DTX_BUS_ERR               BIT(8)
-#define MDIO_INT_REG_LASI                      BIT(39)
+#define MDIO_INT_REG_MDIO_BUS_ERR              s2BIT(0)
+#define MDIO_INT_REG_DTX_BUS_ERR               s2BIT(8)
+#define MDIO_INT_REG_LASI                      s2BIT(39)
 	u64 mdio_alarms;
 
 	u64 iic_int_reg;
 	u64 iic_int_mask;
-#define IIC_INT_REG_BUS_FSM_ERR                BIT(4)
-#define IIC_INT_REG_BIT_FSM_ERR                BIT(5)
-#define IIC_INT_REG_CYCLE_FSM_ERR              BIT(6)
-#define IIC_INT_REG_REQ_FSM_ERR                BIT(7)
-#define IIC_INT_REG_ACK_ERR                    BIT(8)
+#define IIC_INT_REG_BUS_FSM_ERR                s2BIT(4)
+#define IIC_INT_REG_BIT_FSM_ERR                s2BIT(5)
+#define IIC_INT_REG_CYCLE_FSM_ERR              s2BIT(6)
+#define IIC_INT_REG_REQ_FSM_ERR                s2BIT(7)
+#define IIC_INT_REG_ACK_ERR                    s2BIT(8)
 	u64 iic_alarms;
 
 	u8 unused4[0x08];
 
 	u64 gpio_int_reg;
-#define GPIO_INT_REG_DP_ERR_INT                BIT(0)
-#define GPIO_INT_REG_LINK_DOWN                 BIT(1)
-#define GPIO_INT_REG_LINK_UP                   BIT(2)
+#define GPIO_INT_REG_DP_ERR_INT                s2BIT(0)
+#define GPIO_INT_REG_LINK_DOWN                 s2BIT(1)
+#define GPIO_INT_REG_LINK_UP                   s2BIT(2)
 	u64 gpio_int_mask;
-#define GPIO_INT_MASK_LINK_DOWN                BIT(1)
-#define GPIO_INT_MASK_LINK_UP                  BIT(2)
+#define GPIO_INT_MASK_LINK_DOWN                s2BIT(1)
+#define GPIO_INT_MASK_LINK_UP                  s2BIT(2)
 	u64 gpio_alarms;
 
 	u8 unused5[0x38];
 
 	u64 tx_traffic_int;
-#define TX_TRAFFIC_INT_n(n)                    BIT(n)
+#define TX_TRAFFIC_INT_n(n)                    s2BIT(n)
 	u64 tx_traffic_mask;
 
 	u64 rx_traffic_int;
-#define RX_TRAFFIC_INT_n(n)                    BIT(n)
+#define RX_TRAFFIC_INT_n(n)                    s2BIT(n)
 	u64 rx_traffic_mask;
 
 /* PIC Control registers */
 	u64 pic_control;
-#define PIC_CNTL_RX_ALARM_MAP_1                BIT(0)
+#define PIC_CNTL_RX_ALARM_MAP_1                s2BIT(0)
 #define PIC_CNTL_SHARED_SPLITS(n)              vBIT(n,11,5)
 
 	u64 swapper_ctrl;
-#define SWAPPER_CTRL_PIF_R_FE                  BIT(0)
-#define SWAPPER_CTRL_PIF_R_SE                  BIT(1)
-#define SWAPPER_CTRL_PIF_W_FE                  BIT(8)
-#define SWAPPER_CTRL_PIF_W_SE                  BIT(9)
-#define SWAPPER_CTRL_TXP_FE                    BIT(16)
-#define SWAPPER_CTRL_TXP_SE                    BIT(17)
-#define SWAPPER_CTRL_TXD_R_FE                  BIT(18)
-#define SWAPPER_CTRL_TXD_R_SE                  BIT(19)
-#define SWAPPER_CTRL_TXD_W_FE                  BIT(20)
-#define SWAPPER_CTRL_TXD_W_SE                  BIT(21)
-#define SWAPPER_CTRL_TXF_R_FE                  BIT(22)
-#define SWAPPER_CTRL_TXF_R_SE                  BIT(23)
-#define SWAPPER_CTRL_RXD_R_FE                  BIT(32)
-#define SWAPPER_CTRL_RXD_R_SE                  BIT(33)
-#define SWAPPER_CTRL_RXD_W_FE                  BIT(34)
-#define SWAPPER_CTRL_RXD_W_SE                  BIT(35)
-#define SWAPPER_CTRL_RXF_W_FE                  BIT(36)
-#define SWAPPER_CTRL_RXF_W_SE                  BIT(37)
-#define SWAPPER_CTRL_XMSI_FE                   BIT(40)
-#define SWAPPER_CTRL_XMSI_SE                   BIT(41)
-#define SWAPPER_CTRL_STATS_FE                  BIT(48)
-#define SWAPPER_CTRL_STATS_SE                  BIT(49)
+#define SWAPPER_CTRL_PIF_R_FE                  s2BIT(0)
+#define SWAPPER_CTRL_PIF_R_SE                  s2BIT(1)
+#define SWAPPER_CTRL_PIF_W_FE                  s2BIT(8)
+#define SWAPPER_CTRL_PIF_W_SE                  s2BIT(9)
+#define SWAPPER_CTRL_TXP_FE                    s2BIT(16)
+#define SWAPPER_CTRL_TXP_SE                    s2BIT(17)
+#define SWAPPER_CTRL_TXD_R_FE                  s2BIT(18)
+#define SWAPPER_CTRL_TXD_R_SE                  s2BIT(19)
+#define SWAPPER_CTRL_TXD_W_FE                  s2BIT(20)
+#define SWAPPER_CTRL_TXD_W_SE                  s2BIT(21)
+#define SWAPPER_CTRL_TXF_R_FE                  s2BIT(22)
+#define SWAPPER_CTRL_TXF_R_SE                  s2BIT(23)
+#define SWAPPER_CTRL_RXD_R_FE                  s2BIT(32)
+#define SWAPPER_CTRL_RXD_R_SE                  s2BIT(33)
+#define SWAPPER_CTRL_RXD_W_FE                  s2BIT(34)
+#define SWAPPER_CTRL_RXD_W_SE                  s2BIT(35)
+#define SWAPPER_CTRL_RXF_W_FE                  s2BIT(36)
+#define SWAPPER_CTRL_RXF_W_SE                  s2BIT(37)
+#define SWAPPER_CTRL_XMSI_FE                   s2BIT(40)
+#define SWAPPER_CTRL_XMSI_SE                   s2BIT(41)
+#define SWAPPER_CTRL_STATS_FE                  s2BIT(48)
+#define SWAPPER_CTRL_STATS_SE                  s2BIT(49)
 
 	u64 pif_rd_swapper_fb;
 #define IF_RD_SWAPPER_FB                            0x0123456789ABCDEF
 
 	u64 scheduled_int_ctrl;
-#define SCHED_INT_CTRL_TIMER_EN                BIT(0)
-#define SCHED_INT_CTRL_ONE_SHOT                BIT(1)
+#define SCHED_INT_CTRL_TIMER_EN                s2BIT(0)
+#define SCHED_INT_CTRL_ONE_SHOT                s2BIT(1)
 #define SCHED_INT_CTRL_INT2MSI(val)		vBIT(val,10,6)
 #define SCHED_INT_PERIOD                       TBD
 
 	u64 txreqtimeout;
 #define TXREQTO_VAL(val)						vBIT(val,0,32)
-#define TXREQTO_EN								BIT(63)
+#define TXREQTO_EN								s2BIT(63)
 
 	u64 statsreqtimeout;
 #define STATREQTO_VAL(n)                       TBD
-#define STATREQTO_EN                           BIT(63)
+#define STATREQTO_EN                           s2BIT(63)
 
 	u64 read_retry_delay;
 	u64 read_retry_acceleration;
@@ -255,10 +255,10 @@
 
 	/* Automated statistics collection */
 	u64 stat_cfg;
-#define STAT_CFG_STAT_EN           BIT(0)
-#define STAT_CFG_ONE_SHOT_EN       BIT(1)
-#define STAT_CFG_STAT_NS_EN        BIT(8)
-#define STAT_CFG_STAT_RO           BIT(9)
+#define STAT_CFG_STAT_EN           s2BIT(0)
+#define STAT_CFG_ONE_SHOT_EN       s2BIT(1)
+#define STAT_CFG_STAT_NS_EN        s2BIT(8)
+#define STAT_CFG_STAT_RO           s2BIT(9)
 #define STAT_TRSF_PER(n)           TBD
 #define	PER_SEC					   0x208d5
 #define	SET_UPDT_PERIOD(n)		   vBIT((PER_SEC*n),32,32)
@@ -290,18 +290,18 @@
 #define	I2C_CONTROL_DEV_ID(id)		vBIT(id,1,3)
 #define	I2C_CONTROL_ADDR(addr)		vBIT(addr,5,11)
 #define	I2C_CONTROL_BYTE_CNT(cnt)	vBIT(cnt,22,2)
-#define	I2C_CONTROL_READ			BIT(24)
-#define	I2C_CONTROL_NACK			BIT(25)
+#define	I2C_CONTROL_READ			s2BIT(24)
+#define	I2C_CONTROL_NACK			s2BIT(25)
 #define	I2C_CONTROL_CNTL_START		vBIT(0xE,28,4)
 #define	I2C_CONTROL_CNTL_END(val)	(val & vBIT(0x1,28,4))
 #define	I2C_CONTROL_GET_DATA(val)	(u32)(val & 0xFFFFFFFF)
 #define	I2C_CONTROL_SET_DATA(val)	vBIT(val,32,32)
 
 	u64 gpio_control;
-#define GPIO_CTRL_GPIO_0		BIT(8)
+#define GPIO_CTRL_GPIO_0		s2BIT(8)
 	u64 misc_control;
-#define FAULT_BEHAVIOUR			BIT(0)
-#define EXT_REQ_EN			BIT(1)
+#define FAULT_BEHAVIOUR			s2BIT(0)
+#define EXT_REQ_EN			s2BIT(1)
 #define MISC_LINK_STABILITY_PRD(val)   vBIT(val,29,3)
 
 	u8 unused7_1[0x230 - 0x208];
@@ -317,29 +317,29 @@
 /* TxDMA registers */
 	u64 txdma_int_status;
 	u64 txdma_int_mask;
-#define TXDMA_PFC_INT                  BIT(0)
-#define TXDMA_TDA_INT                  BIT(1)
-#define TXDMA_PCC_INT                  BIT(2)
-#define TXDMA_TTI_INT                  BIT(3)
-#define TXDMA_LSO_INT                  BIT(4)
-#define TXDMA_TPA_INT                  BIT(5)
-#define TXDMA_SM_INT                   BIT(6)
+#define TXDMA_PFC_INT                  s2BIT(0)
+#define TXDMA_TDA_INT                  s2BIT(1)
+#define TXDMA_PCC_INT                  s2BIT(2)
+#define TXDMA_TTI_INT                  s2BIT(3)
+#define TXDMA_LSO_INT                  s2BIT(4)
+#define TXDMA_TPA_INT                  s2BIT(5)
+#define TXDMA_SM_INT                   s2BIT(6)
 	u64 pfc_err_reg;
-#define PFC_ECC_SG_ERR			BIT(7)
-#define PFC_ECC_DB_ERR			BIT(15)
-#define PFC_SM_ERR_ALARM		BIT(23)
-#define PFC_MISC_0_ERR			BIT(31)
-#define PFC_MISC_1_ERR			BIT(32)
-#define PFC_PCIX_ERR			BIT(39)
+#define PFC_ECC_SG_ERR			s2BIT(7)
+#define PFC_ECC_DB_ERR			s2BIT(15)
+#define PFC_SM_ERR_ALARM		s2BIT(23)
+#define PFC_MISC_0_ERR			s2BIT(31)
+#define PFC_MISC_1_ERR			s2BIT(32)
+#define PFC_PCIX_ERR			s2BIT(39)
 	u64 pfc_err_mask;
 	u64 pfc_err_alarm;
 
 	u64 tda_err_reg;
 #define TDA_Fn_ECC_SG_ERR		vBIT(0xff,0,8)
 #define TDA_Fn_ECC_DB_ERR		vBIT(0xff,8,8)
-#define TDA_SM0_ERR_ALARM		BIT(22)
-#define TDA_SM1_ERR_ALARM		BIT(23)
-#define TDA_PCIX_ERR			BIT(39)
+#define TDA_SM0_ERR_ALARM		s2BIT(22)
+#define TDA_SM1_ERR_ALARM		s2BIT(23)
+#define TDA_PCIX_ERR			s2BIT(39)
 	u64 tda_err_mask;
 	u64 tda_err_alarm;
 
@@ -351,40 +351,40 @@
 #define PCC_SM_ERR_ALARM		vBIT(0xff,32,8)
 #define PCC_WR_ERR_ALARM		vBIT(0xff,40,8)
 #define PCC_N_SERR			vBIT(0xff,48,8)
-#define PCC_6_COF_OV_ERR		BIT(56)
-#define PCC_7_COF_OV_ERR		BIT(57)
-#define PCC_6_LSO_OV_ERR		BIT(58)
-#define PCC_7_LSO_OV_ERR		BIT(59)
+#define PCC_6_COF_OV_ERR		s2BIT(56)
+#define PCC_7_COF_OV_ERR		s2BIT(57)
+#define PCC_6_LSO_OV_ERR		s2BIT(58)
+#define PCC_7_LSO_OV_ERR		s2BIT(59)
 #define PCC_ENABLE_FOUR			vBIT(0x0F,0,8)
 	u64 pcc_err_mask;
 	u64 pcc_err_alarm;
 
 	u64 tti_err_reg;
-#define TTI_ECC_SG_ERR			BIT(7)
-#define TTI_ECC_DB_ERR			BIT(15)
-#define TTI_SM_ERR_ALARM		BIT(23)
+#define TTI_ECC_SG_ERR			s2BIT(7)
+#define TTI_ECC_DB_ERR			s2BIT(15)
+#define TTI_SM_ERR_ALARM		s2BIT(23)
 	u64 tti_err_mask;
 	u64 tti_err_alarm;
 
 	u64 lso_err_reg;
-#define LSO6_SEND_OFLOW			BIT(12)
-#define LSO7_SEND_OFLOW			BIT(13)
-#define LSO6_ABORT			BIT(14)
-#define LSO7_ABORT			BIT(15)
-#define LSO6_SM_ERR_ALARM		BIT(22)
-#define LSO7_SM_ERR_ALARM		BIT(23)
+#define LSO6_SEND_OFLOW			s2BIT(12)
+#define LSO7_SEND_OFLOW			s2BIT(13)
+#define LSO6_ABORT			s2BIT(14)
+#define LSO7_ABORT			s2BIT(15)
+#define LSO6_SM_ERR_ALARM		s2BIT(22)
+#define LSO7_SM_ERR_ALARM		s2BIT(23)
 	u64 lso_err_mask;
 	u64 lso_err_alarm;
 
 	u64 tpa_err_reg;
-#define TPA_TX_FRM_DROP			BIT(7)
-#define TPA_SM_ERR_ALARM		BIT(23)
+#define TPA_TX_FRM_DROP			s2BIT(7)
+#define TPA_SM_ERR_ALARM		s2BIT(23)
 
 	u64 tpa_err_mask;
 	u64 tpa_err_alarm;
 
 	u64 sm_err_reg;
-#define SM_SM_ERR_ALARM			BIT(15)
+#define SM_SM_ERR_ALARM			s2BIT(15)
 	u64 sm_err_mask;
 	u64 sm_err_alarm;
 
@@ -397,7 +397,7 @@
 #define X_MAX_FIFOS                        8
 #define X_FIFO_MAX_LEN                     0x1FFF	/*8191 */
 	u64 tx_fifo_partition_0;
-#define TX_FIFO_PARTITION_EN               BIT(0)
+#define TX_FIFO_PARTITION_EN               s2BIT(0)
 #define TX_FIFO_PARTITION_0_PRI(val)       vBIT(val,5,3)
 #define TX_FIFO_PARTITION_0_LEN(val)       vBIT(val,19,13)
 #define TX_FIFO_PARTITION_1_PRI(val)       vBIT(val,37,3)
@@ -437,16 +437,16 @@
 	u64 tx_w_round_robin_4;
 
 	u64 tti_command_mem;
-#define TTI_CMD_MEM_WE                     BIT(7)
-#define TTI_CMD_MEM_STROBE_NEW_CMD         BIT(15)
-#define TTI_CMD_MEM_STROBE_BEING_EXECUTED  BIT(15)
+#define TTI_CMD_MEM_WE                     s2BIT(7)
+#define TTI_CMD_MEM_STROBE_NEW_CMD         s2BIT(15)
+#define TTI_CMD_MEM_STROBE_BEING_EXECUTED  s2BIT(15)
 #define TTI_CMD_MEM_OFFSET(n)              vBIT(n,26,6)
 
 	u64 tti_data1_mem;
 #define TTI_DATA1_MEM_TX_TIMER_VAL(n)      vBIT(n,6,26)
 #define TTI_DATA1_MEM_TX_TIMER_AC_CI(n)    vBIT(n,38,2)
-#define TTI_DATA1_MEM_TX_TIMER_AC_EN       BIT(38)
-#define TTI_DATA1_MEM_TX_TIMER_CI_EN       BIT(39)
+#define TTI_DATA1_MEM_TX_TIMER_AC_EN       s2BIT(38)
+#define TTI_DATA1_MEM_TX_TIMER_CI_EN       s2BIT(39)
 #define TTI_DATA1_MEM_TX_URNG_A(n)         vBIT(n,41,7)
 #define TTI_DATA1_MEM_TX_URNG_B(n)         vBIT(n,49,7)
 #define TTI_DATA1_MEM_TX_URNG_C(n)         vBIT(n,57,7)
@@ -459,11 +459,11 @@
 
 /* Tx Protocol assist */
 	u64 tx_pa_cfg;
-#define TX_PA_CFG_IGNORE_FRM_ERR           BIT(1)
-#define TX_PA_CFG_IGNORE_SNAP_OUI          BIT(2)
-#define TX_PA_CFG_IGNORE_LLC_CTRL          BIT(3)
-#define	TX_PA_CFG_IGNORE_L2_ERR			   BIT(6)
-#define RX_PA_CFG_STRIP_VLAN_TAG		BIT(15)
+#define TX_PA_CFG_IGNORE_FRM_ERR           s2BIT(1)
+#define TX_PA_CFG_IGNORE_SNAP_OUI          s2BIT(2)
+#define TX_PA_CFG_IGNORE_LLC_CTRL          s2BIT(3)
+#define	TX_PA_CFG_IGNORE_L2_ERR			   s2BIT(6)
+#define RX_PA_CFG_STRIP_VLAN_TAG		s2BIT(15)
 
 /* Recent add, used only debug purposes. */
 	u64 pcc_enable;
@@ -477,31 +477,31 @@
 /* RxDMA Registers */
 	u64 rxdma_int_status;
 	u64 rxdma_int_mask;
-#define RXDMA_INT_RC_INT_M             BIT(0)
-#define RXDMA_INT_RPA_INT_M            BIT(1)
-#define RXDMA_INT_RDA_INT_M            BIT(2)
-#define RXDMA_INT_RTI_INT_M            BIT(3)
+#define RXDMA_INT_RC_INT_M             s2BIT(0)
+#define RXDMA_INT_RPA_INT_M            s2BIT(1)
+#define RXDMA_INT_RDA_INT_M            s2BIT(2)
+#define RXDMA_INT_RTI_INT_M            s2BIT(3)
 
 	u64 rda_err_reg;
 #define RDA_RXDn_ECC_SG_ERR		vBIT(0xFF,0,8)
 #define RDA_RXDn_ECC_DB_ERR		vBIT(0xFF,8,8)
-#define RDA_FRM_ECC_SG_ERR		BIT(23)
-#define RDA_FRM_ECC_DB_N_AERR		BIT(31)
-#define RDA_SM1_ERR_ALARM		BIT(38)
-#define RDA_SM0_ERR_ALARM		BIT(39)
-#define RDA_MISC_ERR			BIT(47)
-#define RDA_PCIX_ERR			BIT(55)
-#define RDA_RXD_ECC_DB_SERR		BIT(63)
+#define RDA_FRM_ECC_SG_ERR		s2BIT(23)
+#define RDA_FRM_ECC_DB_N_AERR		s2BIT(31)
+#define RDA_SM1_ERR_ALARM		s2BIT(38)
+#define RDA_SM0_ERR_ALARM		s2BIT(39)
+#define RDA_MISC_ERR			s2BIT(47)
+#define RDA_PCIX_ERR			s2BIT(55)
+#define RDA_RXD_ECC_DB_SERR		s2BIT(63)
 	u64 rda_err_mask;
 	u64 rda_err_alarm;
 
 	u64 rc_err_reg;
 #define RC_PRCn_ECC_SG_ERR		vBIT(0xFF,0,8)
 #define RC_PRCn_ECC_DB_ERR		vBIT(0xFF,8,8)
-#define RC_FTC_ECC_SG_ERR		BIT(23)
-#define RC_FTC_ECC_DB_ERR		BIT(31)
+#define RC_FTC_ECC_SG_ERR		s2BIT(23)
+#define RC_FTC_ECC_DB_ERR		s2BIT(31)
 #define RC_PRCn_SM_ERR_ALARM		vBIT(0xFF,32,8)
-#define RC_FTC_SM_ERR_ALARM		BIT(47)
+#define RC_FTC_SM_ERR_ALARM		s2BIT(47)
 #define RC_RDA_FAIL_WR_Rn		vBIT(0xFF,48,8)
 	u64 rc_err_mask;
 	u64 rc_err_alarm;
@@ -517,18 +517,18 @@
 	u64 prc_pcix_err_alarm;
 
 	u64 rpa_err_reg;
-#define RPA_ECC_SG_ERR			BIT(7)
-#define RPA_ECC_DB_ERR			BIT(15)
-#define RPA_FLUSH_REQUEST		BIT(22)
-#define RPA_SM_ERR_ALARM		BIT(23)
-#define RPA_CREDIT_ERR			BIT(31)
+#define RPA_ECC_SG_ERR			s2BIT(7)
+#define RPA_ECC_DB_ERR			s2BIT(15)
+#define RPA_FLUSH_REQUEST		s2BIT(22)
+#define RPA_SM_ERR_ALARM		s2BIT(23)
+#define RPA_CREDIT_ERR			s2BIT(31)
 	u64 rpa_err_mask;
 	u64 rpa_err_alarm;
 
 	u64 rti_err_reg;
-#define RTI_ECC_SG_ERR			BIT(7)
-#define RTI_ECC_DB_ERR			BIT(15)
-#define RTI_SM_ERR_ALARM		BIT(23)
+#define RTI_ECC_SG_ERR			s2BIT(7)
+#define RTI_ECC_DB_ERR			s2BIT(15)
+#define RTI_SM_ERR_ALARM		s2BIT(23)
 	u64 rti_err_mask;
 	u64 rti_err_alarm;
 
@@ -568,49 +568,49 @@
 #endif
 	u64 prc_rxd0_n[RX_MAX_RINGS];
 	u64 prc_ctrl_n[RX_MAX_RINGS];
-#define PRC_CTRL_RC_ENABLED                    BIT(7)
-#define PRC_CTRL_RING_MODE                     (BIT(14)|BIT(15))
+#define PRC_CTRL_RC_ENABLED                    s2BIT(7)
+#define PRC_CTRL_RING_MODE                     (s2BIT(14)|s2BIT(15))
 #define PRC_CTRL_RING_MODE_1                   vBIT(0,14,2)
 #define PRC_CTRL_RING_MODE_3                   vBIT(1,14,2)
 #define PRC_CTRL_RING_MODE_5                   vBIT(2,14,2)
 #define PRC_CTRL_RING_MODE_x                   vBIT(3,14,2)
-#define PRC_CTRL_NO_SNOOP                      (BIT(22)|BIT(23))
-#define PRC_CTRL_NO_SNOOP_DESC                 BIT(22)
-#define PRC_CTRL_NO_SNOOP_BUFF                 BIT(23)
-#define PRC_CTRL_BIMODAL_INTERRUPT             BIT(37)
-#define PRC_CTRL_GROUP_READS                   BIT(38)
+#define PRC_CTRL_NO_SNOOP                      (s2BIT(22)|s2BIT(23))
+#define PRC_CTRL_NO_SNOOP_DESC                 s2BIT(22)
+#define PRC_CTRL_NO_SNOOP_BUFF                 s2BIT(23)
+#define PRC_CTRL_BIMODAL_INTERRUPT             s2BIT(37)
+#define PRC_CTRL_GROUP_READS                   s2BIT(38)
 #define PRC_CTRL_RXD_BACKOFF_INTERVAL(val)     vBIT(val,40,24)
 
 	u64 prc_alarm_action;
-#define PRC_ALARM_ACTION_RR_R0_STOP            BIT(3)
-#define PRC_ALARM_ACTION_RW_R0_STOP            BIT(7)
-#define PRC_ALARM_ACTION_RR_R1_STOP            BIT(11)
-#define PRC_ALARM_ACTION_RW_R1_STOP            BIT(15)
-#define PRC_ALARM_ACTION_RR_R2_STOP            BIT(19)
-#define PRC_ALARM_ACTION_RW_R2_STOP            BIT(23)
-#define PRC_ALARM_ACTION_RR_R3_STOP            BIT(27)
-#define PRC_ALARM_ACTION_RW_R3_STOP            BIT(31)
-#define PRC_ALARM_ACTION_RR_R4_STOP            BIT(35)
-#define PRC_ALARM_ACTION_RW_R4_STOP            BIT(39)
-#define PRC_ALARM_ACTION_RR_R5_STOP            BIT(43)
-#define PRC_ALARM_ACTION_RW_R5_STOP            BIT(47)
-#define PRC_ALARM_ACTION_RR_R6_STOP            BIT(51)
-#define PRC_ALARM_ACTION_RW_R6_STOP            BIT(55)
-#define PRC_ALARM_ACTION_RR_R7_STOP            BIT(59)
-#define PRC_ALARM_ACTION_RW_R7_STOP            BIT(63)
+#define PRC_ALARM_ACTION_RR_R0_STOP            s2BIT(3)
+#define PRC_ALARM_ACTION_RW_R0_STOP            s2BIT(7)
+#define PRC_ALARM_ACTION_RR_R1_STOP            s2BIT(11)
+#define PRC_ALARM_ACTION_RW_R1_STOP            s2BIT(15)
+#define PRC_ALARM_ACTION_RR_R2_STOP            s2BIT(19)
+#define PRC_ALARM_ACTION_RW_R2_STOP            s2BIT(23)
+#define PRC_ALARM_ACTION_RR_R3_STOP            s2BIT(27)
+#define PRC_ALARM_ACTION_RW_R3_STOP            s2BIT(31)
+#define PRC_ALARM_ACTION_RR_R4_STOP            s2BIT(35)
+#define PRC_ALARM_ACTION_RW_R4_STOP            s2BIT(39)
+#define PRC_ALARM_ACTION_RR_R5_STOP            s2BIT(43)
+#define PRC_ALARM_ACTION_RW_R5_STOP            s2BIT(47)
+#define PRC_ALARM_ACTION_RR_R6_STOP            s2BIT(51)
+#define PRC_ALARM_ACTION_RW_R6_STOP            s2BIT(55)
+#define PRC_ALARM_ACTION_RR_R7_STOP            s2BIT(59)
+#define PRC_ALARM_ACTION_RW_R7_STOP            s2BIT(63)
 
 /* Receive traffic interrupts */
 	u64 rti_command_mem;
-#define RTI_CMD_MEM_WE                          BIT(7)
-#define RTI_CMD_MEM_STROBE                      BIT(15)
-#define RTI_CMD_MEM_STROBE_NEW_CMD              BIT(15)
-#define RTI_CMD_MEM_STROBE_CMD_BEING_EXECUTED   BIT(15)
+#define RTI_CMD_MEM_WE                          s2BIT(7)
+#define RTI_CMD_MEM_STROBE                      s2BIT(15)
+#define RTI_CMD_MEM_STROBE_NEW_CMD              s2BIT(15)
+#define RTI_CMD_MEM_STROBE_CMD_BEING_EXECUTED   s2BIT(15)
 #define RTI_CMD_MEM_OFFSET(n)                   vBIT(n,29,3)
 
 	u64 rti_data1_mem;
 #define RTI_DATA1_MEM_RX_TIMER_VAL(n)      vBIT(n,3,29)
-#define RTI_DATA1_MEM_RX_TIMER_AC_EN       BIT(38)
-#define RTI_DATA1_MEM_RX_TIMER_CI_EN       BIT(39)
+#define RTI_DATA1_MEM_RX_TIMER_AC_EN       s2BIT(38)
+#define RTI_DATA1_MEM_RX_TIMER_CI_EN       s2BIT(39)
 #define RTI_DATA1_MEM_RX_URNG_A(n)         vBIT(n,41,7)
 #define RTI_DATA1_MEM_RX_URNG_B(n)         vBIT(n,49,7)
 #define RTI_DATA1_MEM_RX_URNG_C(n)         vBIT(n,57,7)
@@ -622,10 +622,10 @@
 #define RTI_DATA2_MEM_RX_UFC_D(n)          vBIT(n,48,16)
 
 	u64 rx_pa_cfg;
-#define RX_PA_CFG_IGNORE_FRM_ERR           BIT(1)
-#define RX_PA_CFG_IGNORE_SNAP_OUI          BIT(2)
-#define RX_PA_CFG_IGNORE_LLC_CTRL          BIT(3)
-#define RX_PA_CFG_IGNORE_L2_ERR            BIT(6)
+#define RX_PA_CFG_IGNORE_FRM_ERR           s2BIT(1)
+#define RX_PA_CFG_IGNORE_SNAP_OUI          s2BIT(2)
+#define RX_PA_CFG_IGNORE_LLC_CTRL          s2BIT(3)
+#define RX_PA_CFG_IGNORE_L2_ERR            s2BIT(6)
 
 	u64 unused_11_1;
 
@@ -641,64 +641,64 @@
 /* Media Access Controller Register */
 	u64 mac_int_status;
 	u64 mac_int_mask;
-#define MAC_INT_STATUS_TMAC_INT            BIT(0)
-#define MAC_INT_STATUS_RMAC_INT            BIT(1)
+#define MAC_INT_STATUS_TMAC_INT            s2BIT(0)
+#define MAC_INT_STATUS_RMAC_INT            s2BIT(1)
 
 	u64 mac_tmac_err_reg;
-#define TMAC_ECC_SG_ERR				BIT(7)
-#define TMAC_ECC_DB_ERR				BIT(15)
-#define TMAC_TX_BUF_OVRN			BIT(23)
-#define TMAC_TX_CRI_ERR				BIT(31)
-#define TMAC_TX_SM_ERR				BIT(39)
-#define TMAC_DESC_ECC_SG_ERR			BIT(47)
-#define TMAC_DESC_ECC_DB_ERR			BIT(55)
+#define TMAC_ECC_SG_ERR				s2BIT(7)
+#define TMAC_ECC_DB_ERR				s2BIT(15)
+#define TMAC_TX_BUF_OVRN			s2BIT(23)
+#define TMAC_TX_CRI_ERR				s2BIT(31)
+#define TMAC_TX_SM_ERR				s2BIT(39)
+#define TMAC_DESC_ECC_SG_ERR			s2BIT(47)
+#define TMAC_DESC_ECC_DB_ERR			s2BIT(55)
 
 	u64 mac_tmac_err_mask;
 	u64 mac_tmac_err_alarm;
 
 	u64 mac_rmac_err_reg;
-#define RMAC_RX_BUFF_OVRN			BIT(0)
-#define RMAC_FRM_RCVD_INT			BIT(1)
-#define RMAC_UNUSED_INT				BIT(2)
-#define RMAC_RTS_PNUM_ECC_SG_ERR		BIT(5)
-#define RMAC_RTS_DS_ECC_SG_ERR			BIT(6)
-#define RMAC_RD_BUF_ECC_SG_ERR			BIT(7)
-#define RMAC_RTH_MAP_ECC_SG_ERR			BIT(8)
-#define RMAC_RTH_SPDM_ECC_SG_ERR		BIT(9)
-#define RMAC_RTS_VID_ECC_SG_ERR			BIT(10)
-#define RMAC_DA_SHADOW_ECC_SG_ERR		BIT(11)
-#define RMAC_RTS_PNUM_ECC_DB_ERR		BIT(13)
-#define RMAC_RTS_DS_ECC_DB_ERR			BIT(14)
-#define RMAC_RD_BUF_ECC_DB_ERR			BIT(15)
-#define RMAC_RTH_MAP_ECC_DB_ERR			BIT(16)
-#define RMAC_RTH_SPDM_ECC_DB_ERR		BIT(17)
-#define RMAC_RTS_VID_ECC_DB_ERR			BIT(18)
-#define RMAC_DA_SHADOW_ECC_DB_ERR		BIT(19)
-#define RMAC_LINK_STATE_CHANGE_INT		BIT(31)
-#define RMAC_RX_SM_ERR				BIT(39)
-#define RMAC_SINGLE_ECC_ERR			(BIT(5) | BIT(6) | BIT(7) |\
-						BIT(8)  | BIT(9) | BIT(10)|\
-						BIT(11))
-#define RMAC_DOUBLE_ECC_ERR			(BIT(13) | BIT(14) | BIT(15) |\
-						BIT(16)  | BIT(17) | BIT(18)|\
-						BIT(19))
+#define RMAC_RX_BUFF_OVRN			s2BIT(0)
+#define RMAC_FRM_RCVD_INT			s2BIT(1)
+#define RMAC_UNUSED_INT				s2BIT(2)
+#define RMAC_RTS_PNUM_ECC_SG_ERR		s2BIT(5)
+#define RMAC_RTS_DS_ECC_SG_ERR			s2BIT(6)
+#define RMAC_RD_BUF_ECC_SG_ERR			s2BIT(7)
+#define RMAC_RTH_MAP_ECC_SG_ERR			s2BIT(8)
+#define RMAC_RTH_SPDM_ECC_SG_ERR		s2BIT(9)
+#define RMAC_RTS_VID_ECC_SG_ERR			s2BIT(10)
+#define RMAC_DA_SHADOW_ECC_SG_ERR		s2BIT(11)
+#define RMAC_RTS_PNUM_ECC_DB_ERR		s2BIT(13)
+#define RMAC_RTS_DS_ECC_DB_ERR			s2BIT(14)
+#define RMAC_RD_BUF_ECC_DB_ERR			s2BIT(15)
+#define RMAC_RTH_MAP_ECC_DB_ERR			s2BIT(16)
+#define RMAC_RTH_SPDM_ECC_DB_ERR		s2BIT(17)
+#define RMAC_RTS_VID_ECC_DB_ERR			s2BIT(18)
+#define RMAC_DA_SHADOW_ECC_DB_ERR		s2BIT(19)
+#define RMAC_LINK_STATE_CHANGE_INT		s2BIT(31)
+#define RMAC_RX_SM_ERR				s2BIT(39)
+#define RMAC_SINGLE_ECC_ERR			(s2BIT(5) | s2BIT(6) | s2BIT(7) |\
+						s2BIT(8)  | s2BIT(9) | s2BIT(10)|\
+						s2BIT(11))
+#define RMAC_DOUBLE_ECC_ERR			(s2BIT(13) | s2BIT(14) | s2BIT(15) |\
+						s2BIT(16)  | s2BIT(17) | s2BIT(18)|\
+						s2BIT(19))
 	u64 mac_rmac_err_mask;
 	u64 mac_rmac_err_alarm;
 
 	u8 unused14[0x100 - 0x40];
 
 	u64 mac_cfg;
-#define MAC_CFG_TMAC_ENABLE             BIT(0)
-#define MAC_CFG_RMAC_ENABLE             BIT(1)
-#define MAC_CFG_LAN_NOT_WAN             BIT(2)
-#define MAC_CFG_TMAC_LOOPBACK           BIT(3)
-#define MAC_CFG_TMAC_APPEND_PAD         BIT(4)
-#define MAC_CFG_RMAC_STRIP_FCS          BIT(5)
-#define MAC_CFG_RMAC_STRIP_PAD          BIT(6)
-#define MAC_CFG_RMAC_PROM_ENABLE        BIT(7)
-#define MAC_RMAC_DISCARD_PFRM           BIT(8)
-#define MAC_RMAC_BCAST_ENABLE           BIT(9)
-#define MAC_RMAC_ALL_ADDR_ENABLE        BIT(10)
+#define MAC_CFG_TMAC_ENABLE             s2BIT(0)
+#define MAC_CFG_RMAC_ENABLE             s2BIT(1)
+#define MAC_CFG_LAN_NOT_WAN             s2BIT(2)
+#define MAC_CFG_TMAC_LOOPBACK           s2BIT(3)
+#define MAC_CFG_TMAC_APPEND_PAD         s2BIT(4)
+#define MAC_CFG_RMAC_STRIP_FCS          s2BIT(5)
+#define MAC_CFG_RMAC_STRIP_PAD          s2BIT(6)
+#define MAC_CFG_RMAC_PROM_ENABLE        s2BIT(7)
+#define MAC_RMAC_DISCARD_PFRM           s2BIT(8)
+#define MAC_RMAC_BCAST_ENABLE           s2BIT(9)
+#define MAC_RMAC_ALL_ADDR_ENABLE        s2BIT(10)
 #define MAC_RMAC_INVLD_IPG_THR(val)     vBIT(val,16,8)
 
 	u64 tmac_avg_ipg;
@@ -710,14 +710,14 @@
 #define RMAC_MAX_PYLD_LEN_JUMBO_DEF vBIT(9600,2,14)
 
 	u64 rmac_err_cfg;
-#define RMAC_ERR_FCS                    BIT(0)
-#define RMAC_ERR_FCS_ACCEPT             BIT(1)
-#define RMAC_ERR_TOO_LONG               BIT(1)
-#define RMAC_ERR_TOO_LONG_ACCEPT        BIT(1)
-#define RMAC_ERR_RUNT                   BIT(2)
-#define RMAC_ERR_RUNT_ACCEPT            BIT(2)
-#define RMAC_ERR_LEN_MISMATCH           BIT(3)
-#define RMAC_ERR_LEN_MISMATCH_ACCEPT    BIT(3)
+#define RMAC_ERR_FCS                    s2BIT(0)
+#define RMAC_ERR_FCS_ACCEPT             s2BIT(1)
+#define RMAC_ERR_TOO_LONG               s2BIT(1)
+#define RMAC_ERR_TOO_LONG_ACCEPT        s2BIT(1)
+#define RMAC_ERR_RUNT                   s2BIT(2)
+#define RMAC_ERR_RUNT_ACCEPT            s2BIT(2)
+#define RMAC_ERR_LEN_MISMATCH           s2BIT(3)
+#define RMAC_ERR_LEN_MISMATCH_ACCEPT    s2BIT(3)
 
 	u64 rmac_cfg_key;
 #define RMAC_CFG_KEY(val)               vBIT(val,0,16)
@@ -728,15 +728,15 @@
 #define MAC_MC_ADDR_START_OFFSET    16
 #define MAC_MC_ALL_MC_ADDR_OFFSET   63	/* enables all multicast pkts */
 	u64 rmac_addr_cmd_mem;
-#define RMAC_ADDR_CMD_MEM_WE                    BIT(7)
+#define RMAC_ADDR_CMD_MEM_WE                    s2BIT(7)
 #define RMAC_ADDR_CMD_MEM_RD                    0
-#define RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD        BIT(15)
-#define RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING  BIT(15)
+#define RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD        s2BIT(15)
+#define RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING  s2BIT(15)
 #define RMAC_ADDR_CMD_MEM_OFFSET(n)             vBIT(n,26,6)
 
 	u64 rmac_addr_data0_mem;
 #define RMAC_ADDR_DATA0_MEM_ADDR(n)    vBIT(n,0,48)
-#define RMAC_ADDR_DATA0_MEM_USER       BIT(48)
+#define RMAC_ADDR_DATA0_MEM_USER       s2BIT(48)
 
 	u64 rmac_addr_data1_mem;
 #define RMAC_ADDR_DATA1_MEM_MASK(n)    vBIT(n,0,48)
@@ -753,10 +753,10 @@
 	u64 tmac_ipg_cfg;
 
 	u64 rmac_pause_cfg;
-#define RMAC_PAUSE_GEN             BIT(0)
-#define RMAC_PAUSE_GEN_ENABLE      BIT(0)
-#define RMAC_PAUSE_RX              BIT(1)
-#define RMAC_PAUSE_RX_ENABLE       BIT(1)
+#define RMAC_PAUSE_GEN             s2BIT(0)
+#define RMAC_PAUSE_GEN_ENABLE      s2BIT(0)
+#define RMAC_PAUSE_RX              s2BIT(1)
+#define RMAC_PAUSE_RX_ENABLE       s2BIT(1)
 #define RMAC_PAUSE_HG_PTIME_DEF    vBIT(0xFFFF,16,16)
 #define RMAC_PAUSE_HG_PTIME(val)    vBIT(val,16,16)
 
@@ -787,29 +787,29 @@
 #define MAX_DIX_MAP                         4
 	u64 rts_dix_map_n[MAX_DIX_MAP];
 #define RTS_DIX_MAP_ETYPE(val)             vBIT(val,0,16)
-#define RTS_DIX_MAP_SCW(val)               BIT(val,21)
+#define RTS_DIX_MAP_SCW(val)               s2BIT(val,21)
 
 	u64 rts_q_alternates;
 	u64 rts_default_q;
 
 	u64 rts_ctrl;
-#define RTS_CTRL_IGNORE_SNAP_OUI           BIT(2)
-#define RTS_CTRL_IGNORE_LLC_CTRL           BIT(3)
+#define RTS_CTRL_IGNORE_SNAP_OUI           s2BIT(2)
+#define RTS_CTRL_IGNORE_LLC_CTRL           s2BIT(3)
 
 	u64 rts_pn_cam_ctrl;
-#define RTS_PN_CAM_CTRL_WE                 BIT(7)
-#define RTS_PN_CAM_CTRL_STROBE_NEW_CMD     BIT(15)
-#define RTS_PN_CAM_CTRL_STROBE_BEING_EXECUTED   BIT(15)
+#define RTS_PN_CAM_CTRL_WE                 s2BIT(7)
+#define RTS_PN_CAM_CTRL_STROBE_NEW_CMD     s2BIT(15)
+#define RTS_PN_CAM_CTRL_STROBE_BEING_EXECUTED   s2BIT(15)
 #define RTS_PN_CAM_CTRL_OFFSET(n)          vBIT(n,24,8)
 	u64 rts_pn_cam_data;
-#define RTS_PN_CAM_DATA_TCP_SELECT         BIT(7)
+#define RTS_PN_CAM_DATA_TCP_SELECT         s2BIT(7)
 #define RTS_PN_CAM_DATA_PORT(val)          vBIT(val,8,16)
 #define RTS_PN_CAM_DATA_SCW(val)           vBIT(val,24,8)
 
 	u64 rts_ds_mem_ctrl;
-#define RTS_DS_MEM_CTRL_WE                 BIT(7)
-#define RTS_DS_MEM_CTRL_STROBE_NEW_CMD     BIT(15)
-#define RTS_DS_MEM_CTRL_STROBE_CMD_BEING_EXECUTED   BIT(15)
+#define RTS_DS_MEM_CTRL_WE                 s2BIT(7)
+#define RTS_DS_MEM_CTRL_STROBE_NEW_CMD     s2BIT(15)
+#define RTS_DS_MEM_CTRL_STROBE_CMD_BEING_EXECUTED   s2BIT(15)
 #define RTS_DS_MEM_CTRL_OFFSET(n)          vBIT(n,26,6)
 	u64 rts_ds_mem_data;
 #define RTS_DS_MEM_DATA(n)                 vBIT(n,0,8)
@@ -823,23 +823,23 @@
 
 /* memory controller registers */
 	u64 mc_int_status;
-#define MC_INT_STATUS_MC_INT               BIT(0)
+#define MC_INT_STATUS_MC_INT               s2BIT(0)
 	u64 mc_int_mask;
-#define MC_INT_MASK_MC_INT                 BIT(0)
+#define MC_INT_MASK_MC_INT                 s2BIT(0)
 
 	u64 mc_err_reg;
-#define MC_ERR_REG_ECC_DB_ERR_L            BIT(14)
-#define MC_ERR_REG_ECC_DB_ERR_U            BIT(15)
-#define MC_ERR_REG_MIRI_ECC_DB_ERR_0       BIT(18)
-#define MC_ERR_REG_MIRI_ECC_DB_ERR_1       BIT(20)
-#define MC_ERR_REG_MIRI_CRI_ERR_0          BIT(22)
-#define MC_ERR_REG_MIRI_CRI_ERR_1          BIT(23)
-#define MC_ERR_REG_SM_ERR                  BIT(31)
-#define MC_ERR_REG_ECC_ALL_SNG		   (BIT(2) | BIT(3) | BIT(4) | BIT(5) |\
-					BIT(17) | BIT(19))
-#define MC_ERR_REG_ECC_ALL_DBL		   (BIT(10) | BIT(11) | BIT(12) |\
-					BIT(13) | BIT(18) | BIT(20))
-#define PLL_LOCK_N			BIT(39)
+#define MC_ERR_REG_ECC_DB_ERR_L            s2BIT(14)
+#define MC_ERR_REG_ECC_DB_ERR_U            s2BIT(15)
+#define MC_ERR_REG_MIRI_ECC_DB_ERR_0       s2BIT(18)
+#define MC_ERR_REG_MIRI_ECC_DB_ERR_1       s2BIT(20)
+#define MC_ERR_REG_MIRI_CRI_ERR_0          s2BIT(22)
+#define MC_ERR_REG_MIRI_CRI_ERR_1          s2BIT(23)
+#define MC_ERR_REG_SM_ERR                  s2BIT(31)
+#define MC_ERR_REG_ECC_ALL_SNG		   (s2BIT(2) | s2BIT(3) | s2BIT(4) | s2BIT(5) |\
+					s2BIT(17) | s2BIT(19))
+#define MC_ERR_REG_ECC_ALL_DBL		   (s2BIT(10) | s2BIT(11) | s2BIT(12) |\
+					s2BIT(13) | s2BIT(18) | s2BIT(20))
+#define PLL_LOCK_N			s2BIT(39)
 	u64 mc_err_mask;
 	u64 mc_err_alarm;
 
@@ -857,8 +857,8 @@
 #define RX_QUEUE_CFG_Q7_SZ(n)              vBIT(n,56,8)
 
 	u64 mc_rldram_mrs;
-#define	MC_RLDRAM_QUEUE_SIZE_ENABLE			BIT(39)
-#define	MC_RLDRAM_MRS_ENABLE				BIT(47)
+#define	MC_RLDRAM_QUEUE_SIZE_ENABLE			s2BIT(39)
+#define	MC_RLDRAM_MRS_ENABLE				s2BIT(47)
 
 	u64 mc_rldram_interleave;
 
@@ -871,11 +871,11 @@
 	u64 mc_rldram_ref_per;
 	u8 unused20[0x220 - 0x208];
 	u64 mc_rldram_test_ctrl;
-#define MC_RLDRAM_TEST_MODE		BIT(47)
-#define MC_RLDRAM_TEST_WRITE	BIT(7)
-#define MC_RLDRAM_TEST_GO		BIT(15)
-#define MC_RLDRAM_TEST_DONE		BIT(23)
-#define MC_RLDRAM_TEST_PASS		BIT(31)
+#define MC_RLDRAM_TEST_MODE		s2BIT(47)
+#define MC_RLDRAM_TEST_WRITE	s2BIT(7)
+#define MC_RLDRAM_TEST_GO		s2BIT(15)
+#define MC_RLDRAM_TEST_DONE		s2BIT(23)
+#define MC_RLDRAM_TEST_PASS		s2BIT(31)
 
 	u8 unused21[0x240 - 0x228];
 	u64 mc_rldram_test_add;
@@ -888,7 +888,7 @@
 
 	u8 unused24_1[0x360 - 0x308];
 	u64 mc_rldram_ctrl;
-#define	MC_RLDRAM_ENABLE_ODT		BIT(7)
+#define	MC_RLDRAM_ENABLE_ODT		s2BIT(7)
 
 	u8 unused24_2[0x640 - 0x368];
 	u64 mc_rldram_ref_per_herc;
@@ -906,24 +906,24 @@
 	/* XGXS control registers */
 
 	u64 xgxs_int_status;
-#define XGXS_INT_STATUS_TXGXS              BIT(0)
-#define XGXS_INT_STATUS_RXGXS              BIT(1)
+#define XGXS_INT_STATUS_TXGXS              s2BIT(0)
+#define XGXS_INT_STATUS_RXGXS              s2BIT(1)
 	u64 xgxs_int_mask;
-#define XGXS_INT_MASK_TXGXS                BIT(0)
-#define XGXS_INT_MASK_RXGXS                BIT(1)
+#define XGXS_INT_MASK_TXGXS                s2BIT(0)
+#define XGXS_INT_MASK_RXGXS                s2BIT(1)
 
 	u64 xgxs_txgxs_err_reg;
-#define TXGXS_ECC_SG_ERR		BIT(7)
-#define TXGXS_ECC_DB_ERR		BIT(15)
-#define TXGXS_ESTORE_UFLOW		BIT(31)
-#define TXGXS_TX_SM_ERR			BIT(39)
+#define TXGXS_ECC_SG_ERR		s2BIT(7)
+#define TXGXS_ECC_DB_ERR		s2BIT(15)
+#define TXGXS_ESTORE_UFLOW		s2BIT(31)
+#define TXGXS_TX_SM_ERR			s2BIT(39)
 
 	u64 xgxs_txgxs_err_mask;
 	u64 xgxs_txgxs_err_alarm;
 
 	u64 xgxs_rxgxs_err_reg;
-#define RXGXS_ESTORE_OFLOW		BIT(7)
-#define RXGXS_RX_SM_ERR			BIT(39)
+#define RXGXS_ESTORE_OFLOW		s2BIT(7)
+#define RXGXS_RX_SM_ERR			s2BIT(39)
 	u64 xgxs_rxgxs_err_mask;
 	u64 xgxs_rxgxs_err_alarm;
 
@@ -942,10 +942,10 @@
 #define SPI_CONTROL_BYTECNT(cnt)	vBIT(cnt,29,3)
 #define SPI_CONTROL_CMD(cmd)		vBIT(cmd,32,8)
 #define SPI_CONTROL_ADDR(addr)		vBIT(addr,40,24)
-#define SPI_CONTROL_SEL1		BIT(4)
-#define SPI_CONTROL_REQ			BIT(7)
-#define SPI_CONTROL_NACK		BIT(5)
-#define SPI_CONTROL_DONE		BIT(6)
+#define SPI_CONTROL_SEL1		s2BIT(4)
+#define SPI_CONTROL_REQ			s2BIT(7)
+#define SPI_CONTROL_NACK		s2BIT(5)
+#define SPI_CONTROL_DONE		s2BIT(6)
 	u64 spi_data;
 #define SPI_DATA_WRITE(data,len)	vBIT(data,0,len)
 };
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 22e4054..b8c0e7b 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -1716,7 +1716,7 @@
 			MISC_LINK_STABILITY_PRD(3);
 		writeq(val64, &bar0->misc_control);
 		val64 = readq(&bar0->pic_control2);
-		val64 &= ~(BIT(13)|BIT(14)|BIT(15));
+		val64 &= ~(s2BIT(13)|s2BIT(14)|s2BIT(15));
 		writeq(val64, &bar0->pic_control2);
 	}
 	if (strstr(nic->product_name, "CX4")) {
@@ -2427,7 +2427,7 @@
 		}
 		if ((rxdp->Control_1 & RXD_OWN_XENA) &&
 			((nic->rxd_mode == RXD_MODE_3B) &&
-				(rxdp->Control_2 & BIT(0)))) {
+				(rxdp->Control_2 & s2BIT(0)))) {
 			mac_control->rings[ring_no].rx_curr_put_info.
 					offset = off;
 			goto end;
@@ -2540,7 +2540,7 @@
 				rxdp->Control_2 |= SET_BUFFER2_SIZE_3
 								(dev->mtu + 4);
 			}
-			rxdp->Control_2 |= BIT(0);
+			rxdp->Control_2 |= s2BIT(0);
 		}
 		rxdp->Host_Control = (unsigned long) (skb);
 		if (alloc_tab & ((1 << rxsync_frequency) - 1))
@@ -3377,7 +3377,7 @@
 		pci_write_config_dword(sp->pdev, 0x68, 0x7C);
 
 		/* Clearing PCI_STATUS error reflected here */
-		writeq(BIT(62), &bar0->txpic_int_reg);
+		writeq(s2BIT(62), &bar0->txpic_int_reg);
 	}
 
 	/* Reset device statistics maintained by OS */
@@ -3575,7 +3575,7 @@
 
 	do {
 		val64 = readq(&bar0->xmsi_access);
-		if (!(val64 & BIT(15)))
+		if (!(val64 & s2BIT(15)))
 			break;
 		mdelay(1);
 		cnt++;
@@ -3597,7 +3597,7 @@
 	for (i=0; i < MAX_REQUESTED_MSI_X; i++) {
 		writeq(nic->msix_info[i].addr, &bar0->xmsi_address);
 		writeq(nic->msix_info[i].data, &bar0->xmsi_data);
-		val64 = (BIT(7) | BIT(15) | vBIT(i, 26, 6));
+		val64 = (s2BIT(7) | s2BIT(15) | vBIT(i, 26, 6));
 		writeq(val64, &bar0->xmsi_access);
 		if (wait_for_msix_trans(nic, i)) {
 			DBG_PRINT(ERR_DBG, "failed in %s\n", __FUNCTION__);
@@ -3614,7 +3614,7 @@
 
 	/* Store and display */
 	for (i=0; i < MAX_REQUESTED_MSI_X; i++) {
-		val64 = (BIT(15) | vBIT(i, 26, 6));
+		val64 = (s2BIT(15) | vBIT(i, 26, 6));
 		writeq(val64, &bar0->xmsi_access);
 		if (wait_for_msix_trans(nic, i)) {
 			DBG_PRINT(ERR_DBG, "failed in %s\n", __FUNCTION__);
@@ -4634,7 +4634,7 @@
 		do {
 			udelay(100);
 			val64 = readq(&bar0->stat_cfg);
-			if (!(val64 & BIT(0)))
+			if (!(val64 & s2BIT(0)))
 				break;
 			cnt++;
 			if (cnt == 5)
diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h
index f6b4556..cc1797a 100644
--- a/drivers/net/s2io.h
+++ b/drivers/net/s2io.h
@@ -14,7 +14,7 @@
 #define _S2IO_H
 
 #define TBD 0
-#define BIT(loc)		(0x8000000000000000ULL >> (loc))
+#define s2BIT(loc)		(0x8000000000000000ULL >> (loc))
 #define vBIT(val, loc, sz)	(((u64)val) << (64-loc-sz))
 #define INV(d)  ((d&0xff)<<24) | (((d>>8)&0xff)<<16) | (((d>>16)&0xff)<<8)| ((d>>24)&0xff)
 
@@ -473,42 +473,42 @@
 
 	u64 List_Control;
 #define TX_FIFO_LAST_TXD_NUM( val)     vBIT(val,0,8)
-#define TX_FIFO_FIRST_LIST             BIT(14)
-#define TX_FIFO_LAST_LIST              BIT(15)
+#define TX_FIFO_FIRST_LIST             s2BIT(14)
+#define TX_FIFO_LAST_LIST              s2BIT(15)
 #define TX_FIFO_FIRSTNLAST_LIST        vBIT(3,14,2)
-#define TX_FIFO_SPECIAL_FUNC           BIT(23)
-#define TX_FIFO_DS_NO_SNOOP            BIT(31)
-#define TX_FIFO_BUFF_NO_SNOOP          BIT(30)
+#define TX_FIFO_SPECIAL_FUNC           s2BIT(23)
+#define TX_FIFO_DS_NO_SNOOP            s2BIT(31)
+#define TX_FIFO_BUFF_NO_SNOOP          s2BIT(30)
 };
 
 /* Tx descriptor structure */
 struct TxD {
 	u64 Control_1;
 /* bit mask */
-#define TXD_LIST_OWN_XENA       BIT(7)
-#define TXD_T_CODE              (BIT(12)|BIT(13)|BIT(14)|BIT(15))
+#define TXD_LIST_OWN_XENA       s2BIT(7)
+#define TXD_T_CODE              (s2BIT(12)|s2BIT(13)|s2BIT(14)|s2BIT(15))
 #define TXD_T_CODE_OK(val)      (|(val & TXD_T_CODE))
 #define GET_TXD_T_CODE(val)     ((val & TXD_T_CODE)<<12)
-#define TXD_GATHER_CODE         (BIT(22) | BIT(23))
-#define TXD_GATHER_CODE_FIRST   BIT(22)
-#define TXD_GATHER_CODE_LAST    BIT(23)
-#define TXD_TCP_LSO_EN          BIT(30)
-#define TXD_UDP_COF_EN          BIT(31)
-#define TXD_UFO_EN		BIT(31) | BIT(30)
+#define TXD_GATHER_CODE         (s2BIT(22) | s2BIT(23))
+#define TXD_GATHER_CODE_FIRST   s2BIT(22)
+#define TXD_GATHER_CODE_LAST    s2BIT(23)
+#define TXD_TCP_LSO_EN          s2BIT(30)
+#define TXD_UDP_COF_EN          s2BIT(31)
+#define TXD_UFO_EN		s2BIT(31) | s2BIT(30)
 #define TXD_TCP_LSO_MSS(val)    vBIT(val,34,14)
 #define TXD_UFO_MSS(val)	vBIT(val,34,14)
 #define TXD_BUFFER0_SIZE(val)   vBIT(val,48,16)
 
 	u64 Control_2;
-#define TXD_TX_CKO_CONTROL      (BIT(5)|BIT(6)|BIT(7))
-#define TXD_TX_CKO_IPV4_EN      BIT(5)
-#define TXD_TX_CKO_TCP_EN       BIT(6)
-#define TXD_TX_CKO_UDP_EN       BIT(7)
-#define TXD_VLAN_ENABLE         BIT(15)
+#define TXD_TX_CKO_CONTROL      (s2BIT(5)|s2BIT(6)|s2BIT(7))
+#define TXD_TX_CKO_IPV4_EN      s2BIT(5)
+#define TXD_TX_CKO_TCP_EN       s2BIT(6)
+#define TXD_TX_CKO_UDP_EN       s2BIT(7)
+#define TXD_VLAN_ENABLE         s2BIT(15)
 #define TXD_VLAN_TAG(val)       vBIT(val,16,16)
 #define TXD_INT_NUMBER(val)     vBIT(val,34,6)
-#define TXD_INT_TYPE_PER_LIST   BIT(47)
-#define TXD_INT_TYPE_UTILZ      BIT(46)
+#define TXD_INT_TYPE_PER_LIST   s2BIT(47)
+#define TXD_INT_TYPE_UTILZ      s2BIT(46)
 #define TXD_SET_MARKER         vBIT(0x6,0,4)
 
 	u64 Buffer_Pointer;
@@ -525,14 +525,14 @@
 struct RxD_t {
 	u64 Host_Control;	/* reserved for host */
 	u64 Control_1;
-#define RXD_OWN_XENA            BIT(7)
-#define RXD_T_CODE              (BIT(12)|BIT(13)|BIT(14)|BIT(15))
+#define RXD_OWN_XENA            s2BIT(7)
+#define RXD_T_CODE              (s2BIT(12)|s2BIT(13)|s2BIT(14)|s2BIT(15))
 #define RXD_FRAME_PROTO         vBIT(0xFFFF,24,8)
-#define RXD_FRAME_PROTO_IPV4    BIT(27)
-#define RXD_FRAME_PROTO_IPV6    BIT(28)
-#define RXD_FRAME_IP_FRAG	BIT(29)
-#define RXD_FRAME_PROTO_TCP     BIT(30)
-#define RXD_FRAME_PROTO_UDP     BIT(31)
+#define RXD_FRAME_PROTO_IPV4    s2BIT(27)
+#define RXD_FRAME_PROTO_IPV6    s2BIT(28)
+#define RXD_FRAME_IP_FRAG	s2BIT(29)
+#define RXD_FRAME_PROTO_TCP     s2BIT(30)
+#define RXD_FRAME_PROTO_UDP     s2BIT(31)
 #define TCP_OR_UDP_FRAME        (RXD_FRAME_PROTO_TCP | RXD_FRAME_PROTO_UDP)
 #define RXD_GET_L3_CKSUM(val)   ((u16)(val>> 16) & 0xFFFF)
 #define RXD_GET_L4_CKSUM(val)   ((u16)(val) & 0xFFFF)
@@ -998,26 +998,26 @@
 /*  Interrupt masks for the general interrupt mask register */
 #define DISABLE_ALL_INTRS   0xFFFFFFFFFFFFFFFFULL
 
-#define TXPIC_INT_M         BIT(0)
-#define TXDMA_INT_M         BIT(1)
-#define TXMAC_INT_M         BIT(2)
-#define TXXGXS_INT_M        BIT(3)
-#define TXTRAFFIC_INT_M     BIT(8)
-#define PIC_RX_INT_M        BIT(32)
-#define RXDMA_INT_M         BIT(33)
-#define RXMAC_INT_M         BIT(34)
-#define MC_INT_M            BIT(35)
-#define RXXGXS_INT_M        BIT(36)
-#define RXTRAFFIC_INT_M     BIT(40)
+#define TXPIC_INT_M         s2BIT(0)
+#define TXDMA_INT_M         s2BIT(1)
+#define TXMAC_INT_M         s2BIT(2)
+#define TXXGXS_INT_M        s2BIT(3)
+#define TXTRAFFIC_INT_M     s2BIT(8)
+#define PIC_RX_INT_M        s2BIT(32)
+#define RXDMA_INT_M         s2BIT(33)
+#define RXMAC_INT_M         s2BIT(34)
+#define MC_INT_M            s2BIT(35)
+#define RXXGXS_INT_M        s2BIT(36)
+#define RXTRAFFIC_INT_M     s2BIT(40)
 
 /*  PIC level Interrupts TODO*/
 
 /*  DMA level Inressupts */
-#define TXDMA_PFC_INT_M     BIT(0)
-#define TXDMA_PCC_INT_M     BIT(2)
+#define TXDMA_PFC_INT_M     s2BIT(0)
+#define TXDMA_PCC_INT_M     s2BIT(2)
 
 /*  PFC block interrupts */
-#define PFC_MISC_ERR_1      BIT(0)	/* Interrupt to indicate FIFO full */
+#define PFC_MISC_ERR_1      s2BIT(0)	/* Interrupt to indicate FIFO full */
 
 /* PCC block interrupts. */
 #define	PCC_FB_ECC_ERR	   vBIT(0xff, 16, 8)	/* Interrupt to indicate
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index fab055f..571060a 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -46,7 +46,7 @@
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/pci-bridge.h>
 #include <net/checksum.h>
 
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index 76e5561..a7afeea 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -34,9 +34,9 @@
 #include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <linux/dma-mapping.h>
+#include <linux/bitops.h>
 
 #include <asm/processor.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 #include <asm/uaccess.h>
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_leds.c b/drivers/net/wireless/bcm43xx/bcm43xx_leds.c
index 8f198be..cb51dc5 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_leds.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_leds.c
@@ -29,7 +29,7 @@
 #include "bcm43xx_radio.h"
 #include "bcm43xx.h"
 
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 
 static void bcm43xx_led_changestate(struct bcm43xx_led *led)
diff --git a/drivers/net/wireless/hostap/hostap_common.h b/drivers/net/wireless/hostap/hostap_common.h
index ceb7f1e..517f898 100644
--- a/drivers/net/wireless/hostap/hostap_common.h
+++ b/drivers/net/wireless/hostap/hostap_common.h
@@ -4,9 +4,6 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 
-#define BIT(x) (1 << (x))
-
-
 /* IEEE 802.11 defines */
 
 /* Information Element IDs */
diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c
index 40f516d..d8f5efc 100644
--- a/drivers/net/wireless/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/hostap/hostap_ioctl.c
@@ -2920,7 +2920,7 @@
 
 	printk(KERN_DEBUG "%s: process %d (%s) used deprecated iwpriv monitor "
 	       "- update software to use iwconfig mode monitor\n",
-	       dev->name, current->pid, current->comm);
+	       dev->name, task_pid_nr(current), current->comm);
 
 	/* Backward compatibility code - this can be removed at some point */
 
diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index 2fa8eed..7a77975 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -1858,14 +1858,6 @@
 
 	modify_acceptable_latency("ipw2100", INFINITE_LATENCY);
 
-#ifdef ACPI_CSTATE_LIMIT_DEFINED
-	if (priv->config & CFG_C3_DISABLED) {
-		IPW_DEBUG_INFO(": Resetting C3 transitions.\n");
-		acpi_set_cstate_limit(priv->cstate_limit);
-		priv->config &= ~CFG_C3_DISABLED;
-	}
-#endif
-
 	/* We have to signal any supplicant if we are disassociating */
 	if (associated)
 		wireless_send_event(priv->net_dev, SIOCGIWAP, &wrqu, NULL);
@@ -2091,14 +2083,6 @@
 	/* RF_KILL is now enabled (else we wouldn't be here) */
 	priv->status |= STATUS_RF_KILL_HW;
 
-#ifdef ACPI_CSTATE_LIMIT_DEFINED
-	if (priv->config & CFG_C3_DISABLED) {
-		IPW_DEBUG_INFO(": Resetting C3 transitions.\n");
-		acpi_set_cstate_limit(priv->cstate_limit);
-		priv->config &= ~CFG_C3_DISABLED;
-	}
-#endif
-
 	/* Make sure the RF Kill check timer is running */
 	priv->stop_rf_kill = 0;
 	cancel_delayed_work(&priv->rf_kill);
@@ -2363,23 +2347,10 @@
 	u32 match, reg;
 	int j;
 #endif
-#ifdef ACPI_CSTATE_LIMIT_DEFINED
-	int limit;
-#endif
 
 	IPW_DEBUG_INFO(": PCI latency error detected at 0x%04zX.\n",
 		       i * sizeof(struct ipw2100_status));
 
-#ifdef ACPI_CSTATE_LIMIT_DEFINED
-	IPW_DEBUG_INFO(": Disabling C3 transitions.\n");
-	limit = acpi_get_cstate_limit();
-	if (limit > 2) {
-		priv->cstate_limit = limit;
-		acpi_set_cstate_limit(2);
-		priv->config |= CFG_C3_DISABLED;
-	}
-#endif
-
 #ifdef IPW2100_DEBUG_C3
 	/* Halt the fimrware so we can get a good image */
 	write_register(priv->net_dev, IPW_REG_RESET_REG,
diff --git a/drivers/net/wireless/ipw2100.h b/drivers/net/wireless/ipw2100.h
index 1ee3348..bbf1ddc 100644
--- a/drivers/net/wireless/ipw2100.h
+++ b/drivers/net/wireless/ipw2100.h
@@ -479,7 +479,6 @@
 #define CFG_ASSOCIATE           (1<<6)
 #define CFG_FIXED_RATE          (1<<7)
 #define CFG_ADHOC_CREATE        (1<<8)
-#define CFG_C3_DISABLED         (1<<9)
 #define CFG_PASSIVE_SCAN        (1<<10)
 #ifdef CONFIG_IPW2100_MONITOR
 #define CFG_CRC_CHECK           (1<<11)
@@ -508,7 +507,6 @@
 	u8 bssid[ETH_ALEN];
 	u8 channel;
 	int last_mode;
-	int cstate_limit;
 
 	unsigned long connect_start;
 	unsigned long last_reset;
diff --git a/drivers/pcmcia/m32r_pcc.c b/drivers/pcmcia/m32r_pcc.c
index 67d28ee..c5e0d89 100644
--- a/drivers/pcmcia/m32r_pcc.c
+++ b/drivers/pcmcia/m32r_pcc.c
@@ -22,9 +22,9 @@
 #include <linux/workqueue.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/bitops.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/bitops.h>
 #include <asm/system.h>
 #include <asm/addrspace.h>
 
diff --git a/drivers/pcmcia/m8xx_pcmcia.c b/drivers/pcmcia/m8xx_pcmcia.c
index b019854..d182760 100644
--- a/drivers/pcmcia/m8xx_pcmcia.c
+++ b/drivers/pcmcia/m8xx_pcmcia.c
@@ -48,9 +48,9 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/fsl_devices.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
-#include <asm/bitops.h>
 #include <asm/system.h>
 #include <asm/time.h>
 #include <asm/mpc8xx.h>
diff --git a/drivers/ps3/ps3av.c b/drivers/ps3/ps3av.c
index 397f4ce..87b3493 100644
--- a/drivers/ps3/ps3av.c
+++ b/drivers/ps3/ps3av.c
@@ -729,7 +729,7 @@
 
 static const struct ps3av_monitor_quirk {
 	const char *monitor_name;
-	u32 clear_60, clear_50, clear_vesa;
+	u32 clear_60;
 } ps3av_monitor_quirks[] = {
 	{
 		.monitor_name	= "DELL 2007WFP",
@@ -757,10 +757,6 @@
 				quirk->monitor_name);
 			info->res_60.res_bits &= ~quirk->clear_60;
 			info->res_60.native &= ~quirk->clear_60;
-			info->res_50.res_bits &= ~quirk->clear_50;
-			info->res_50.native &= ~quirk->clear_50;
-			info->res_vesa.res_bits &= ~quirk->clear_vesa;
-			info->res_vesa.native &= ~quirk->clear_vesa;
 			break;
 		}
 	}
diff --git a/drivers/ps3/vuart.c b/drivers/ps3/vuart.c
index bea25a1..9dea585 100644
--- a/drivers/ps3/vuart.c
+++ b/drivers/ps3/vuart.c
@@ -22,11 +22,11 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
+#include <linux/bitops.h>
 #include <asm/ps3.h>
 
 #include <asm/firmware.h>
 #include <asm/lv1call.h>
-#include <asm/bitops.h>
 
 #include "vuart.h"
 
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index e4bf68c..2fd49ed 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -21,11 +21,11 @@
 #include <linux/interrupt.h>
 #include <linux/string.h>
 #include <linux/pm.h>
+#include <linux/bitops.h>
 
 #include <linux/amba/bus.h>
 
 #include <asm/io.h>
-#include <asm/bitops.h>
 #include <asm/hardware.h>
 #include <asm/irq.h>
 #include <asm/rtc.h>
diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c
index 0918b78..6f1e9a9 100644
--- a/drivers/rtc/rtc-sa1100.c
+++ b/drivers/rtc/rtc-sa1100.c
@@ -29,8 +29,8 @@
 #include <linux/interrupt.h>
 #include <linux/string.h>
 #include <linux/pm.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/hardware.h>
 #include <asm/irq.h>
 #include <asm/rtc.h>
diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c
index 16ea828..ef7bc0a 100644
--- a/drivers/s390/cio/idset.c
+++ b/drivers/s390/cio/idset.c
@@ -6,7 +6,7 @@
  */
 
 #include <linux/slab.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include "idset.h"
 #include "css.h"
 
diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index 399695f..3561982 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c
@@ -59,13 +59,13 @@
  *    1.15  Changed for 2.6 Kernel  No longer compiles on 2.4 or lower
  *    1.25  Added Packing support
  */
-#include <asm/bitops.h>
 #include <asm/ccwdev.h>
 #include <asm/ccwgroup.h>
 #include <asm/debug.h>
 #include <asm/idals.h>
 #include <asm/io.h>
 
+#include <linux/bitops.h>
 #include <linux/ctype.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
diff --git a/drivers/scsi/FlashPoint.c b/drivers/scsi/FlashPoint.c
index a7f916c..1c90781 100644
--- a/drivers/scsi/FlashPoint.c
+++ b/drivers/scsi/FlashPoint.c
@@ -25,9 +25,6 @@
 
 #define FAILURE         0xFFFFFFFFL
 
-#define BIT(x)          ((unsigned char)(1<<(x)))	/* single-bit mask in bit position x */
-#define BITW(x)          ((unsigned short)(1<<(x)))	/* single-bit mask in bit position x */
-
 struct sccb;
 typedef void (*CALL_BK_FN) (struct sccb *);
 
@@ -374,9 +371,9 @@
 #define  SCAM_ENABLED   BIT(2)
 #define  SCAM_LEVEL2    BIT(3)
 
-#define	RENEGO_ENA		BITW(10)
-#define	CONNIO_ENA		BITW(11)
-#define  GREEN_PC_ENA   BITW(12)
+#define	RENEGO_ENA		BIT(10)
+#define	CONNIO_ENA		BIT(11)
+#define  GREEN_PC_ENA   BIT(12)
 
 #define  AUTO_RATE_00   00
 #define  AUTO_RATE_05   01
@@ -511,23 +508,23 @@
 
 #define  hp_intena		 0x40
 
-#define  RESET		 BITW(7)
-#define  PROG_HLT		 BITW(6)
-#define  PARITY		 BITW(5)
-#define  FIFO		 BITW(4)
-#define  SEL		 BITW(3)
-#define  SCAM_SEL		 BITW(2)
-#define  RSEL		 BITW(1)
-#define  TIMEOUT		 BITW(0)
-#define  BUS_FREE		 BITW(15)
-#define  XFER_CNT_0	 BITW(14)
-#define  PHASE		 BITW(13)
-#define  IUNKWN		 BITW(12)
-#define  ICMD_COMP	 BITW(11)
-#define  ITICKLE		 BITW(10)
-#define  IDO_STRT		 BITW(9)
-#define  ITAR_DISC	 BITW(8)
-#define  AUTO_INT		 (BITW(12)+BITW(11)+BITW(10)+BITW(9)+BITW(8))
+#define  RESET		 BIT(7)
+#define  PROG_HLT		 BIT(6)
+#define  PARITY		 BIT(5)
+#define  FIFO		 BIT(4)
+#define  SEL		 BIT(3)
+#define  SCAM_SEL		 BIT(2)
+#define  RSEL		 BIT(1)
+#define  TIMEOUT		 BIT(0)
+#define  BUS_FREE		 BIT(15)
+#define  XFER_CNT_0	 BIT(14)
+#define  PHASE		 BIT(13)
+#define  IUNKWN		 BIT(12)
+#define  ICMD_COMP	 BIT(11)
+#define  ITICKLE		 BIT(10)
+#define  IDO_STRT		 BIT(9)
+#define  ITAR_DISC	 BIT(8)
+#define  AUTO_INT		 (BIT(12)+BIT(11)+BIT(10)+BIT(9)+BIT(8))
 #define  CLR_ALL_INT	 0xFFFF
 #define  CLR_ALL_INT_1	 0xFF00
 
@@ -674,37 +671,37 @@
 #define  BIOS_DATA_OFFSET     0x60
 #define  BIOS_RELATIVE_CARD   0x64
 
-#define  AR3      (BITW(9) + BITW(8))
-#define  SDATA    BITW(10)
+#define  AR3      (BIT(9) + BIT(8))
+#define  SDATA    BIT(10)
 
-#define  CRD_OP   BITW(11)	/* Cmp Reg. w/ Data */
+#define  CRD_OP   BIT(11)	/* Cmp Reg. w/ Data */
 
-#define  CRR_OP   BITW(12)	/* Cmp Reg. w. Reg. */
+#define  CRR_OP   BIT(12)	/* Cmp Reg. w. Reg. */
 
-#define  CPE_OP   (BITW(14)+BITW(11))	/* Cmp SCSI phs & Branch EQ */
+#define  CPE_OP   (BIT(14)+BIT(11))	/* Cmp SCSI phs & Branch EQ */
 
-#define  CPN_OP   (BITW(14)+BITW(12))	/* Cmp SCSI phs & Branch NOT EQ */
+#define  CPN_OP   (BIT(14)+BIT(12))	/* Cmp SCSI phs & Branch NOT EQ */
 
 #define  ADATA_OUT   0x00
-#define  ADATA_IN    BITW(8)
-#define  ACOMMAND    BITW(10)
-#define  ASTATUS     (BITW(10)+BITW(8))
-#define  AMSG_OUT    (BITW(10)+BITW(9))
-#define  AMSG_IN     (BITW(10)+BITW(9)+BITW(8))
+#define  ADATA_IN    BIT(8)
+#define  ACOMMAND    BIT(10)
+#define  ASTATUS     (BIT(10)+BIT(8))
+#define  AMSG_OUT    (BIT(10)+BIT(9))
+#define  AMSG_IN     (BIT(10)+BIT(9)+BIT(8))
 
-#define  BRH_OP   BITW(13)	/* Branch */
+#define  BRH_OP   BIT(13)	/* Branch */
 
 #define  ALWAYS   0x00
-#define  EQUAL    BITW(8)
-#define  NOT_EQ   BITW(9)
+#define  EQUAL    BIT(8)
+#define  NOT_EQ   BIT(9)
 
-#define  TCB_OP   (BITW(13)+BITW(11))	/* Test condition & branch */
+#define  TCB_OP   (BIT(13)+BIT(11))	/* Test condition & branch */
 
-#define  FIFO_0      BITW(10)
+#define  FIFO_0      BIT(10)
 
-#define  MPM_OP   BITW(15)	/* Match phase and move data */
+#define  MPM_OP   BIT(15)	/* Match phase and move data */
 
-#define  MRR_OP   BITW(14)	/* Move DReg. to Reg. */
+#define  MRR_OP   BIT(14)	/* Move DReg. to Reg. */
 
 #define  S_IDREG  (BIT(2)+BIT(1)+BIT(0))
 
@@ -712,9 +709,9 @@
 #define  D_AR1    BIT(0)
 #define  D_BUCKET (BIT(2) + BIT(1) + BIT(0))
 
-#define  RAT_OP      (BITW(14)+BITW(13)+BITW(11))
+#define  RAT_OP      (BIT(14)+BIT(13)+BIT(11))
 
-#define  SSI_OP      (BITW(15)+BITW(11))
+#define  SSI_OP      (BIT(15)+BIT(11))
 
 #define  SSI_ITAR_DISC	(ITAR_DISC >> 8)
 #define  SSI_IDO_STRT	(IDO_STRT >> 8)
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 30905ce..a5763c6 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -521,7 +521,7 @@
 
 config SCSI_ADVANSYS
 	tristate "AdvanSys SCSI support"
-	depends on SCSI
+	depends on SCSI && VIRT_TO_BUS
 	depends on ISA || EISA || PCI
 	help
 	  This is a driver for all SCSI host adapters manufactured by
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index fa7ba64..252d180 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -47,9 +47,9 @@
 #include <linux/scatterlist.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 
 #include <scsi/scsi.h>
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 7ef0afc..5f3a0d7 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -285,7 +285,7 @@
 	dev = port->port_dev;
 
 	SAS_DPRINTK("DOING DISCOVERY on port %d, pid:%d\n", port->id,
-		    current->pid);
+		    task_pid_nr(current));
 
 	switch (dev->dev_type) {
 	case SAS_END_DEV:
@@ -320,7 +320,7 @@
 	}
 
 	SAS_DPRINTK("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id,
-		    current->pid, error);
+		    task_pid_nr(current), error);
 }
 
 static void sas_revalidate_domain(struct work_struct *work)
@@ -334,12 +334,12 @@
 			&port->disc.pending);
 
 	SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id,
-		    current->pid);
+		    task_pid_nr(current));
 	if (port->port_dev)
 		res = sas_ex_revalidate_domain(port->port_dev);
 
 	SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n",
-		    port->id, current->pid, res);
+		    port->id, task_pid_nr(current), res);
 }
 
 /* ---------- Events ---------- */
diff --git a/drivers/scsi/nsp32.h b/drivers/scsi/nsp32.h
index a976e81..6715ecb 100644
--- a/drivers/scsi/nsp32.h
+++ b/drivers/scsi/nsp32.h
@@ -69,11 +69,6 @@
 typedef u16 u16_le;
 
 /*
- * MACRO
- */
-#define BIT(x)      (1UL << (x))
-
-/*
  * BASIC Definitions
  */
 #ifndef TRUE
diff --git a/drivers/scsi/pcmcia/nsp_cs.h b/drivers/scsi/pcmcia/nsp_cs.h
index b7f0fa2..9839755 100644
--- a/drivers/scsi/pcmcia/nsp_cs.h
+++ b/drivers/scsi/pcmcia/nsp_cs.h
@@ -24,7 +24,6 @@
 /************************************
  * Some useful macros...
  */
-#define BIT(x)      (1L << (x))
 
 /* SCSI initiator must be ID 7 */
 #define NSP_INITIATOR_ID  7
diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h
index 9bb3d1d..fe415ec 100644
--- a/drivers/scsi/qla4xxx/ql4_fw.h
+++ b/drivers/scsi/qla4xxx/ql4_fw.h
@@ -671,7 +671,7 @@
 #define ET_CONTINUE	ET_CONT_T1
 
 /* Marker entry structure*/
-struct marker_entry {
+struct qla4_marker_entry {
 	struct qla4_header hdr;	/* 00-03 */
 
 	uint32_t system_defined; /* 04-07 */
diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c
index 5006ecb..e4461b5 100644
--- a/drivers/scsi/qla4xxx/ql4_iocb.c
+++ b/drivers/scsi/qla4xxx/ql4_iocb.c
@@ -69,7 +69,7 @@
 static int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha,
 				    struct ddb_entry *ddb_entry, int lun)
 {
-	struct marker_entry *marker_entry;
+	struct qla4_marker_entry *marker_entry;
 	unsigned long flags = 0;
 	uint8_t status = QLA_SUCCESS;
 
diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 72229df..40604a0 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -263,15 +263,15 @@
 	unsigned int result = 0;
 	unsigned int status = readw(uap->port.membase + UART01x_FR);
 
-#define BIT(uartbit, tiocmbit)		\
+#define TIOCMBIT(uartbit, tiocmbit)	\
 	if (status & uartbit)		\
 		result |= tiocmbit
 
-	BIT(UART01x_FR_DCD, TIOCM_CAR);
-	BIT(UART01x_FR_DSR, TIOCM_DSR);
-	BIT(UART01x_FR_CTS, TIOCM_CTS);
-	BIT(UART011_FR_RI, TIOCM_RNG);
-#undef BIT
+	TIOCMBIT(UART01x_FR_DCD, TIOCM_CAR);
+	TIOCMBIT(UART01x_FR_DSR, TIOCM_DSR);
+	TIOCMBIT(UART01x_FR_CTS, TIOCM_CTS);
+	TIOCMBIT(UART011_FR_RI, TIOCM_RNG);
+#undef TIOCMBIT
 	return result;
 }
 
@@ -282,18 +282,18 @@
 
 	cr = readw(uap->port.membase + UART011_CR);
 
-#define	BIT(tiocmbit, uartbit)		\
+#define	TIOCMBIT(tiocmbit, uartbit)		\
 	if (mctrl & tiocmbit)		\
 		cr |= uartbit;		\
 	else				\
 		cr &= ~uartbit
 
-	BIT(TIOCM_RTS, UART011_CR_RTS);
-	BIT(TIOCM_DTR, UART011_CR_DTR);
-	BIT(TIOCM_OUT1, UART011_CR_OUT1);
-	BIT(TIOCM_OUT2, UART011_CR_OUT2);
-	BIT(TIOCM_LOOP, UART011_CR_LBE);
-#undef BIT
+	TIOCMBIT(TIOCM_RTS, UART011_CR_RTS);
+	TIOCMBIT(TIOCM_DTR, UART011_CR_DTR);
+	TIOCMBIT(TIOCM_OUT1, UART011_CR_OUT1);
+	TIOCMBIT(TIOCM_OUT2, UART011_CR_OUT2);
+	TIOCMBIT(TIOCM_LOOP, UART011_CR_LBE);
+#undef TIOCMBIT
 
 	writew(cr, uap->port.membase + UART011_CR);
 }
diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c
index 7e8724d..f523cdf 100644
--- a/drivers/serial/crisv10.c
+++ b/drivers/serial/crisv10.c
@@ -442,11 +442,11 @@
 #include <asm/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <linux/delay.h>
 
 #include <asm/arch/svinto.h>
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index f013b40..1f4f6d0 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -460,7 +460,7 @@
 		return 0;
 	/* if not yet claimed, claim it for the driver */
 	dev_warn(&ps->dev->dev, "usbfs: process %d (%s) did not claim interface %u before use\n",
-	       current->pid, current->comm, ifnum);
+	       task_pid_nr(current), current->comm, ifnum);
 	return claimintf(ps, ifnum);
 }
 
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 73726c5..1d174dc 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -4006,7 +4006,7 @@
 	DBG(fsg, "removable=%d, stall=%d, buflen=%u\n",
 			mod_data.removable, mod_data.can_stall,
 			mod_data.buflen);
-	DBG(fsg, "I/O thread pid: %d\n", fsg->thread_task->pid);
+	DBG(fsg, "I/O thread pid: %d\n", task_pid_nr(fsg->thread_task));
 
 	set_bit(REGISTERED, &fsg->atomic_bitflags);
 
diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
index 9bb2cbf..5fb8675 100644
--- a/drivers/video/cyber2000fb.c
+++ b/drivers/video/cyber2000fb.c
@@ -62,7 +62,7 @@
 	struct display_switch	*dispsw;
 	struct display		*display;
 	struct pci_dev		*dev;
-	unsigned char 		__iomem *region;
+	unsigned char		__iomem *region;
 	unsigned char		__iomem *regs;
 	u_int			id;
 	int			func_use_count;
@@ -97,11 +97,11 @@
 /*
  * Our access methods.
  */
-#define cyber2000fb_writel(val,reg,cfb)	writel(val, (cfb)->regs + (reg))
-#define cyber2000fb_writew(val,reg,cfb)	writew(val, (cfb)->regs + (reg))
-#define cyber2000fb_writeb(val,reg,cfb)	writeb(val, (cfb)->regs + (reg))
+#define cyber2000fb_writel(val, reg, cfb)	writel(val, (cfb)->regs + (reg))
+#define cyber2000fb_writew(val, reg, cfb)	writew(val, (cfb)->regs + (reg))
+#define cyber2000fb_writeb(val, reg, cfb)	writeb(val, (cfb)->regs + (reg))
 
-#define cyber2000fb_readb(reg,cfb)	readb((cfb)->regs + (reg))
+#define cyber2000fb_readb(reg, cfb)		readb((cfb)->regs + (reg))
 
 static inline void
 cyber2000_crtcw(unsigned int reg, unsigned int val, struct cfb_info *cfb)
@@ -221,12 +221,8 @@
 static void
 cyber2000fb_imageblit(struct fb_info *info, const struct fb_image *image)
 {
-//	struct cfb_info *cfb = (struct cfb_info *)info;
-
-//	if (!(cfb->fb.var.accel_flags & FB_ACCELF_TEXT)) {
-		cfb_imageblit(info, image);
-		return;
-//	}
+	cfb_imageblit(info, image);
+	return;
 }
 
 static int cyber2000fb_sync(struct fb_info *info)
@@ -277,12 +273,12 @@
 
 	/*
 	 * Pseudocolour:
-	 *         8     8
+	 *	   8     8
 	 * pixel --/--+--/-->  red lut  --> red dac
-	 *            |  8
-	 *            +--/--> green lut --> green dac
-	 *            |  8
-	 *            +--/-->  blue lut --> blue dac
+	 *	      |  8
+	 *	      +--/--> green lut --> green dac
+	 *	      |  8
+	 *	      +--/-->  blue lut --> blue dac
 	 */
 	case FB_VISUAL_PSEUDOCOLOR:
 		if (regno >= NR_PALETTE)
@@ -292,9 +288,9 @@
 		green >>= 8;
 		blue >>= 8;
 
-		cfb->palette[regno].red   = red;
+		cfb->palette[regno].red = red;
 		cfb->palette[regno].green = green;
-		cfb->palette[regno].blue  = blue;
+		cfb->palette[regno].blue = blue;
 
 		cyber2000fb_writeb(regno, 0x3c8, cfb);
 		cyber2000fb_writeb(red, 0x3c9, cfb);
@@ -304,12 +300,12 @@
 
 	/*
 	 * Direct colour:
-	 *          n     rl
-	 *  pixel --/--+--/-->  red lut  --> red dac
-	 *             |  gl
-	 *             +--/--> green lut --> green dac
-	 *             |  bl
-	 *             +--/-->  blue lut --> blue dac
+	 *	   n     rl
+	 * pixel --/--+--/-->  red lut  --> red dac
+	 *	      |  gl
+	 *	      +--/--> green lut --> green dac
+	 *	      |  bl
+	 *	      +--/-->  blue lut --> blue dac
 	 * n = bpp, rl = red length, gl = green length, bl = blue length
 	 */
 	case FB_VISUAL_DIRECTCOLOR:
@@ -325,9 +321,11 @@
 			 * to the high 6 bits of the LUT.
 			 */
 			cyber2000fb_writeb(regno << 2, 0x3c8, cfb);
-			cyber2000fb_writeb(cfb->palette[regno >> 1].red, 0x3c9, cfb);
+			cyber2000fb_writeb(cfb->palette[regno >> 1].red,
+					   0x3c9, cfb);
 			cyber2000fb_writeb(green, 0x3c9, cfb);
-			cyber2000fb_writeb(cfb->palette[regno >> 1].blue, 0x3c9, cfb);
+			cyber2000fb_writeb(cfb->palette[regno >> 1].blue,
+					   0x3c9, cfb);
 
 			green = cfb->palette[regno << 3].green;
 
@@ -335,9 +333,9 @@
 		}
 
 		if (var->green.length >= 5 && regno < 32) {
-			cfb->palette[regno << 3].red   = red;
+			cfb->palette[regno << 3].red = red;
 			cfb->palette[regno << 3].green = green;
-			cfb->palette[regno << 3].blue  = blue;
+			cfb->palette[regno << 3].blue = blue;
 
 			/*
 			 * The 5 bits of each colour component are
@@ -351,9 +349,9 @@
 		}
 
 		if (var->green.length == 4 && regno < 16) {
-			cfb->palette[regno << 4].red   = red;
+			cfb->palette[regno << 4].red = red;
 			cfb->palette[regno << 4].green = green;
-			cfb->palette[regno << 4].blue  = blue;
+			cfb->palette[regno << 4].blue = blue;
 
 			/*
 			 * The 5 bits of each colour component are
@@ -377,12 +375,12 @@
 
 	/*
 	 * True colour:
-	 *          n     rl
-	 *  pixel --/--+--/--> red dac
-	 *             |  gl
-	 *             +--/--> green dac
-	 *             |  bl
-	 *             +--/--> blue dac
+	 *	   n     rl
+	 * pixel --/--+--/--> red dac
+	 *	      |  gl
+	 *	      +--/--> green dac
+	 *	      |  bl
+	 *	      +--/--> blue dac
 	 * n = bpp, rl = red length, gl = green length, bl = blue length
 	 */
 	case FB_VISUAL_TRUECOLOR:
@@ -494,9 +492,9 @@
 
 	/* PLL registers */
 	cyber2000_grphw(EXT_DCLK_MULT, hw->clock_mult, cfb);
-	cyber2000_grphw(EXT_DCLK_DIV,  hw->clock_div, cfb);
+	cyber2000_grphw(EXT_DCLK_DIV, hw->clock_div, cfb);
 	cyber2000_grphw(EXT_MCLK_MULT, cfb->mclk_mult, cfb);
-	cyber2000_grphw(EXT_MCLK_DIV,  cfb->mclk_div, cfb);
+	cyber2000_grphw(EXT_MCLK_DIV, cfb->mclk_div, cfb);
 	cyber2000_grphw(0x90, 0x01, cfb);
 	cyber2000_grphw(0xb9, 0x80, cfb);
 	cyber2000_grphw(0xb9, 0x00, cfb);
@@ -515,8 +513,8 @@
 	/*
 	 * Set up accelerator registers
 	 */
-	cyber2000fb_writew(hw->width,     CO_REG_SRC_WIDTH,  cfb);
-	cyber2000fb_writew(hw->width,     CO_REG_DEST_WIDTH, cfb);
+	cyber2000fb_writew(hw->width, CO_REG_SRC_WIDTH, cfb);
+	cyber2000fb_writew(hw->width, CO_REG_DEST_WIDTH, cfb);
 	cyber2000fb_writeb(hw->co_pixfmt, CO_REG_PIXFMT, cfb);
 }
 
@@ -549,15 +547,15 @@
 {
 	u_int Htotal, Hblankend, Hsyncend;
 	u_int Vtotal, Vdispend, Vblankstart, Vblankend, Vsyncstart, Vsyncend;
-#define BIT(v,b1,m,b2) (((v >> b1) & m) << b2)
+#define ENCODE_BIT(v, b1, m, b2) ((((v) >> (b1)) & (m)) << (b2))
 
 	hw->crtc[13] = hw->pitch;
 	hw->crtc[17] = 0xe3;
 	hw->crtc[14] = 0;
 	hw->crtc[8]  = 0;
 
-	Htotal      = var->xres + var->right_margin +
-		      var->hsync_len + var->left_margin;
+	Htotal     = var->xres + var->right_margin +
+		     var->hsync_len + var->left_margin;
 
 	if (Htotal > 2080)
 		return -EINVAL;
@@ -567,15 +565,15 @@
 	hw->crtc[2] = var->xres >> 3;
 	hw->crtc[4] = (var->xres + var->right_margin) >> 3;
 
-	Hblankend   = (Htotal - 4*8) >> 3;
+	Hblankend   = (Htotal - 4 * 8) >> 3;
 
-	hw->crtc[3] = BIT(Hblankend,  0, 0x1f,  0) |
-		      BIT(1,          0, 0x01,  7);
+	hw->crtc[3] = ENCODE_BIT(Hblankend,  0, 0x1f,  0) |
+		      ENCODE_BIT(1,          0, 0x01,  7);
 
 	Hsyncend    = (var->xres + var->right_margin + var->hsync_len) >> 3;
 
-	hw->crtc[5] = BIT(Hsyncend,   0, 0x1f,  0) |
-		      BIT(Hblankend,  5, 0x01,  7);
+	hw->crtc[5] = ENCODE_BIT(Hsyncend,   0, 0x1f,  0) |
+		      ENCODE_BIT(Hblankend,  5, 0x01,  7);
 
 	Vdispend    = var->yres - 1;
 	Vsyncstart  = var->yres + var->lower_margin;
@@ -590,20 +588,20 @@
 	Vblankend   = Vtotal - 10;
 
 	hw->crtc[6]  = Vtotal;
-	hw->crtc[7]  = BIT(Vtotal,     8, 0x01,  0) |
-			BIT(Vdispend,   8, 0x01,  1) |
-			BIT(Vsyncstart, 8, 0x01,  2) |
-			BIT(Vblankstart,8, 0x01,  3) |
-			BIT(1,          0, 0x01,  4) |
-	        	BIT(Vtotal,     9, 0x01,  5) |
-			BIT(Vdispend,   9, 0x01,  6) |
-			BIT(Vsyncstart, 9, 0x01,  7);
-	hw->crtc[9]  = BIT(0,          0, 0x1f,  0) |
-		        BIT(Vblankstart,9, 0x01,  5) |
-			BIT(1,          0, 0x01,  6);
+	hw->crtc[7]  = ENCODE_BIT(Vtotal,     8, 0x01,  0) |
+			ENCODE_BIT(Vdispend,   8, 0x01,  1) |
+			ENCODE_BIT(Vsyncstart, 8, 0x01,  2) |
+			ENCODE_BIT(Vblankstart, 8, 0x01,  3) |
+			ENCODE_BIT(1,          0, 0x01,  4) |
+			ENCODE_BIT(Vtotal,     9, 0x01,  5) |
+			ENCODE_BIT(Vdispend,   9, 0x01,  6) |
+			ENCODE_BIT(Vsyncstart, 9, 0x01,  7);
+	hw->crtc[9]  = ENCODE_BIT(0,          0, 0x1f,  0) |
+			ENCODE_BIT(Vblankstart, 9, 0x01,  5) |
+			ENCODE_BIT(1,          0, 0x01,  6);
 	hw->crtc[10] = Vsyncstart;
-	hw->crtc[11] = BIT(Vsyncend,   0, 0x0f,  0) |
-		       BIT(1,          0, 0x01,  7);
+	hw->crtc[11] = ENCODE_BIT(Vsyncend,   0, 0x0f,  0) |
+		       ENCODE_BIT(1,          0, 0x01,  7);
 	hw->crtc[12] = Vdispend;
 	hw->crtc[15] = Vblankstart;
 	hw->crtc[16] = Vblankend;
@@ -615,10 +613,10 @@
 	 * 4=LINECOMP:10 5-IVIDEO 6=FIXCNT
 	 */
 	hw->crtc_ofl =
-		BIT(Vtotal,     10, 0x01,  0) |
-		BIT(Vdispend,   10, 0x01,  1) |
-		BIT(Vsyncstart, 10, 0x01,  2) |
-		BIT(Vblankstart,10, 0x01,  3) |
+		ENCODE_BIT(Vtotal, 10, 0x01, 0) |
+		ENCODE_BIT(Vdispend, 10, 0x01, 1) |
+		ENCODE_BIT(Vsyncstart, 10, 0x01, 2) |
+		ENCODE_BIT(Vblankstart, 10, 0x01, 3) |
 		EXT_CRT_VRTOFL_LINECOMP10;
 
 	/* woody: set the interlaced bit... */
@@ -750,11 +748,11 @@
 	var->red.msb_right	= 0;
 	var->green.msb_right	= 0;
 	var->blue.msb_right	= 0;
+	var->transp.offset	= 0;
+	var->transp.length	= 0;
 
 	switch (var->bits_per_pixel) {
 	case 8:	/* PSEUDOCOLOUR, 256 */
-		var->transp.offset	= 0;
-		var->transp.length	= 0;
 		var->red.offset		= 0;
 		var->red.length		= 8;
 		var->green.offset	= 0;
@@ -766,8 +764,6 @@
 	case 16:/* DIRECTCOLOUR, 64k or 32k */
 		switch (var->green.length) {
 		case 6: /* RGB565, 64k */
-			var->transp.offset	= 0;
-			var->transp.length	= 0;
 			var->red.offset		= 11;
 			var->red.length		= 5;
 			var->green.offset	= 5;
@@ -778,8 +774,6 @@
 
 		default:
 		case 5: /* RGB555, 32k */
-			var->transp.offset	= 0;
-			var->transp.length	= 0;
 			var->red.offset		= 10;
 			var->red.length		= 5;
 			var->green.offset	= 5;
@@ -802,8 +796,6 @@
 		break;
 
 	case 24:/* TRUECOLOUR, 16m */
-		var->transp.offset	= 0;
-		var->transp.length	= 0;
 		var->red.offset		= 16;
 		var->red.length		= 8;
 		var->green.offset	= 8;
@@ -830,7 +822,7 @@
 	mem = var->xres_virtual * var->yres_virtual * (var->bits_per_pixel / 8);
 	if (mem > cfb->fb.fix.smem_len)
 		var->yres_virtual = cfb->fb.fix.smem_len * 8 /
-			(var->bits_per_pixel * var->xres_virtual);
+				    (var->bits_per_pixel * var->xres_virtual);
 
 	if (var->yres > var->yres_virtual)
 		var->yres = var->yres_virtual;
@@ -921,7 +913,7 @@
 		hw.fetch <<= 1;
 	hw.fetch += 1;
 
-	cfb->fb.fix.line_length	= var->xres_virtual * var->bits_per_pixel / 8;
+	cfb->fb.fix.line_length = var->xres_virtual * var->bits_per_pixel / 8;
 
 	/*
 	 * Same here - if the size of the video mode exceeds the
@@ -952,7 +944,6 @@
 	return 0;
 }
 
-
 /*
  *    Pan or Wrap the Display
  */
@@ -1002,15 +993,15 @@
 	switch (blank) {
 	case FB_BLANK_POWERDOWN:	/* powerdown - both sync lines down */
 		sync = EXT_SYNC_CTL_VS_0 | EXT_SYNC_CTL_HS_0;
-		break;	
+		break;
 	case FB_BLANK_HSYNC_SUSPEND:	/* hsync off */
 		sync = EXT_SYNC_CTL_VS_NORMAL | EXT_SYNC_CTL_HS_0;
-		break;	
+		break;
 	case FB_BLANK_VSYNC_SUSPEND:	/* vsync off */
 		sync = EXT_SYNC_CTL_VS_0 | EXT_SYNC_CTL_HS_NORMAL;
 		break;
-	case FB_BLANK_NORMAL:	        /* soft blank */
-	default: /* unblank */
+	case FB_BLANK_NORMAL:		/* soft blank */
+	default:			/* unblank */
 		break;
 	}
 
@@ -1018,7 +1009,8 @@
 
 	if (blank <= 1) {
 		/* turn on ramdacs */
-		cfb->ramdac_powerdown &= ~(RAMDAC_DACPWRDN | RAMDAC_BYPASS | RAMDAC_RAMPWRDN);
+		cfb->ramdac_powerdown &= ~(RAMDAC_DACPWRDN | RAMDAC_BYPASS |
+					   RAMDAC_RAMPWRDN);
 		cyber2000fb_write_ramdac_ctrl(cfb);
 	}
 
@@ -1043,7 +1035,8 @@
 
 	if (blank >= 2) {
 		/* turn off ramdacs */
-		cfb->ramdac_powerdown |= RAMDAC_DACPWRDN | RAMDAC_BYPASS | RAMDAC_RAMPWRDN;
+		cfb->ramdac_powerdown |= RAMDAC_DACPWRDN | RAMDAC_BYPASS |
+					 RAMDAC_RAMPWRDN;
 		cyber2000fb_write_ramdac_ctrl(cfb);
 	}
 
@@ -1068,7 +1061,7 @@
  * of this driver.  It is here solely at the moment to support the other
  * CyberPro modules external to this driver.
  */
-static struct cfb_info		*int_cfb_info;
+static struct cfb_info *int_cfb_info;
 
 /*
  * Enable access to the extended registers
@@ -1085,6 +1078,7 @@
 		cyber2000_grphw(EXT_FUNC_CTL, old, cfb);
 	}
 }
+EXPORT_SYMBOL(cyber2000fb_enable_extregs);
 
 /*
  * Disable access to the extended registers
@@ -1104,11 +1098,13 @@
 	else
 		cfb->func_use_count -= 1;
 }
+EXPORT_SYMBOL(cyber2000fb_disable_extregs);
 
 void cyber2000fb_get_fb_var(struct cfb_info *cfb, struct fb_var_screeninfo *var)
 {
 	memcpy(var, &cfb->fb.var, sizeof(struct fb_var_screeninfo));
 }
+EXPORT_SYMBOL(cyber2000fb_get_fb_var);
 
 /*
  * Attach a capture/tv driver to the core CyberX0X0 driver.
@@ -1122,13 +1118,15 @@
 		info->fb_size	      = int_cfb_info->fb.fix.smem_len;
 		info->enable_extregs  = cyber2000fb_enable_extregs;
 		info->disable_extregs = cyber2000fb_disable_extregs;
-		info->info            = int_cfb_info;
+		info->info	      = int_cfb_info;
 
-		strlcpy(info->dev_name, int_cfb_info->fb.fix.id, sizeof(info->dev_name));
+		strlcpy(info->dev_name, int_cfb_info->fb.fix.id,
+			sizeof(info->dev_name));
 	}
 
 	return int_cfb_info != NULL;
 }
+EXPORT_SYMBOL(cyber2000fb_attach);
 
 /*
  * Detach a capture/tv driver from the core CyberX0X0 driver.
@@ -1136,12 +1134,7 @@
 void cyber2000fb_detach(int idx)
 {
 }
-
-EXPORT_SYMBOL(cyber2000fb_attach);
 EXPORT_SYMBOL(cyber2000fb_detach);
-EXPORT_SYMBOL(cyber2000fb_enable_extregs);
-EXPORT_SYMBOL(cyber2000fb_disable_extregs);
-EXPORT_SYMBOL(cyber2000fb_get_fb_var);
 
 /*
  * These parameters give
@@ -1205,7 +1198,7 @@
 	int i;
 
 	for (i = 0; i < sizeof(igs_regs); i += 2)
-		cyber2000_grphw(igs_regs[i], igs_regs[i+1], cfb);
+		cyber2000_grphw(igs_regs[i], igs_regs[i + 1], cfb);
 
 	if (cfb->id == ID_CYBERPRO_5000) {
 		unsigned char val;
@@ -1215,8 +1208,8 @@
 	}
 }
 
-static struct cfb_info * __devinit
-cyberpro_alloc_fb_info(unsigned int id, char *name)
+static struct cfb_info __devinit *cyberpro_alloc_fb_info(unsigned int id,
+							 char *name)
 {
 	struct cfb_info *cfb;
 
@@ -1228,9 +1221,9 @@
 	cfb->id			= id;
 
 	if (id == ID_CYBERPRO_5000)
-		cfb->ref_ps	= 40690; // 24.576 MHz
+		cfb->ref_ps	= 40690; /* 24.576 MHz */
 	else
-		cfb->ref_ps	= 69842; // 14.31818 MHz (69841?)
+		cfb->ref_ps	= 69842; /* 14.31818 MHz (69841?) */
 
 	cfb->divisors[0]	= 1;
 	cfb->divisors[1]	= 2;
@@ -1282,8 +1275,7 @@
 	return cfb;
 }
 
-static void
-cyberpro_free_fb_info(struct cfb_info *cfb)
+static void cyberpro_free_fb_info(struct cfb_info *cfb)
 {
 	if (cfb) {
 		/*
@@ -1300,8 +1292,7 @@
  *  video=cyber2000:font:fontname
  */
 #ifndef MODULE
-static int
-cyber2000fb_setup(char *options)
+static int cyber2000fb_setup(char *options)
 {
 	char *opt;
 
@@ -1315,7 +1306,8 @@
 		if (strncmp(opt, "font:", 5) == 0) {
 			static char default_font_storage[40];
 
-			strlcpy(default_font_storage, opt + 5, sizeof(default_font_storage));
+			strlcpy(default_font_storage, opt + 5,
+				sizeof(default_font_storage));
 			default_font = default_font_storage;
 			continue;
 		}
@@ -1354,10 +1346,18 @@
 	 * Determine the size of the memory.
 	 */
 	switch (cfb->mem_ctl2 & MEM_CTL2_SIZE_MASK) {
-	case MEM_CTL2_SIZE_4MB:	smem_size = 0x00400000; break;
-	case MEM_CTL2_SIZE_2MB:	smem_size = 0x00200000; break;
-	case MEM_CTL2_SIZE_1MB: smem_size = 0x00100000; break;
-	default:		smem_size = 0x00100000; break;
+	case MEM_CTL2_SIZE_4MB:
+		smem_size = 0x00400000;
+		break;
+	case MEM_CTL2_SIZE_2MB:
+		smem_size = 0x00200000;
+		break;
+	case MEM_CTL2_SIZE_1MB:
+		smem_size = 0x00100000;
+		break;
+	default:
+		smem_size = 0x00100000;
+		break;
 	}
 
 	cfb->fb.fix.smem_len   = smem_size;
@@ -1366,8 +1366,8 @@
 
 	err = -EINVAL;
 	if (!fb_find_mode(&cfb->fb.var, &cfb->fb, NULL, NULL, 0,
-	    		  &cyber2000fb_default_mode, 8)) {
-		printk("%s: no valid mode found\n", cfb->fb.fix.id);
+			  &cyber2000fb_default_mode, 8)) {
+		printk(KERN_ERR "%s: no valid mode found\n", cfb->fb.fix.id);
 		goto failed;
 	}
 
@@ -1377,7 +1377,7 @@
 	if (cfb->fb.var.yres_virtual < cfb->fb.var.yres)
 		cfb->fb.var.yres_virtual = cfb->fb.var.yres;
 
-//	fb_set_var(&cfb->fb.var, -1, &cfb->fb);
+/*	fb_set_var(&cfb->fb.var, -1, &cfb->fb); */
 
 	/*
 	 * Calculate the hsync and vsync frequencies.  Note that
@@ -1425,20 +1425,20 @@
 
 #include <asm/arch/hardware.h>
 
-static int __devinit
-cyberpro_vl_probe(void)
+static int __devinit cyberpro_vl_probe(void)
 {
 	struct cfb_info *cfb;
 	int err = -ENOMEM;
 
-	if (!request_mem_region(FB_START,FB_SIZE,"CyberPro2010")) return err;
+	if (!request_mem_region(FB_START, FB_SIZE, "CyberPro2010"))
+		return err;
 
 	cfb = cyberpro_alloc_fb_info(ID_CYBERPRO_2010, "CyberPro2010");
 	if (!cfb)
 		goto failed_release;
 
 	cfb->dev = NULL;
-	cfb->region = ioremap(FB_START,FB_SIZE);
+	cfb->region = ioremap(FB_START, FB_SIZE);
 	if (!cfb->region)
 		goto failed_ioremap;
 
@@ -1475,7 +1475,7 @@
 failed_ioremap:
 	cyberpro_free_fb_info(cfb);
 failed_release:
-	release_mem_region(FB_START,FB_SIZE);
+	release_mem_region(FB_START, FB_SIZE);
 
 	return err;
 }
@@ -1538,7 +1538,8 @@
 	 * Allow the CyberPro to accept PCI burst accesses
 	 */
 	if (cfb->id == ID_CYBERPRO_2010) {
-		printk(KERN_INFO "%s: NOT enabling PCI bursts\n", cfb->fb.fix.id);
+		printk(KERN_INFO "%s: NOT enabling PCI bursts\n",
+		       cfb->fb.fix.id);
 	} else {
 		val = cyber2000_grphr(EXT_BUS_CTL, cfb);
 		if (!(val & EXT_BUS_CTL_PCIBURST_WRITE)) {
@@ -1688,9 +1689,10 @@
 }
 
 static struct pci_device_id cyberpro_pci_table[] = {
-//	Not yet
-//	{ PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_1682,
-//		PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_IGA_1682 },
+/*	Not yet
+ *	{ PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_1682,
+ *		PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_IGA_1682 },
+ */
 	{ PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_2000,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_CYBERPRO_2000 },
 	{ PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_2010,
@@ -1700,7 +1702,7 @@
 	{ 0, }
 };
 
-MODULE_DEVICE_TABLE(pci,cyberpro_pci_table);
+MODULE_DEVICE_TABLE(pci, cyberpro_pci_table);
 
 static struct pci_driver cyberpro_driver = {
 	.name		= "CyberPro",
diff --git a/drivers/video/geode/video_gx.c b/drivers/video/geode/video_gx.c
index 7f3f18d..febf09c 100644
--- a/drivers/video/geode/video_gx.c
+++ b/drivers/video/geode/video_gx.c
@@ -127,7 +127,7 @@
 	int timeout = 1000;
 
 	/* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */
-	if (cpu_data->x86_mask == 1) {
+	if (cpu_data(0).x86_mask == 1) {
 		pll_table = gx_pll_table_14MHz;
 		pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz);
 	} else {
diff --git a/drivers/video/pnx4008/sdum.h b/drivers/video/pnx4008/sdum.h
index e8c5dcd..189c3d6 100644
--- a/drivers/video/pnx4008/sdum.h
+++ b/drivers/video/pnx4008/sdum.h
@@ -77,9 +77,6 @@
 #define CONF_DIRTYDETECTION_OFF	(0x600)
 #define CONF_DIRTYDETECTION_ON	(0x601)
 
-/* Set the corresponding bit. */
-#define BIT(n) (0x1U << (n))
-
 struct dumchannel_uf {
 	int channelnr;
 	u32 *dirty;
diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c
index 38bd373..a684b1e 100644
--- a/drivers/watchdog/at91rm9200_wdt.c
+++ b/drivers/watchdog/at91rm9200_wdt.c
@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/init.h>
@@ -19,7 +20,6 @@
 #include <linux/platform_device.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 #include <asm/arch/at91_st.h>
 
diff --git a/drivers/watchdog/ks8695_wdt.c b/drivers/watchdog/ks8695_wdt.c
index 7150fb9..e3a29c3 100644
--- a/drivers/watchdog/ks8695_wdt.c
+++ b/drivers/watchdog/ks8695_wdt.c
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/init.h>
@@ -18,7 +19,6 @@
 #include <linux/platform_device.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/arch/regs-timer.h>
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index 719b066..635ca45 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -39,11 +39,11 @@
 #include <linux/platform_device.h>
 #include <linux/moduleparam.h>
 #include <linux/clk.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/hardware.h>
-#include <asm/bitops.h>
 
 #include <asm/arch/prcm.h>
 
diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c
index 3475f47..34a2b3b 100644
--- a/drivers/watchdog/sa1100_wdt.c
+++ b/drivers/watchdog/sa1100_wdt.c
@@ -25,13 +25,13 @@
 #include <linux/miscdevice.h>
 #include <linux/watchdog.h>
 #include <linux/init.h>
+#include <linux/bitops.h>
 
 #ifdef CONFIG_ARCH_PXA
 #include <asm/arch/pxa-regs.h>
 #endif
 
 #include <asm/hardware.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 
 #define OSCR_FREQ		CLOCK_TICK_RATE
diff --git a/fs/Kconfig b/fs/Kconfig
index e31f3691..cc28a69 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -220,7 +220,7 @@
 
 config JBD_DEBUG
 	bool "JBD (ext3) debugging support"
-	depends on JBD
+	depends on JBD && DEBUG_FS
 	help
 	  If you are using the ext3 journaled file system (or potentially any
 	  other file system/device using JBD), this option allows you to
@@ -229,10 +229,10 @@
 	  debugging output will be turned off.
 
 	  If you select Y here, then you will be able to turn on debugging
-	  with "echo N > /proc/sys/fs/jbd-debug", where N is a number between
-	  1 and 5, the higher the number, the more debugging output is
-	  generated.  To turn debugging off again, do
-	  "echo 0 > /proc/sys/fs/jbd-debug".
+	  with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
+	  number between 1 and 5, the higher the number, the more debugging
+	  output is generated.  To turn debugging off again, do
+	  "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
 
 config JBD2
 	tristate
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index e7204d7..45f5992 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -80,7 +80,7 @@
 
 	*uid = current->uid;
 	*gid = current->gid;
-	*pgrp = process_group(current);
+	*pgrp = task_pgrp_nr(current);
 
 	*minproto = *maxproto = AUTOFS_PROTO_VERSION;
 
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index c148953..5efff3c 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -214,8 +214,8 @@
 
 	oz_mode = autofs_oz_mode(sbi);
 	DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, "
-				"oz_mode = %d\n", pid_nr(task_pid(current)),
-				process_group(current), sbi->catatonic,
+				"oz_mode = %d\n", task_pid_nr(current),
+				task_pgrp_nr(current), sbi->catatonic,
 				oz_mode));
 
 	/*
@@ -536,7 +536,7 @@
 	struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
 	void __user *argp = (void __user *)arg;
 
-	DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,process_group(current)));
+	DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,task_pgrp_nr(current)));
 
 	if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
 	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d85f42f..2d4ae40 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -131,7 +131,7 @@
    filesystem without "magic".) */
 
 static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
-	return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
+	return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp;
 }
 
 /* Does a dentry have some pending activity? */
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index cd81f08..7f05d6c 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -226,7 +226,7 @@
 
 	*uid = current->uid;
 	*gid = current->gid;
-	*pgrp = process_group(current);
+	*pgrp = task_pgrp_nr(current);
 
 	*minproto = AUTOFS_MIN_PROTO_VERSION;
 	*maxproto = AUTOFS_MAX_PROTO_VERSION;
@@ -323,7 +323,7 @@
 	sbi->pipe = NULL;
 	sbi->catatonic = 1;
 	sbi->exp_timeout = 0;
-	sbi->oz_pgrp = process_group(current);
+	sbi->oz_pgrp = task_pgrp_nr(current);
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 45ff3d6..2bbcc81 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -582,7 +582,7 @@
 	oz_mode = autofs4_oz_mode(sbi);
 
 	DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
-		 current->pid, process_group(current), sbi->catatonic, oz_mode);
+		 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
 
 	unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name);
 	if (!unhashed) {
@@ -976,7 +976,7 @@
 	void __user *p = (void __user *)arg;
 
 	DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u",
-		cmd,arg,sbi,process_group(current));
+		cmd,arg,sbi,task_pgrp_nr(current));
 
 	if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
 	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6e2f3b8..ba8de7c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1383,10 +1383,10 @@
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
 	prstatus->pr_sighold = p->blocked.sig[0];
-	prstatus->pr_pid = p->pid;
-	prstatus->pr_ppid = p->parent->pid;
-	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = process_session(p);
+	prstatus->pr_pid = task_pid_vnr(p);
+	prstatus->pr_ppid = task_pid_vnr(p->parent);
+	prstatus->pr_pgrp = task_pgrp_vnr(p);
+	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1429,10 +1429,10 @@
 			psinfo->pr_psargs[i] = ' ';
 	psinfo->pr_psargs[len] = 0;
 
-	psinfo->pr_pid = p->pid;
-	psinfo->pr_ppid = p->parent->pid;
-	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = process_session(p);
+	psinfo->pr_pid = task_pid_vnr(p);
+	psinfo->pr_ppid = task_pid_vnr(p->parent);
+	psinfo->pr_pgrp = task_pgrp_vnr(p);
+	psinfo->pr_sid = task_session_vnr(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 033861c..32649f2 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1342,10 +1342,10 @@
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
 	prstatus->pr_sighold = p->blocked.sig[0];
-	prstatus->pr_pid = p->pid;
-	prstatus->pr_ppid = p->parent->pid;
-	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = process_session(p);
+	prstatus->pr_pid = task_pid_vnr(p);
+	prstatus->pr_ppid = task_pid_vnr(p->parent);
+	prstatus->pr_pgrp = task_pgrp_vnr(p);
+	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1391,10 +1391,10 @@
 			psinfo->pr_psargs[i] = ' ';
 	psinfo->pr_psargs[len] = 0;
 
-	psinfo->pr_pid = p->pid;
-	psinfo->pr_ppid = p->parent->pid;
-	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = process_session(p);
+	psinfo->pr_pid = task_pid_vnr(p);
+	psinfo->pr_ppid = task_pid_vnr(p->parent);
+	psinfo->pr_pgrp = task_pgrp_vnr(p);
+	psinfo->pr_sid = task_session_vnr(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index bed6215..3d41916 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,19 @@
+Version 1.51
+------------
+Fix memory leak in statfs when mounted to very old servers (e.g.
+Windows 9x).  Add new feature "POSIX open" which allows servers
+which support the current POSIX Extensions to provide better semantics
+(e.g. delete for open files opened with posix open).  Take into
+account umask on posix mkdir not just older style mkdir.  Add
+ability to mount to IPC$ share (which allows CIFS named pipes to be
+opened, read and written as if they were files).  When 1st tree
+connect fails (e.g. due to signing negotiation failure) fix
+leak that causes cifsd not to stop and rmmod to fail to cleanup
+cifs_request_buffers pool. Fix problem with POSIX Open/Mkdir on
+bigendian architectures. Fix possible memory corruption when
+EAGAIN returned on kern_recvmsg. Return better error if server
+requires packet signing but client has disabled it.
+
 Version 1.50
 ------------
 Fix NTLMv2 signing. NFS server mounted over cifs works (if cifs mount is
@@ -6,7 +22,10 @@
 Samba supports newer POSIX CIFS Protocol Extensions). Add "nounix"
 mount option to allow disabling the CIFS Unix Extensions for just
 that mount. Fix hang on spinlock in find_writable_file (race when
-reopening file after session crash).
+reopening file after session crash).  Byte range unlock request to
+windows server could unlock more bytes (on server copy of file)
+than intended if start of unlock request is well before start of
+a previous byte range lock that we issued.
 
 Version 1.49
 ------------
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 6ecd9d6..ff6ba8d 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -3,4 +3,4 @@
 #
 obj-$(CONFIG_CIFS) += cifs.o
 
-cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o sess.o export.o
+cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o sess.o export.o cifsacl.o
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index f50a88d..2a01f3e 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -385,10 +385,9 @@
 	unsigned long *optr;
 
 	size = eoc - ctx->pointer + 1;
-	*oid = kmalloc(size * sizeof (unsigned long), GFP_ATOMIC);
-	if (*oid == NULL) {
+	*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
+	if (*oid == NULL)
 		return 0;
-	}
 
 	optr = *oid;
 
@@ -581,9 +580,8 @@
 			return 0;
 		} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
 			   || (tag != ASN1_SEQ)) {
-			cFYI(1,
-			     ("Exit 6 cls = %d con = %d tag = %d end = %p (%d)",
-			      cls, con, tag, end, *end));
+			cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
+				cls, con, tag, end, *end));
 		}
 
 		if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 1bf8cf5..73c4c41 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -209,13 +209,16 @@
 		i++;
 		tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
 		dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
-		length =
-		    sprintf(buf,
-			    "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x "
-			    "Attributes: 0x%x\nPathComponentMax: %d Status: %d",
-			    i, tcon->treeName,
-			    atomic_read(&tcon->useCount),
-			    tcon->nativeFileSystem,
+		length = sprintf(buf, "\n%d) %s Uses: %d ", i,
+				 tcon->treeName, atomic_read(&tcon->useCount));
+		buf += length;
+		if (tcon->nativeFileSystem) {
+			length = sprintf(buf, "Type: %s ",
+					 tcon->nativeFileSystem);
+			buf += length;
+		}
+		length = sprintf(buf, "DevInfo: 0x%x Attributes: 0x%x"
+				 "\nPathComponentMax: %d Status: %d",
 			    le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
 			    le32_to_cpu(tcon->fsAttrInfo.Attributes),
 			    le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
@@ -876,11 +879,16 @@
 	if (count < 3) {
 		/* single char or single char followed by null */
 		c = flags_string[0];
-		if (c == '0' || c == 'n' || c == 'N')
+		if (c == '0' || c == 'n' || c == 'N') {
 			extended_security = CIFSSEC_DEF; /* default */
-		else if (c == '1' || c == 'y' || c == 'Y')
+			return count;
+		} else if (c == '1' || c == 'y' || c == 'Y') {
 			extended_security = CIFSSEC_MAX;
-		return count;
+			return count;
+		} else if (!isdigit(c)) {
+			cERROR(1, ("invalid flag %c", c));
+			return -EINVAL;
+		}
 	}
 	/* else we have a number */
 
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
new file mode 100644
index 0000000..e8e5635
--- /dev/null
+++ b/fs/cifs/cifsacl.c
@@ -0,0 +1,333 @@
+/*
+ *   fs/cifs/cifsacl.c
+ *
+ *   Copyright (C) International Business Machines  Corp., 2007
+ *   Author(s): Steve French (sfrench@us.ibm.com)
+ *
+ *   Contains the routines for mapping CIFS/NTFS ACLs
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as published
+ *   by the Free Software Foundation; either version 2.1 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This library is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this library; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/fs.h>
+#include "cifspdu.h"
+#include "cifsglob.h"
+#include "cifsacl.h"
+#include "cifsproto.h"
+#include "cifs_debug.h"
+
+
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+
+static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
+	{{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
+	{{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
+	{{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
+	{{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
+	{{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(544), 0, 0, 0} }, "root"},
+	{{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(545), 0, 0, 0} }, "users"},
+	{{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(546), 0, 0, 0} }, "guest"}
+};
+
+
+/* security id for everyone */
+static const struct cifs_sid sid_everyone =
+		{1, 1, {0, 0, 0, 0, 0, 0}, {} };
+/* group users */
+static const struct cifs_sid sid_user =
+		{1, 2 , {0, 0, 0, 0, 0, 5}, {} };
+
+
+int match_sid(struct cifs_sid *ctsid)
+{
+	int i, j;
+	int num_subauth, num_sat, num_saw;
+	struct cifs_sid *cwsid;
+
+	if (!ctsid)
+		return (-1);
+
+	for (i = 0; i < NUM_WK_SIDS; ++i) {
+		cwsid = &(wksidarr[i].cifssid);
+
+		/* compare the revision */
+		if (ctsid->revision != cwsid->revision)
+			continue;
+
+		/* compare all of the six auth values */
+		for (j = 0; j < 6; ++j) {
+			if (ctsid->authority[j] != cwsid->authority[j])
+				break;
+		}
+		if (j < 6)
+			continue; /* all of the auth values did not match */
+
+		/* compare all of the subauth values if any */
+		num_sat = ctsid->num_subauth;
+		num_saw = cwsid->num_subauth;
+		num_subauth = num_sat < num_saw ? num_sat : num_saw;
+		if (num_subauth) {
+			for (j = 0; j < num_subauth; ++j) {
+				if (ctsid->sub_auth[j] != cwsid->sub_auth[j])
+					break;
+			}
+			if (j < num_subauth)
+				continue; /* all sub_auth values do not match */
+		}
+
+		cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname));
+		return (0); /* sids compare/match */
+	}
+
+	cFYI(1, ("No matching sid"));
+	return (-1);
+}
+
+/* if the two SIDs (roughly equivalent to a UUID for a user or group) are
+   the same returns 1, if they do not match returns 0 */
+int compare_sids(struct cifs_sid *ctsid, struct cifs_sid *cwsid)
+{
+	int i;
+	int num_subauth, num_sat, num_saw;
+
+	if ((!ctsid) || (!cwsid))
+		return (0);
+
+	/* compare the revision */
+	if (ctsid->revision != cwsid->revision)
+		return (0);
+
+	/* compare all of the six auth values */
+	for (i = 0; i < 6; ++i) {
+		if (ctsid->authority[i] != cwsid->authority[i])
+			return (0);
+	}
+
+	/* compare all of the subauth values if any */
+	num_sat = ctsid->num_subauth;
+	num_saw = cwsid->num_subauth;
+	num_subauth = num_sat < num_saw ? num_sat : num_saw;
+	if (num_subauth) {
+		for (i = 0; i < num_subauth; ++i) {
+			if (ctsid->sub_auth[i] != cwsid->sub_auth[i])
+				return (0);
+		}
+	}
+
+	return (1); /* sids compare/match */
+}
+
+
+static void parse_ace(struct cifs_ace *pace, char *end_of_acl)
+{
+	int num_subauth;
+
+	/* validate that we do not go past end of acl */
+
+	/* XXX this if statement can be removed
+	if (end_of_acl < (char *)pace + sizeof(struct cifs_ace)) {
+		cERROR(1, ("ACL too small to parse ACE"));
+		return;
+	} */
+
+	num_subauth = pace->num_subauth;
+	if (num_subauth) {
+#ifdef CONFIG_CIFS_DEBUG2
+		int i;
+		cFYI(1, ("ACE revision %d num_subauth %d",
+			pace->revision, pace->num_subauth));
+		for (i = 0; i < num_subauth; ++i) {
+			cFYI(1, ("ACE sub_auth[%d]: 0x%x", i,
+				le32_to_cpu(pace->sub_auth[i])));
+		}
+
+		/* BB add length check to make sure that we do not have huge
+			num auths and therefore go off the end */
+
+		cFYI(1, ("RID %d", le32_to_cpu(pace->sub_auth[num_subauth-1])));
+#endif
+	}
+
+	return;
+}
+
+static void parse_ntace(struct cifs_ntace *pntace, char *end_of_acl)
+{
+	/* validate that we do not go past end of acl */
+	if (end_of_acl < (char *)pntace + sizeof(struct cifs_ntace)) {
+		cERROR(1, ("ACL too small to parse NT ACE"));
+		return;
+	}
+
+#ifdef CONFIG_CIFS_DEBUG2
+	cFYI(1, ("NTACE type %d flags 0x%x size %d, access Req 0x%x",
+		pntace->type, pntace->flags, pntace->size,
+		pntace->access_req));
+#endif
+	return;
+}
+
+
+
+static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
+		       struct cifs_sid *pownersid, struct cifs_sid *pgrpsid)
+{
+	int i;
+	int num_aces = 0;
+	int acl_size;
+	char *acl_base;
+	struct cifs_ntace **ppntace;
+	struct cifs_ace **ppace;
+
+	/* BB need to add parm so we can store the SID BB */
+
+	/* validate that we do not go past end of acl */
+	if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
+		cERROR(1, ("ACL too small to parse DACL"));
+		return;
+	}
+
+#ifdef CONFIG_CIFS_DEBUG2
+	cFYI(1, ("DACL revision %d size %d num aces %d",
+		le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
+		le32_to_cpu(pdacl->num_aces)));
+#endif
+
+	acl_base = (char *)pdacl;
+	acl_size = sizeof(struct cifs_acl);
+
+	num_aces = le32_to_cpu(pdacl->num_aces);
+	if (num_aces  > 0) {
+		ppntace = kmalloc(num_aces * sizeof(struct cifs_ntace *),
+				GFP_KERNEL);
+		ppace = kmalloc(num_aces * sizeof(struct cifs_ace *),
+				GFP_KERNEL);
+
+/*		cifscred->cecount = pdacl->num_aces;
+		cifscred->ntaces = kmalloc(num_aces *
+			sizeof(struct cifs_ntace *), GFP_KERNEL);
+		cifscred->aces = kmalloc(num_aces *
+			sizeof(struct cifs_ace *), GFP_KERNEL);*/
+
+		for (i = 0; i < num_aces; ++i) {
+			ppntace[i] = (struct cifs_ntace *)
+					(acl_base + acl_size);
+			ppace[i] = (struct cifs_ace *) ((char *)ppntace[i] +
+					sizeof(struct cifs_ntace));
+
+			parse_ntace(ppntace[i], end_of_acl);
+			if (end_of_acl < ((char *)ppace[i] +
+					(le16_to_cpu(ppntace[i]->size) -
+					sizeof(struct cifs_ntace)))) {
+				cERROR(1, ("ACL too small to parse ACE"));
+				break;
+			} else
+				parse_ace(ppace[i], end_of_acl);
+
+/*			memcpy((void *)(&(cifscred->ntaces[i])),
+				(void *)ppntace[i],
+				sizeof(struct cifs_ntace));
+			memcpy((void *)(&(cifscred->aces[i])),
+				(void *)ppace[i],
+				sizeof(struct cifs_ace)); */
+
+			acl_base = (char *)ppntace[i];
+			acl_size = le16_to_cpu(ppntace[i]->size);
+		}
+
+		kfree(ppace);
+		kfree(ppntace);
+	}
+
+	return;
+}
+
+
+static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
+{
+
+	/* BB need to add parm so we can store the SID BB */
+
+	/* validate that we do not go past end of acl */
+	if (end_of_acl < (char *)psid + sizeof(struct cifs_sid)) {
+		cERROR(1, ("ACL too small to parse SID"));
+		return -EINVAL;
+	}
+
+	if (psid->num_subauth) {
+#ifdef CONFIG_CIFS_DEBUG2
+		int i;
+		cFYI(1, ("SID revision %d num_auth %d First subauth 0x%x",
+			psid->revision, psid->num_subauth, psid->sub_auth[0]));
+
+		for (i = 0; i < psid->num_subauth; i++) {
+			cFYI(1, ("SID sub_auth[%d]: 0x%x ", i,
+				le32_to_cpu(psid->sub_auth[i])));
+		}
+
+		/* BB add length check to make sure that we do not have huge
+			num auths and therefore go off the end */
+		cFYI(1, ("RID 0x%x",
+			le32_to_cpu(psid->sub_auth[psid->num_subauth-1])));
+#endif
+	}
+
+	return 0;
+}
+
+
+/* Convert CIFS ACL to POSIX form */
+int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len)
+{
+	int rc;
+	struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
+	struct cifs_acl *dacl_ptr; /* no need for SACL ptr */
+	char *end_of_acl = ((char *)pntsd) + acl_len;
+
+	owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+				le32_to_cpu(pntsd->osidoffset));
+	group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+				le32_to_cpu(pntsd->gsidoffset));
+	dacl_ptr = (struct cifs_acl *)((char *)pntsd +
+				le32_to_cpu(pntsd->dacloffset));
+#ifdef CONFIG_CIFS_DEBUG2
+	cFYI(1, ("revision %d type 0x%x ooffset 0x%x goffset 0x%x "
+		 "sacloffset 0x%x dacloffset 0x%x",
+		 pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset),
+		 le32_to_cpu(pntsd->gsidoffset),
+		 le32_to_cpu(pntsd->sacloffset),
+		 le32_to_cpu(pntsd->dacloffset)));
+#endif
+	rc = parse_sid(owner_sid_ptr, end_of_acl);
+	if (rc)
+		return rc;
+
+	rc = parse_sid(group_sid_ptr, end_of_acl);
+	if (rc)
+		return rc;
+
+	parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, group_sid_ptr);
+
+/*	cifscred->uid = owner_sid_ptr->rid;
+	cifscred->gid = group_sid_ptr->rid;
+	memcpy((void *)(&(cifscred->osid)), (void *)owner_sid_ptr,
+			sizeof (struct cifs_sid));
+	memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
+			sizeof (struct cifs_sid)); */
+
+
+	return (0);
+}
+#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index 5eff35d..420f878 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -1,7 +1,7 @@
 /*
  *   fs/cifs/cifsacl.h
  *
- *   Copyright (c) International Business Machines  Corp., 2005
+ *   Copyright (c) International Business Machines  Corp., 2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   This library is free software; you can redistribute it and/or modify
@@ -22,17 +22,65 @@
 #ifndef _CIFSACL_H
 #define _CIFSACL_H
 
-struct cifs_sid {
-	__u8 revision; /* revision level */
-	__u8 num_subauths;
-	__u8 authority[6];
-	__u32 sub_auth[4];
-	/* next sub_auth if any ... */
+
+#define NUM_AUTHS 6 /* number of authority fields */
+#define NUM_SUBAUTHS 5 /* number of sub authority fields */
+#define NUM_WK_SIDS 7 /* number of well known sids */
+#define SIDNAMELENGTH 20 /* long enough for the ones we care about */
+
+#define READ_BIT        0x4
+#define WRITE_BIT       0x2
+#define EXEC_BIT        0x1
+
+#define UBITSHIFT	6
+#define GBITSHIFT	3
+
+struct cifs_ntsd {
+	__le16 revision; /* revision level */
+	__le16 type;
+	__le32 osidoffset;
+	__le32 gsidoffset;
+	__le32 sacloffset;
+	__le32 dacloffset;
 } __attribute__((packed));
 
-/* everyone */
-/* extern const struct cifs_sid sid_everyone;*/
-/* group users */
-/* extern const struct cifs_sid sid_user;*/
+struct cifs_sid {
+	__u8 revision; /* revision level */
+	__u8 num_subauth;
+	__u8 authority[6];
+	__le32 sub_auth[5]; /* sub_auth[num_subauth] */ /* BB FIXME endianness BB */
+} __attribute__((packed));
+
+struct cifs_acl {
+	__le16 revision; /* revision level */
+	__le16 size;
+	__le32 num_aces;
+} __attribute__((packed));
+
+struct cifs_ntace { /* first part of ACE which contains perms */
+	__u8 type;
+	__u8 flags;
+	__le16 size;
+	__le32 access_req;
+} __attribute__((packed));
+
+struct cifs_ace { /* last part of ACE which includes user info */
+	__u8 revision; /* revision level */
+	__u8 num_subauth;
+	__u8 authority[6];
+	__le32 sub_auth[5];
+} __attribute__((packed));
+
+struct cifs_wksid {
+	struct cifs_sid cifssid;
+	char sidname[SIDNAMELENGTH];
+} __attribute__((packed));
+
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+
+extern int match_sid(struct cifs_sid *);
+extern int compare_sids(struct cifs_sid *, struct cifs_sid *);
+
+#endif /*  CONFIG_CIFS_EXPERIMENTAL */
 
 #endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 3627229..632070b 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -345,7 +345,7 @@
 	user = kmalloc(2 + (len * 2), GFP_KERNEL);
 	if (user == NULL)
 		goto calc_exit_2;
-	len = cifs_strtoUCS(user, ses->userName, len, nls_cp);
+	len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
 	UniStrupr(user);
 	hmac_md5_update((char *)user, 2*len, pctxt);
 
@@ -356,7 +356,8 @@
 		domain = kmalloc(2 + (len * 2), GFP_KERNEL);
 		if (domain == NULL)
 			goto calc_exit_1;
-		len = cifs_strtoUCS(domain, ses->domainName, len, nls_cp);
+		len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
+					nls_cp);
 		/* the following line was removed since it didn't work well
 		   with lower cased domain name that passed as an option.
 		   Maybe converting the domain name earlier makes sense */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ba8f786..a6fbea5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -49,10 +49,6 @@
 static struct quotactl_ops cifs_quotactl_ops;
 #endif /* QUOTA */
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-extern struct export_operations cifs_export_ops;
-#endif /* EXPERIMENTAL */
-
 int cifsFYI = 0;
 int cifsERROR = 1;
 int traceSMB = 0;
@@ -240,9 +236,9 @@
 
 	cifs_sb = CIFS_SB(inode->i_sb);
 
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
 		return 0;
-	} else /* file mode might have been restricted at mount time
+	else /* file mode might have been restricted at mount time
 		on the client (above and beyond ACL on servers) for
 		servers which do not support setting and viewing mode bits,
 		so allowing client to check permissions is useful */
@@ -312,15 +308,15 @@
 					seq_printf(s, ",domain=%s",
 					   cifs_sb->tcon->ses->domainName);
 			}
+			if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
+			   !(cifs_sb->tcon->unix_ext))
+				seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
+			if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
+			   !(cifs_sb->tcon->unix_ext))
+				seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
 		}
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
 			seq_printf(s, ",posixpaths");
-		if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
-		   !(cifs_sb->tcon->unix_ext))
-			seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
-		if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
-		   !(cifs_sb->tcon->unix_ext))
-			seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
 		seq_printf(s, ",rsize=%d", cifs_sb->rsize);
 		seq_printf(s, ",wsize=%d", cifs_sb->wsize);
 	}
@@ -346,7 +342,7 @@
 	if (pTcon) {
 		cFYI(1, ("set type: 0x%x id: %d", quota_type, qid));
 	} else {
-		return -EIO;
+		rc = -EIO;
 	}
 
 	FreeXid(xid);
@@ -716,7 +712,7 @@
 cifs_init_inodecache(void)
 {
 	cifs_inode_cachep = kmem_cache_create("cifs_inode_cache",
-					      sizeof (struct cifsInodeInfo),
+					      sizeof(struct cifsInodeInfo),
 					      0, (SLAB_RECLAIM_ACCOUNT|
 						SLAB_MEM_SPREAD),
 					      cifs_init_once);
@@ -816,8 +812,8 @@
 cifs_init_mids(void)
 {
 	cifs_mid_cachep = kmem_cache_create("cifs_mpx_ids",
-				sizeof (struct mid_q_entry), 0,
-				SLAB_HWCACHE_ALIGN, NULL);
+					    sizeof(struct mid_q_entry), 0,
+					    SLAB_HWCACHE_ALIGN, NULL);
 	if (cifs_mid_cachep == NULL)
 		return -ENOMEM;
 
@@ -829,8 +825,8 @@
 	}
 
 	cifs_oplock_cachep = kmem_cache_create("cifs_oplock_structs",
-				sizeof (struct oplock_q_entry), 0,
-				SLAB_HWCACHE_ALIGN, NULL);
+					sizeof(struct oplock_q_entry), 0,
+					SLAB_HWCACHE_ALIGN, NULL);
 	if (cifs_oplock_cachep == NULL) {
 		mempool_destroy(cifs_mid_poolp);
 		kmem_cache_destroy(cifs_mid_cachep);
@@ -882,7 +878,8 @@
 				the call */
 				/* mutex_lock(&inode->i_mutex);*/
 				if (S_ISREG(inode->i_mode)) {
-					rc = filemap_fdatawrite(inode->i_mapping);
+					rc =
+					   filemap_fdatawrite(inode->i_mapping);
 					if (CIFS_I(inode)->clientCanCacheRead
 									 == 0) {
 						filemap_fdatawait(inode->i_mapping);
@@ -907,8 +904,7 @@
 					    0 /* len */ , 0 /* offset */, 0,
 					    0, LOCKING_ANDX_OPLOCK_RELEASE,
 					    0 /* wait flag */);
-					cFYI(1, 
-					      ("Oplock release rc = %d ", rc));
+					cFYI(1, ("Oplock release rc = %d", rc));
 				}
 			} else
 				spin_unlock(&GlobalMid_Lock);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a20de77..0a3ee5a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -1,7 +1,7 @@
 /*
  *   fs/cifs/cifsfs.h
  *
- *   Copyright (c) International Business Machines  Corp., 2002, 2005
+ *   Copyright (c) International Business Machines  Corp., 2002, 2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   This library is free software; you can redistribute it and/or modify
@@ -99,7 +99,12 @@
 			size_t, int);
 extern ssize_t	cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t	cifs_listxattr(struct dentry *, char *, size_t);
-extern int cifs_ioctl (struct inode *inode, struct file *filep,
+extern int cifs_ioctl(struct inode *inode, struct file *filep,
 		       unsigned int command, unsigned long arg);
-#define CIFS_VERSION   "1.50"
+
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+extern struct export_operations cifs_export_ops;
+#endif /* EXPERIMENTAL */
+
+#define CIFS_VERSION   "1.51"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b98742f..87f51f2 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -19,6 +19,7 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include "cifs_fs_sb.h"
+#include "cifsacl.h"
 /*
  * The sizes of various internal tables and strings
  */
@@ -89,7 +90,8 @@
 };
 
 enum securityEnum {
-	LANMAN = 0,             /* Legacy LANMAN auth */
+	PLAINTXT = 0, 		/* Legacy with Plaintext passwords */
+	LANMAN,			/* Legacy LANMAN auth */
 	NTLM,			/* Legacy NTLM012 auth with NTLM hash */
 	NTLMv2,			/* Legacy NTLM auth with NTLMv2 hash */
 	RawNTLMSSP,		/* NTLMSSP without SPNEGO */
@@ -115,6 +117,17 @@
 	} data;
 };
 
+struct cifs_cred {
+	int uid;
+	int gid;
+	int mode;
+	int cecount;
+	struct cifs_sid osid;
+	struct cifs_sid gsid;
+	struct cifs_ntace *ntaces;
+	struct cifs_ace *aces;
+};
+
 /*
  *****************************************************************
  * Except the CIFS PDUs themselves all the
@@ -279,6 +292,7 @@
 	FILE_SYSTEM_DEVICE_INFO fsDevInfo;
 	FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
 	FILE_SYSTEM_UNIX_INFO fsUnixInfo;
+	unsigned ipc:1;		/* set if connection to IPC$ eg for RPC/PIPES */
 	unsigned retry:1;
 	unsigned nocase:1;
 	unsigned unix_ext:1; /* if off disable Linux extensions to CIFS protocol
@@ -329,6 +343,7 @@
 	struct list_head llist; /* list of byte range locks we have. */
 	unsigned closePend:1;	/* file is marked to close */
 	unsigned invalidHandle:1;  /* file closed via session abend */
+	unsigned messageMode:1;    /* for pipes: message vs byte mode */
 	atomic_t wrtPending;   /* handle in use - defer close */
 	struct semaphore fh_sem; /* prevents reopen race after dead ses*/
 	char *search_resume_name; /* BB removeme BB */
@@ -464,6 +479,9 @@
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 #define   CIFSSEC_MAY_LANMAN	0x00010
 #define   CIFSSEC_MAY_PLNTXT	0x00020
+#else
+#define   CIFSSEC_MAY_LANMAN    0
+#define   CIFSSEC_MAY_PLNTXT    0
 #endif /* weak passwords */
 #define   CIFSSEC_MAY_SEAL	0x00040 /* not supported yet */
 
@@ -477,14 +495,23 @@
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 #define   CIFSSEC_MUST_LANMAN	0x10010
 #define   CIFSSEC_MUST_PLNTXT	0x20020
-#define   CIFSSEC_MASK          0x37037 /* current flags supported if weak */
+#ifdef CONFIG_CIFS_UPCALL
+#define   CIFSSEC_MASK          0x3F03F /* allows weak security but also krb5 */
 #else
-#define	  CIFSSEC_MASK          0x07007 /* flags supported if no weak config */
+#define   CIFSSEC_MASK          0x37037 /* current flags supported if weak */
+#endif /* UPCALL */
+#else /* do not allow weak pw hash */
+#ifdef CONFIG_CIFS_UPCALL
+#define   CIFSSEC_MASK          0x0F00F /* flags supported if no weak allowed */
+#else
+#define	  CIFSSEC_MASK          0x07007 /* flags supported if no weak allowed */
+#endif /* UPCALL */
 #endif /* WEAK_PW_HASH */
 #define   CIFSSEC_MUST_SEAL	0x40040 /* not supported yet */
 
 #define   CIFSSEC_DEF  CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2
 #define   CIFSSEC_MAX  CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2
+#define   CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5)
 /*
  *****************************************************************
  * All constants go here
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 6a2056e..c41ff74 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -215,6 +215,12 @@
 					 /* file_execute, file_read_attributes*/
 					 /* write_dac, and delete.           */
 
+#define FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA | FILE_READ_ATTRIBUTES)
+#define FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \
+				| FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES)
+#define FILE_EXEC_RIGHTS (FILE_EXECUTE)
+
+
 /*
  * Invalid readdir handle
  */
@@ -360,10 +366,10 @@
 	__u8 WordCount;
 } __attribute__((packed));
 /* given a pointer to an smb_hdr retrieve the value of byte count */
-#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) ) )
-#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) ) )
+#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
+#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
 /* given a pointer to an smb_hdr retrieve the pointer to the byte area */
-#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) + 2 )
+#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount) + 2)
 
 /*
  * Computer Name Length (since Netbios name was length 16 with last byte 0x20)
@@ -716,6 +722,14 @@
 #define REQ_OPENDIRONLY    0x00000008
 #define REQ_EXTENDED_INFO  0x00000010
 
+/* File type */
+#define DISK_TYPE		0x0000
+#define BYTE_PIPE_TYPE		0x0001
+#define MESSAGE_PIPE_TYPE	0x0002
+#define PRINTER_TYPE		0x0003
+#define COMM_DEV_TYPE		0x0004
+#define UNKNOWN_TYPE		0xFFFF
+
 typedef struct smb_com_open_req {	/* also handles create */
 	struct smb_hdr hdr;	/* wct = 24 */
 	__u8 AndXCommand;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 04a69da..1a88366 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -50,7 +50,8 @@
 			int * /* bytes returned */ , const int long_op);
 extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
 			struct kvec *, int /* nvec to send */,
-			int * /* type of buf returned */ , const int long_op);
+			int * /* type of buf returned */ , const int long_op,
+			const int logError /* whether to log status code*/ );
 extern int SendReceiveBlockingLock(const unsigned int /* xid */ ,
 					struct cifsTconInfo *,
 				struct smb_hdr * /* input */ ,
@@ -65,7 +66,7 @@
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
 			enum securityEnum *secType);
 extern int cifs_inet_pton(int, char *source, void *dst);
-extern int map_smb_to_linux_error(struct smb_hdr *smb);
+extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
 extern void header_assemble(struct smb_hdr *, char /* command */ ,
 			    const struct cifsTconInfo *, int /* length of
 			    fixed section (word count) in two byte units */);
@@ -304,12 +305,13 @@
 				 const char *pass);
 extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *,
 			const struct nls_table *);
-extern void CalcNTLMv2_response(const struct cifsSesInfo *, char * );
+extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
 extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
 			     const struct nls_table *);
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 extern void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key);
 #endif /* CIFS_WEAK_PW_HASH */
+extern int parse_sec_desc(struct cifs_ntsd *, int);
 extern int CIFSSMBCopy(int xid,
 			struct cifsTconInfo *source_tcon,
 			const char *fromName,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 8eb102f..f0d9a48 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -34,10 +34,10 @@
 #include <asm/uaccess.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
+#include "cifsacl.h"
 #include "cifsproto.h"
 #include "cifs_unicode.h"
 #include "cifs_debug.h"
-#include "cifsacl.h"
 
 #ifdef CONFIG_CIFS_POSIX
 static struct {
@@ -94,9 +94,8 @@
 	write_lock(&GlobalSMBSeslock);
 	list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
-		if (open_file) {
+		if (open_file)
 			open_file->invalidHandle = TRUE;
-		}
 	}
 	write_unlock(&GlobalSMBSeslock);
 	/* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -439,8 +438,13 @@
 
 	pSMB->hdr.Mid = GetNextMid(server);
 	pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
+
 	if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
 		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
+	else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) {
+		cFYI(1, ("Kerberos only mechanism, enable extended security"));
+		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
+	}
 
 	count = 0;
 	for (i = 0; i < CIFS_NUM_PROT; i++) {
@@ -513,7 +517,7 @@
 				(int)ts.tv_sec, (int)utc.tv_sec,
 				(int)(utc.tv_sec - ts.tv_sec)));
 			val = (int)(utc.tv_sec - ts.tv_sec);
-			seconds = val < 0 ? -val : val;
+			seconds = abs(val);
 			result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
 			remain = seconds % MIN_TZ_ADJ;
 			if (remain >= (MIN_TZ_ADJ / 2))
@@ -574,7 +578,20 @@
 		server->secType = NTLM;
 	else if (secFlags & CIFSSEC_MAY_NTLMV2)
 		server->secType = NTLMv2;
-	/* else krb5 ... any others ... */
+	else if (secFlags & CIFSSEC_MAY_KRB5)
+		server->secType = Kerberos;
+	else if (secFlags & CIFSSEC_MAY_LANMAN)
+		server->secType = LANMAN;
+/* #ifdef CONFIG_CIFS_EXPERIMENTAL
+	else if (secFlags & CIFSSEC_MAY_PLNTXT)
+		server->secType = ??
+#endif */
+	else {
+		rc = -EOPNOTSUPP;
+		cERROR(1, ("Invalid security type"));
+		goto neg_err_exit;
+	}
+	/* else ... any others ...? */
 
 	/* one byte, so no need to convert this or EncryptionKeyLen from
 	   little endian */
@@ -604,22 +621,26 @@
 	if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
 		(server->capabilities & CAP_EXTENDED_SECURITY)) {
 		count = pSMBr->ByteCount;
-		if (count < 16)
+		if (count < 16) {
 			rc = -EIO;
-		else if (count == 16) {
-			server->secType = RawNTLMSSP;
-			if (server->socketUseCount.counter > 1) {
-				if (memcmp(server->server_GUID,
-					   pSMBr->u.extended_response.
-					   GUID, 16) != 0) {
-					cFYI(1, ("server UID changed"));
-					memcpy(server->server_GUID,
-						pSMBr->u.extended_response.GUID,
-						16);
-				}
-			} else
+			goto neg_err_exit;
+		}
+
+		if (server->socketUseCount.counter > 1) {
+			if (memcmp(server->server_GUID,
+				   pSMBr->u.extended_response.
+				   GUID, 16) != 0) {
+				cFYI(1, ("server UID changed"));
 				memcpy(server->server_GUID,
-				       pSMBr->u.extended_response.GUID, 16);
+					pSMBr->u.extended_response.GUID,
+					16);
+			}
+		} else
+			memcpy(server->server_GUID,
+			       pSMBr->u.extended_response.GUID, 16);
+
+		if (count == 16) {
+			server->secType = RawNTLMSSP;
 		} else {
 			rc = decode_negTokenInit(pSMBr->u.extended_response.
 						 SecurityBlob,
@@ -642,10 +663,12 @@
 		/* MUST_SIGN already includes the MAY_SIGN FLAG
 		   so if this is zero it means that signing is disabled */
 		cFYI(1, ("Signing disabled"));
-		if (server->secMode & SECMODE_SIGN_REQUIRED)
+		if (server->secMode & SECMODE_SIGN_REQUIRED) {
 			cERROR(1, ("Server requires "
-				   "/proc/fs/cifs/PacketSigningEnabled "
-				   "to be on"));
+				   "packet signing to be enabled in "
+				   "/proc/fs/cifs/SecurityFlags."));
+			rc = -EOPNOTSUPP;
+		}
 		server->secMode &=
 			~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
 	} else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) {
@@ -1052,7 +1075,7 @@
 				InformationLevel) - 4;
 	offset = param_offset + params;
 	pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset);
-	pdata->Level = SMB_QUERY_FILE_UNIX_BASIC;
+	pdata->Level = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
 	pdata->Permissions = cpu_to_le64(mode);
 	pdata->PosixOpenFlags = cpu_to_le32(posix_flags);
 	pdata->OpenFlags =  cpu_to_le32(*pOplock);
@@ -1098,8 +1121,8 @@
 	if (cpu_to_le32(FILE_CREATE) == psx_rsp->CreateAction)
 		*pOplock |= CIFS_CREATE_ACTION;
 	/* check to make sure response data is there */
-	if (psx_rsp->ReturnedLevel != SMB_QUERY_FILE_UNIX_BASIC) {
-		pRetData->Type = -1; /* unknown */
+	if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) {
+		pRetData->Type = cpu_to_le32(-1); /* unknown */
 #ifdef CONFIG_CIFS_DEBUG2
 		cFYI(1, ("unknown type"));
 #endif
@@ -1107,12 +1130,12 @@
 		if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
 					+ sizeof(FILE_UNIX_BASIC_INFO)) {
 			cERROR(1, ("Open response data too small"));
-			pRetData->Type = -1;
+			pRetData->Type = cpu_to_le32(-1);
 			goto psx_create_err;
 		}
 		memcpy((char *) pRetData,
 			(char *)psx_rsp + sizeof(OPEN_PSX_RSP),
-			sizeof (FILE_UNIX_BASIC_INFO));
+			sizeof(FILE_UNIX_BASIC_INFO));
 	}
 
 psx_create_err:
@@ -1193,9 +1216,9 @@
 	}
 	if (*pOplock & REQ_OPLOCK)
 		pSMB->OpenFlags = cpu_to_le16(REQ_OPLOCK);
-	else if (*pOplock & REQ_BATCHOPLOCK) {
+	else if (*pOplock & REQ_BATCHOPLOCK)
 		pSMB->OpenFlags = cpu_to_le16(REQ_BATCHOPLOCK);
-	}
+
 	pSMB->OpenFlags |= cpu_to_le16(REQ_MORE_INFO);
 	/* BB fixme add conversion for access_flags to bits 0 - 2 of mode */
 	/* 0 = read
@@ -1310,9 +1333,8 @@
 	}
 	if (*pOplock & REQ_OPLOCK)
 		pSMB->OpenFlags = cpu_to_le32(REQ_OPLOCK);
-	else if (*pOplock & REQ_BATCHOPLOCK) {
+	else if (*pOplock & REQ_BATCHOPLOCK)
 		pSMB->OpenFlags = cpu_to_le32(REQ_BATCHOPLOCK);
-	}
 	pSMB->DesiredAccess = cpu_to_le32(access_flags);
 	pSMB->AllocationSize = 0;
 	/* set file as system file if special file such
@@ -1424,9 +1446,8 @@
 
 	iov[0].iov_base = (char *)pSMB;
 	iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
-	rc = SendReceive2(xid, tcon->ses, iov,
-			  1 /* num iovecs */,
-			  &resp_buf_type, 0);
+	rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
+			 &resp_buf_type, 0 /* not long op */, 1 /* log err */ );
 	cifs_stats_inc(&tcon->num_reads);
 	pSMBr = (READ_RSP *)iov[0].iov_base;
 	if (rc) {
@@ -1446,11 +1467,11 @@
 			*nbytes = 0;
 		} else {
 			pReadData = (char *) (&pSMBr->hdr.Protocol) +
-			    le16_to_cpu(pSMBr->DataOffset);
-/*                      if (rc = copy_to_user(buf, pReadData, data_length)) {
+					le16_to_cpu(pSMBr->DataOffset);
+/*			if (rc = copy_to_user(buf, pReadData, data_length)) {
 				cERROR(1,("Faulting on read rc = %d",rc));
 				rc = -EFAULT;
-                        }*/ /* can not use copy_to_user when using page cache*/
+			}*/ /* can not use copy_to_user when using page cache*/
 			if (*buf)
 				memcpy(*buf, pReadData, data_length);
 		}
@@ -1645,7 +1666,7 @@
 
 
 	rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type,
-			  long_op);
+			  long_op, 0 /* do not log STATUS code */ );
 	cifs_stats_inc(&tcon->num_writes);
 	if (rc) {
 		cFYI(1, ("Send error Write2 = %d", rc));
@@ -2538,7 +2559,7 @@
 		cFYI(1, ("data starts after end of smb"));
 		return -EINVAL;
 	} else if (data_count + *ppdata > end_of_smb) {
-		cFYI(1,("data %p + count %d (%p) ends after end of smb %p start %p",
+		cFYI(1, ("data %p + count %d (%p) ends after end of smb %p start %p",
 			*ppdata, data_count, (data_count + *ppdata),
 			end_of_smb, pSMBr));
 		return -EINVAL;
@@ -2615,7 +2636,7 @@
 					reparse_buf->TargetNameOffset +
 					reparse_buf->TargetNameLen) >
 						end_of_smb) {
-					cFYI(1,("reparse buf goes beyond SMB"));
+					cFYI(1, ("reparse buf beyond SMB"));
 					rc = -EIO;
 					goto qreparse_out;
 				}
@@ -3042,25 +3063,12 @@
 
 #endif /* CONFIG_POSIX */
 
-
-/* security id for everyone */
-static const struct cifs_sid sid_everyone =
-		{1, 1, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0}};
-/* group users */
-static const struct cifs_sid sid_user =
-		{1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}};
-
-/* Convert CIFS ACL to POSIX form */
-static int parse_sec_desc(struct cifs_sid *psec_desc, int acl_len)
-{
-	return 0;
-}
-
+#ifdef CONFIG_CIFS_EXPERIMENTAL
 /* Get Security Descriptor (by handle) from remote server for a file or dir */
 int
 CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
 		/* BB fix up return info */ char *acl_inf, const int buflen,
-		  const int acl_type /* ACCESS/DEFAULT not sure implication */)
+		  const int acl_type)
 {
 	int rc = 0;
 	int buf_type = 0;
@@ -3085,12 +3093,13 @@
 	iov[0].iov_base = (char *)pSMB;
 	iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
 
-	rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 0);
+	rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
+			 0 /* not long op */, 0 /* do not log STATUS codes */ );
 	cifs_stats_inc(&tcon->num_acl_get);
 	if (rc) {
 		cFYI(1, ("Send error in QuerySecDesc = %d", rc));
 	} else {                /* decode response */
-		struct cifs_sid *psec_desc;
+		struct cifs_ntsd *psec_desc;
 		__le32 * parm;
 		int parm_len;
 		int data_len;
@@ -3105,8 +3114,7 @@
 			goto qsec_out;
 		pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base;
 
-		cERROR(1, ("smb %p parm %p data %p",
-			  pSMBr, parm, psec_desc));  /* BB removeme BB */
+		cFYI(1, ("smb %p parm %p data %p", pSMBr, parm, psec_desc));
 
 		if (le32_to_cpu(pSMBr->ParameterCount) != 4) {
 			rc = -EIO;      /* bad smb */
@@ -3115,7 +3123,7 @@
 
 /* BB check that data area is minimum length and as big as acl_len */
 
-		acl_len = le32_to_cpu(*(__le32 *)parm);
+		acl_len = le32_to_cpu(*parm);
 		/* BB check if (acl_len > bufsize) */
 
 		parse_sec_desc(psec_desc, acl_len);
@@ -3128,6 +3136,7 @@
 /*	cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
 	return rc;
 }
+#endif /* CONFIG_CIFS_EXPERIMENTAL */
 
 /* Legacy Query Path Information call for lookup to old servers such
    as Win9x/WinME */
@@ -3363,6 +3372,9 @@
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
 		if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
+			cERROR(1, ("Malformed FILE_UNIX_BASIC_INFO response.\n"
+				   "Unix Extensions can be disabled on mount "
+				   "by specifying the nosfu mount option."));
 			rc = -EIO;	/* bad smb */
 		} else {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3883,12 +3895,10 @@
 	pSMB->hdr.Mid = GetNextMid(ses->server);
 	pSMB->hdr.Tid = ses->ipc_tid;
 	pSMB->hdr.Uid = ses->Suid;
-	if (ses->capabilities & CAP_STATUS32) {
+	if (ses->capabilities & CAP_STATUS32)
 		pSMB->hdr.Flags2 |= SMBFLG2_ERR_STATUS;
-	}
-	if (ses->capabilities & CAP_DFS) {
+	if (ses->capabilities & CAP_DFS)
 		pSMB->hdr.Flags2 |= SMBFLG2_DFS;
-	}
 
 	if (ses->capabilities & CAP_UNICODE) {
 		pSMB->hdr.Flags2 |= SMBFLG2_UNICODE;
@@ -4060,10 +4070,6 @@
 		(void **) &pSMBr);
 	if (rc)
 		return rc;
-	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
-		      (void **) &pSMBr);
-	if (rc)
-		return rc;
 
 	params = 2;     /* level */
 	pSMB->TotalDataCount = 0;
@@ -4265,7 +4271,7 @@
 			     *) (((char *) &pSMBr->hdr.Protocol) +
 				 data_offset);
 			memcpy(&tcon->fsAttrInfo, response_data,
-			       sizeof (FILE_SYSTEM_ATTRIBUTE_INFO));
+			       sizeof(FILE_SYSTEM_ATTRIBUTE_INFO));
 		}
 	}
 	cifs_buf_release(pSMB);
@@ -4334,7 +4340,7 @@
 				(((char *) &pSMBr->hdr.Protocol) +
 				 data_offset);
 			memcpy(&tcon->fsDevInfo, response_data,
-			       sizeof (FILE_SYSTEM_DEVICE_INFO));
+			       sizeof(FILE_SYSTEM_DEVICE_INFO));
 		}
 	}
 	cifs_buf_release(pSMB);
@@ -4402,7 +4408,7 @@
 			     *) (((char *) &pSMBr->hdr.Protocol) +
 				 data_offset);
 			memcpy(&tcon->fsUnixInfo, response_data,
-			       sizeof (FILE_SYSTEM_UNIX_INFO));
+			       sizeof(FILE_SYSTEM_UNIX_INFO));
 		}
 	}
 	cifs_buf_release(pSMB);
@@ -4612,7 +4618,7 @@
 		strncpy(pSMB->FileName, fileName, name_len);
 	}
 	params = 6 + name_len;
-	data_count = sizeof (struct file_end_of_file_info);
+	data_count = sizeof(struct file_end_of_file_info);
 	pSMB->MaxParameterCount = cpu_to_le16(2);
 	pSMB->MaxDataCount = cpu_to_le16(4100);
 	pSMB->MaxSetupCount = 0;
@@ -4800,7 +4806,7 @@
 
 	data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
 
-	count = sizeof (FILE_BASIC_INFO);
+	count = sizeof(FILE_BASIC_INFO);
 	pSMB->MaxParameterCount = cpu_to_le16(2);
 	pSMB->MaxDataCount = cpu_to_le16(1000);	/* BB find max SMB PDU from sess */
 	pSMB->SetupCount = 1;
@@ -4871,7 +4877,7 @@
 	}
 
 	params = 6 + name_len;
-	count = sizeof (FILE_BASIC_INFO);
+	count = sizeof(FILE_BASIC_INFO);
 	pSMB->MaxParameterCount = cpu_to_le16(2);
 	pSMB->MaxDataCount = cpu_to_le16(1000);	/* BB find exact max SMB PDU from sess structure BB */
 	pSMB->MaxSetupCount = 0;
@@ -4900,7 +4906,7 @@
 		pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
 	pSMB->Reserved4 = 0;
 	pSMB->hdr.smb_buf_length += byte_count;
-	memcpy(data_offset, data, sizeof (FILE_BASIC_INFO));
+	memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5003,7 +5009,7 @@
 	}
 
 	params = 6 + name_len;
-	count = sizeof (FILE_UNIX_BASIC_INFO);
+	count = sizeof(FILE_UNIX_BASIC_INFO);
 	pSMB->MaxParameterCount = cpu_to_le16(2);
 	pSMB->MaxDataCount = cpu_to_le16(1000);	/* BB find exact max SMB PDU from sess structure BB */
 	pSMB->MaxSetupCount = 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4af3588..19ee11f 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -124,7 +124,7 @@
 	struct mid_q_entry *mid_entry;
 
 	spin_lock(&GlobalMid_Lock);
-	if ( kthread_should_stop() ) {
+	if (kthread_should_stop()) {
 		/* the demux thread will exit normally
 		next time through the loop */
 		spin_unlock(&GlobalMid_Lock);
@@ -151,9 +151,8 @@
 	}
 	list_for_each(tmp, &GlobalTreeConnectionList) {
 		tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
-		if ((tcon) && (tcon->ses) && (tcon->ses->server == server)) {
+		if ((tcon) && (tcon->ses) && (tcon->ses->server == server))
 			tcon->tidStatus = CifsNeedReconnect;
-		}
 	}
 	read_unlock(&GlobalSMBSeslock);
 	/* do not want to be sending data on a socket we are freeing */
@@ -187,7 +186,7 @@
 	spin_unlock(&GlobalMid_Lock);
 	up(&server->tcpSem);
 
-	while ( (!kthread_should_stop()) && (server->tcpStatus != CifsGood)) {
+	while ((!kthread_should_stop()) && (server->tcpStatus != CifsGood)) {
 		try_to_freeze();
 		if (server->protocolType == IPV6) {
 			rc = ipv6_connect(&server->addr.sockAddr6,
@@ -204,7 +203,7 @@
 		} else {
 			atomic_inc(&tcpSesReconnectCount);
 			spin_lock(&GlobalMid_Lock);
-			if ( !kthread_should_stop() )
+			if (!kthread_should_stop())
 				server->tcpStatus = CifsGood;
 			server->sequence_number = 0;
 			spin_unlock(&GlobalMid_Lock);
@@ -352,17 +351,15 @@
 
 	current->flags |= PF_MEMALLOC;
 	server->tsk = current;	/* save process info to wake at shutdown */
-	cFYI(1, ("Demultiplex PID: %d", current->pid));
+	cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
 	write_lock(&GlobalSMBSeslock);
 	atomic_inc(&tcpSesAllocCount);
 	length = tcpSesAllocCount.counter;
 	write_unlock(&GlobalSMBSeslock);
 	complete(&cifsd_complete);
-	if (length  > 1) {
-		mempool_resize(cifs_req_poolp,
-			length + cifs_min_rcv,
-			GFP_KERNEL);
-	}
+	if (length  > 1)
+		mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
+				GFP_KERNEL);
 
 	set_freezable();
 	while (!kthread_should_stop()) {
@@ -378,7 +375,7 @@
 			}
 		} else if (isLargeBuf) {
 			/* we are reusing a dirty large buf, clear its start */
-			memset(bigbuf, 0, sizeof (struct smb_hdr));
+			memset(bigbuf, 0, sizeof(struct smb_hdr));
 		}
 
 		if (smallbuf == NULL) {
@@ -391,7 +388,7 @@
 			}
 			/* beginning of smb buffer is cleared in our buf_get */
 		} else /* if existing small buf clear beginning */
-			memset(smallbuf, 0, sizeof (struct smb_hdr));
+			memset(smallbuf, 0, sizeof(struct smb_hdr));
 
 		isLargeBuf = FALSE;
 		isMultiRsp = FALSE;
@@ -400,11 +397,13 @@
 		iov.iov_len = 4;
 		smb_msg.msg_control = NULL;
 		smb_msg.msg_controllen = 0;
+		pdu_length = 4; /* enough to get RFC1001 header */
+incomplete_rcv:
 		length =
 		    kernel_recvmsg(csocket, &smb_msg,
-				 &iov, 1, 4, 0 /* BB see socket.h flags */);
+				&iov, 1, pdu_length, 0 /* BB other flags? */);
 
-		if ( kthread_should_stop() ) {
+		if (kthread_should_stop()) {
 			break;
 		} else if (server->tcpStatus == CifsNeedReconnect) {
 			cFYI(1, ("Reconnect after server stopped responding"));
@@ -416,7 +415,10 @@
 			msleep(1); /* minimum sleep to prevent looping
 				allowing socket to clear and app threads to set
 				tcpStatus CifsNeedReconnect if server hung */
-			continue;
+			if (pdu_length < 4)
+				goto incomplete_rcv;
+			else
+				continue;
 		} else if (length <= 0) {
 			if (server->tcpStatus == CifsNew) {
 				cFYI(1, ("tcp session abend after SMBnegprot"));
@@ -437,13 +439,11 @@
 			wake_up(&server->response_q);
 			continue;
 		} else if (length < 4) {
-			cFYI(1,
-			    ("Frame under four bytes received (%d bytes long)",
+			cFYI(1, ("less than four bytes received (%d bytes)",
 			      length));
-			cifs_reconnect(server);
-			csocket = server->ssocket;
-			wake_up(&server->response_q);
-			continue;
+			pdu_length -= length;
+			msleep(1);
+			goto incomplete_rcv;
 		}
 
 		/* The right amount was read from socket - 4 bytes */
@@ -504,7 +504,7 @@
 
 		/* else we have an SMB response */
 		if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) ||
-			    (pdu_length < sizeof (struct smb_hdr) - 1 - 4)) {
+			    (pdu_length < sizeof(struct smb_hdr) - 1 - 4)) {
 			cERROR(1, ("Invalid size SMB length %d pdu_length %d",
 					length, pdu_length+4));
 			cifs_reconnect(server);
@@ -528,7 +528,7 @@
 		     total_read += length) {
 			length = kernel_recvmsg(csocket, &smb_msg, &iov, 1,
 						pdu_length - total_read, 0);
-			if ( kthread_should_stop() ||
+			if (kthread_should_stop() ||
 			    (length == -EINTR)) {
 				/* then will exit */
 				reconnect = 2;
@@ -546,6 +546,7 @@
 					      allowing socket to clear and app
 					      threads to set tcpStatus
 					      CifsNeedReconnect if server hung*/
+				length = 0;
 				continue;
 			} else if (length <= 0) {
 				cERROR(1, ("Received no data, expecting %d",
@@ -631,9 +632,9 @@
 			/* Was previous buf put in mpx struct for multi-rsp? */
 			if (!isMultiRsp) {
 				/* smb buffer will be freed by user thread */
-				if (isLargeBuf) {
+				if (isLargeBuf)
 					bigbuf = NULL;
-				} else
+				else
 					smallbuf = NULL;
 			}
 			wake_up_process(task_to_wake);
@@ -676,9 +677,8 @@
 		server->ssocket = NULL;
 	}
 	/* buffer usuallly freed in free_mid - need to free it here on exit */
-	if (bigbuf != NULL)
-		cifs_buf_release(bigbuf);
-	if (smallbuf != NULL)
+	cifs_buf_release(bigbuf);
+	if (smallbuf) /* no sense logging a debug message if NULL */
 		cifs_small_buf_release(smallbuf);
 
 	read_lock(&GlobalSMBSeslock);
@@ -702,9 +702,8 @@
 		list_for_each(tmp, &GlobalSMBSessionList) {
 			ses = list_entry(tmp, struct cifsSesInfo,
 					 cifsSessionList);
-			if (ses->server == server) {
+			if (ses->server == server)
 				ses->status = CifsExiting;
-			}
 		}
 
 		spin_lock(&GlobalMid_Lock);
@@ -714,9 +713,8 @@
 				cFYI(1, ("Clearing Mid 0x%x - waking up ",
 					 mid_entry->mid));
 				task_to_wake = mid_entry->tsk;
-				if (task_to_wake) {
+				if (task_to_wake)
 					wake_up_process(task_to_wake);
-				}
 			}
 		}
 		spin_unlock(&GlobalMid_Lock);
@@ -749,18 +747,15 @@
 	list_for_each(tmp, &GlobalSMBSessionList) {
 		ses = list_entry(tmp, struct cifsSesInfo,
 				cifsSessionList);
-		if (ses->server == server) {
+		if (ses->server == server)
 			ses->server = NULL;
-		}
 	}
 	write_unlock(&GlobalSMBSeslock);
 
 	kfree(server);
-	if (length  > 0) {
-		mempool_resize(cifs_req_poolp,
-			length + cifs_min_rcv,
-			GFP_KERNEL);
-	}
+	if (length  > 0)
+		mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
+				GFP_KERNEL);
 
 	return 0;
 }
@@ -1477,7 +1472,7 @@
 	if (psin_server->sin_port) { /* user overrode default port */
 		rc = (*csocket)->ops->connect(*csocket,
 				(struct sockaddr *) psin_server,
-				sizeof (struct sockaddr_in), 0);
+				sizeof(struct sockaddr_in), 0);
 		if (rc >= 0)
 			connected = 1;
 	}
@@ -1493,7 +1488,7 @@
 
 			rc = (*csocket)->ops->connect(*csocket,
 					(struct sockaddr *) psin_server,
-					sizeof (struct sockaddr_in), 0);
+					sizeof(struct sockaddr_in), 0);
 			if (rc >= 0)
 				connected = 1;
 		}
@@ -1502,7 +1497,7 @@
 		psin_server->sin_port = htons(RFC1001_PORT);
 		rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *)
 					      psin_server,
-					      sizeof (struct sockaddr_in), 0);
+					      sizeof(struct sockaddr_in), 0);
 		if (rc >= 0)
 			connected = 1;
 	}
@@ -1610,7 +1605,7 @@
 	if (psin_server->sin6_port) { /* user overrode default port */
 		rc = (*csocket)->ops->connect(*csocket,
 				(struct sockaddr *) psin_server,
-				sizeof (struct sockaddr_in6), 0);
+				sizeof(struct sockaddr_in6), 0);
 		if (rc >= 0)
 			connected = 1;
 	}
@@ -1626,7 +1621,7 @@
 
 			rc = (*csocket)->ops->connect(*csocket,
 					(struct sockaddr *) psin_server,
-					sizeof (struct sockaddr_in6), 0);
+					sizeof(struct sockaddr_in6), 0);
 			if (rc >= 0)
 				connected = 1;
 		}
@@ -1634,7 +1629,7 @@
 	if (!connected) {
 		psin_server->sin6_port = htons(RFC1001_PORT);
 		rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *)
-				 psin_server, sizeof (struct sockaddr_in6), 0);
+				 psin_server, sizeof(struct sockaddr_in6), 0);
 		if (rc >= 0)
 			connected = 1;
 	}
@@ -1750,7 +1745,16 @@
 			cFYI(1, ("very large write cap"));
 #endif /* CIFS_DEBUG2 */
 		if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
-			cFYI(1, ("setting capabilities failed"));
+			if (vol_info == NULL) {
+				cFYI(1, ("resetting capabilities failed"));
+			} else
+				cERROR(1, ("Negotiating Unix capabilities "
+					   "with the server failed.  Consider "
+					   "mounting with the Unix Extensions\n"
+					   "disabled, if problems are found, "
+					   "by specifying the nounix mount "
+					   "option."));
+
 		}
 	}
 }
@@ -1909,8 +1913,8 @@
 			return rc;
 		}
 
-		srvTcp = kmalloc(sizeof (struct TCP_Server_Info), GFP_KERNEL);
-		if (srvTcp == NULL) {
+		srvTcp = kzalloc(sizeof(struct TCP_Server_Info), GFP_KERNEL);
+		if (!srvTcp) {
 			rc = -ENOMEM;
 			sock_release(csocket);
 			kfree(volume_info.UNC);
@@ -1919,9 +1923,8 @@
 			FreeXid(xid);
 			return rc;
 		} else {
-			memset(srvTcp, 0, sizeof (struct TCP_Server_Info));
 			memcpy(&srvTcp->addr.sockAddr, &sin_server,
-				sizeof (struct sockaddr_in));
+				sizeof(struct sockaddr_in));
 			atomic_set(&srvTcp->inFlight, 0);
 			/* BB Add code for ipv6 case too */
 			srvTcp->ssocket = csocket;
@@ -2173,8 +2176,18 @@
 						if (tsk)
 							kthread_stop(tsk);
 					}
-				} else
+				} else {
 					cFYI(1, ("No session or bad tcon"));
+					if ((pSesInfo->server) &&
+					    (pSesInfo->server->tsk)) {
+						struct task_struct *tsk;
+						force_sig(SIGKILL,
+							pSesInfo->server->tsk);
+						tsk = pSesInfo->server->tsk;
+						if (tsk)
+							kthread_stop(tsk);
+					}
+				}
 				sesInfoFree(pSesInfo);
 				/* pSesInfo = NULL; */
 			}
@@ -2185,8 +2198,10 @@
 		tcon->ses = pSesInfo;
 
 		/* do not care if following two calls succeed - informational */
-		CIFSSMBQFSDeviceInfo(xid, tcon);
-		CIFSSMBQFSAttributeInfo(xid, tcon);
+		if (!tcon->ipc) {
+			CIFSSMBQFSDeviceInfo(xid, tcon);
+			CIFSSMBQFSAttributeInfo(xid, tcon);
+		}
 
 		/* tell server which Unix caps we support */
 		if (tcon->ses->capabilities & CAP_UNIX)
@@ -2526,8 +2541,7 @@
 sesssetup_nomem:	/* do not return an error on nomem for the info strings,
 			   since that could make reconnection harder, and
 			   reconnection might be needed to free memory */
-	if (smb_buffer)
-		cifs_buf_release(smb_buffer);
+	cifs_buf_release(smb_buffer);
 
 	return rc;
 }
@@ -2547,7 +2561,7 @@
 	int remaining_words = 0;
 	int bytes_returned = 0;
 	int len;
-	int SecurityBlobLength = sizeof (NEGOTIATE_MESSAGE);
+	int SecurityBlobLength = sizeof(NEGOTIATE_MESSAGE);
 	PNEGOTIATE_MESSAGE SecurityBlob;
 	PCHALLENGE_MESSAGE SecurityBlob2;
 	__u32 negotiate_flags, capabilities;
@@ -2865,15 +2879,14 @@
 		rc = -EIO;
 	}
 
-	if (smb_buffer)
-		cifs_buf_release(smb_buffer);
+	cifs_buf_release(smb_buffer);
 
 	return rc;
 }
 static int
 CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
-		char *ntlm_session_key, int ntlmv2_flag,
-		const struct nls_table *nls_codepage)
+			char *ntlm_session_key, int ntlmv2_flag,
+			const struct nls_table *nls_codepage)
 {
 	struct smb_hdr *smb_buffer;
 	struct smb_hdr *smb_buffer_response;
@@ -2886,7 +2899,7 @@
 	int remaining_words = 0;
 	int bytes_returned = 0;
 	int len;
-	int SecurityBlobLength = sizeof (AUTHENTICATE_MESSAGE);
+	int SecurityBlobLength = sizeof(AUTHENTICATE_MESSAGE);
 	PAUTHENTICATE_MESSAGE SecurityBlob;
 	__u32 negotiate_flags, capabilities;
 	__u16 count;
@@ -2901,8 +2914,8 @@
 		return -ENOMEM;
 	}
 	smb_buffer_response = smb_buffer;
-	pSMB = (SESSION_SETUP_ANDX *) smb_buffer;
-	pSMBr = (SESSION_SETUP_ANDX *) smb_buffer_response;
+	pSMB = (SESSION_SETUP_ANDX *)smb_buffer;
+	pSMBr = (SESSION_SETUP_ANDX *)smb_buffer_response;
 
 	/* send SMBsessionSetup here */
 	header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
@@ -2921,7 +2934,7 @@
 		smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
 
 	capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
-	    CAP_EXTENDED_SECURITY;
+			CAP_EXTENDED_SECURITY;
 	if (ses->capabilities & CAP_UNICODE) {
 		smb_buffer->Flags2 |= SMBFLG2_UNICODE;
 		capabilities |= CAP_UNICODE;
@@ -2936,15 +2949,14 @@
 	}
 	pSMB->req.Capabilities = cpu_to_le32(capabilities);
 
-	bcc_ptr = (char *) &pSMB->req.SecurityBlob;
-	SecurityBlob = (PAUTHENTICATE_MESSAGE) bcc_ptr;
+	bcc_ptr = (char *)&pSMB->req.SecurityBlob;
+	SecurityBlob = (PAUTHENTICATE_MESSAGE)bcc_ptr;
 	strncpy(SecurityBlob->Signature, NTLMSSP_SIGNATURE, 8);
 	SecurityBlob->MessageType = NtLmAuthenticate;
 	bcc_ptr += SecurityBlobLength;
-	negotiate_flags =
-	    NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET |
-	    NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO |
-	    0x80000000 | NTLMSSP_NEGOTIATE_128;
+	negotiate_flags = NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET |
+			NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO |
+			0x80000000 | NTLMSSP_NEGOTIATE_128;
 	if (sign_CIFS_PDUs)
 		negotiate_flags |= /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN |*/ NTLMSSP_NEGOTIATE_SIGN;
 	if (ntlmv2_flag)
@@ -2979,36 +2991,32 @@
 			SecurityBlob->DomainName.Length = 0;
 			SecurityBlob->DomainName.MaximumLength = 0;
 		} else {
-			__u16 len =
-			    cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
+			__u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
 					  nls_codepage);
-			len *= 2;
+			ln *= 2;
 			SecurityBlob->DomainName.MaximumLength =
-			    cpu_to_le16(len);
+			    cpu_to_le16(ln);
 			SecurityBlob->DomainName.Buffer =
 			    cpu_to_le32(SecurityBlobLength);
-			bcc_ptr += len;
-			SecurityBlobLength += len;
-			SecurityBlob->DomainName.Length =
-			    cpu_to_le16(len);
+			bcc_ptr += ln;
+			SecurityBlobLength += ln;
+			SecurityBlob->DomainName.Length = cpu_to_le16(ln);
 		}
 		if (user == NULL) {
 			SecurityBlob->UserName.Buffer = 0;
 			SecurityBlob->UserName.Length = 0;
 			SecurityBlob->UserName.MaximumLength = 0;
 		} else {
-			__u16 len =
-			    cifs_strtoUCS((__le16 *) bcc_ptr, user, 64,
+			__u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, user, 64,
 					  nls_codepage);
-			len *= 2;
+			ln *= 2;
 			SecurityBlob->UserName.MaximumLength =
-			    cpu_to_le16(len);
+			    cpu_to_le16(ln);
 			SecurityBlob->UserName.Buffer =
 			    cpu_to_le32(SecurityBlobLength);
-			bcc_ptr += len;
-			SecurityBlobLength += len;
-			SecurityBlob->UserName.Length =
-			    cpu_to_le16(len);
+			bcc_ptr += ln;
+			SecurityBlobLength += ln;
+			SecurityBlob->UserName.Length = cpu_to_le16(ln);
 		}
 
 		/* SecurityBlob->WorkstationName.Length =
@@ -3052,33 +3060,32 @@
 			SecurityBlob->DomainName.Length = 0;
 			SecurityBlob->DomainName.MaximumLength = 0;
 		} else {
-			__u16 len;
+			__u16 ln;
 			negotiate_flags |= NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED;
 			strncpy(bcc_ptr, domain, 63);
-			len = strnlen(domain, 64);
+			ln = strnlen(domain, 64);
 			SecurityBlob->DomainName.MaximumLength =
-			    cpu_to_le16(len);
+			    cpu_to_le16(ln);
 			SecurityBlob->DomainName.Buffer =
 			    cpu_to_le32(SecurityBlobLength);
-			bcc_ptr += len;
-			SecurityBlobLength += len;
-			SecurityBlob->DomainName.Length = cpu_to_le16(len);
+			bcc_ptr += ln;
+			SecurityBlobLength += ln;
+			SecurityBlob->DomainName.Length = cpu_to_le16(ln);
 		}
 		if (user == NULL) {
 			SecurityBlob->UserName.Buffer = 0;
 			SecurityBlob->UserName.Length = 0;
 			SecurityBlob->UserName.MaximumLength = 0;
 		} else {
-			__u16 len;
+			__u16 ln;
 			strncpy(bcc_ptr, user, 63);
-			len = strnlen(user, 64);
-			SecurityBlob->UserName.MaximumLength =
-			    cpu_to_le16(len);
+			ln = strnlen(user, 64);
+			SecurityBlob->UserName.MaximumLength = cpu_to_le16(ln);
 			SecurityBlob->UserName.Buffer =
-			    cpu_to_le32(SecurityBlobLength);
-			bcc_ptr += len;
-			SecurityBlobLength += len;
-			SecurityBlob->UserName.Length = cpu_to_le16(len);
+						cpu_to_le32(SecurityBlobLength);
+			bcc_ptr += ln;
+			SecurityBlobLength += ln;
+			SecurityBlob->UserName.Length = cpu_to_le16(ln);
 		}
 		/* BB fill in our workstation name if known BB */
 
@@ -3100,12 +3107,11 @@
 	rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
 			 &bytes_returned, 1);
 	if (rc) {
-/*    rc = map_smb_to_linux_error(smb_buffer_response);  *//* done in SendReceive now */
-	} else if ((smb_buffer_response->WordCount == 3)
-		   || (smb_buffer_response->WordCount == 4)) {
+/*   rc = map_smb_to_linux_error(smb_buffer_response) done in SendReceive now */
+	} else if ((smb_buffer_response->WordCount == 3) ||
+		   (smb_buffer_response->WordCount == 4)) {
 		__u16 action = le16_to_cpu(pSMBr->resp.Action);
-		__u16 blob_len =
-		    le16_to_cpu(pSMBr->resp.SecurityBlobLength);
+		__u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength);
 		if (action & GUEST_LOGIN)
 			cFYI(1, (" Guest login")); /* BB Should we set anything
 							 in SesInfo struct ? */
@@ -3145,8 +3151,8 @@
 					} else {
 						remaining_words = BCC(smb_buffer_response) / 2;
 					}
-					len =
-					    UniStrnlen((wchar_t *) bcc_ptr,remaining_words - 1);
+					len = UniStrnlen((wchar_t *) bcc_ptr,
+							remaining_words - 1);
 /* We look for obvious messed up bcc or strings in response so we do not go off
   the end since (at least) WIN2K and Windows XP have a major bug in not null
   terminating last Unicode string in response  */
@@ -3230,7 +3236,7 @@
 						<= BCC(smb_buffer_response)) {
 						if (ses->serverOS)
 							kfree(ses->serverOS);
-						ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
+						ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
 						strncpy(ses->serverOS,bcc_ptr, len);
 
 						bcc_ptr += len;
@@ -3259,28 +3265,24 @@
 						bcc_ptr[0] = 0;
 						bcc_ptr++;
 					} else
-						cFYI(1,
-						     ("field of length %d "
+						cFYI(1, ("field of length %d "
 						   "extends beyond end of smb ",
 						      len));
 				}
 			} else {
-				cERROR(1,
-				       (" Security Blob extends beyond end "
+				cERROR(1, ("Security Blob extends beyond end "
 					"of SMB"));
 			}
 		} else {
 			cERROR(1, ("No session structure passed in."));
 		}
 	} else {
-		cERROR(1,
-		       (" Invalid Word count %d: ",
+		cERROR(1, ("Invalid Word count %d: ",
 			smb_buffer_response->WordCount));
 		rc = -EIO;
 	}
 
-	if (smb_buffer)
-		cifs_buf_release(smb_buffer);
+	cifs_buf_release(smb_buffer);
 
 	return rc;
 }
@@ -3389,6 +3391,18 @@
 		bcc_ptr = pByteArea(smb_buffer_response);
 		length = strnlen(bcc_ptr, BCC(smb_buffer_response) - 2);
 		/* skip service field (NB: this field is always ASCII) */
+		if (length == 3) {
+			if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') &&
+			    (bcc_ptr[2] == 'C')) {
+				cFYI(1, ("IPC connection"));
+				tcon->ipc = 1;
+			}
+		} else if (length == 2) {
+			if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) {
+				/* the most common case */
+				cFYI(1, ("disk share connection"));
+			}
+		}
 		bcc_ptr += length + 1;
 		strncpy(tcon->treeName, tree, MAX_TREE_SIZE);
 		if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
@@ -3399,9 +3413,11 @@
 				kfree(tcon->nativeFileSystem);
 				tcon->nativeFileSystem =
 				    kzalloc(length + 2, GFP_KERNEL);
-				cifs_strfromUCS_le(tcon->nativeFileSystem,
-						   (__le16 *) bcc_ptr,
-						   length, nls_codepage);
+				if (tcon->nativeFileSystem)
+					cifs_strfromUCS_le(
+						tcon->nativeFileSystem,
+						(__le16 *) bcc_ptr,
+						length, nls_codepage);
 				bcc_ptr += 2 * length;
 				bcc_ptr[0] = 0;	/* null terminate the string */
 				bcc_ptr[1] = 0;
@@ -3416,8 +3432,9 @@
 				kfree(tcon->nativeFileSystem);
 				tcon->nativeFileSystem =
 				    kzalloc(length + 1, GFP_KERNEL);
-				strncpy(tcon->nativeFileSystem, bcc_ptr,
-					length);
+				if (tcon->nativeFileSystem)
+					strncpy(tcon->nativeFileSystem, bcc_ptr,
+						length);
 			}
 			/* else do not bother copying these information fields*/
 		}
@@ -3433,8 +3450,7 @@
 		ses->ipc_tid = smb_buffer_response->Tid;
 	}
 
-	if (smb_buffer)
-		cifs_buf_release(smb_buffer);
+	cifs_buf_release(smb_buffer);
 	return rc;
 }
 
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 4830acc..793404b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -3,7 +3,7 @@
  *
  *   vfs operations that deal with dentries
  *
- *   Copyright (C) International Business Machines  Corp., 2002,2005
+ *   Copyright (C) International Business Machines  Corp., 2002,2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   This library is free software; you can redistribute it and/or modify
@@ -269,7 +269,7 @@
 			CIFSSMBClose(xid, pTcon, fileHandle);
 		} else if (newinode) {
 			pCifsFile =
-			   kzalloc(sizeof (struct cifsFileInfo), GFP_KERNEL);
+			   kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
 
 			if (pCifsFile == NULL)
 				goto cifs_create_out;
@@ -397,7 +397,7 @@
 				/* BB Do not bother to decode buf since no
 				   local inode yet to put timestamps in,
 				   but we can reuse it safely */
-				int bytes_written;
+				unsigned int bytes_written;
 				struct win_dev *pdev;
 				pdev = (struct win_dev *)buf;
 				if (S_ISCHR(mode)) {
@@ -450,8 +450,7 @@
 
 	xid = GetXid();
 
-	cFYI(1,
-	     (" parent inode = 0x%p name is: %s and dentry = 0x%p",
+	cFYI(1, (" parent inode = 0x%p name is: %s and dentry = 0x%p",
 	      parent_dir_inode, direntry->d_name.name, direntry));
 
 	/* check whether path exists */
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 893fd0a..d614b91 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -43,6 +43,7 @@
 #include <linux/exportfs.h>
 #include "cifsglob.h"
 #include "cifs_debug.h"
+#include "cifsfs.h"
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
 static struct dentry *cifs_get_parent(struct dentry *dentry)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 894b1f7..1e7e4c0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -467,7 +467,7 @@
 int cifs_close(struct inode *inode, struct file *file)
 {
 	int rc = 0;
-	int xid;
+	int xid, timeout;
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *pTcon;
 	struct cifsFileInfo *pSMBFile =
@@ -485,9 +485,9 @@
 			/* no sense reconnecting to close a file that is
 			   already closed */
 			if (pTcon->tidStatus != CifsNeedReconnect) {
-				int timeout = 2;
+				timeout = 2;
 				while ((atomic_read(&pSMBFile->wrtPending) != 0)
-					 && (timeout < 1000) ) {
+					&& (timeout <= 2048)) {
 					/* Give write a better chance to get to
 					server ahead of the close.  We do not
 					want to add a wait_q here as it would
@@ -522,12 +522,30 @@
 		list_del(&pSMBFile->flist);
 		list_del(&pSMBFile->tlist);
 		write_unlock(&GlobalSMBSeslock);
+		timeout = 10;
+		/* We waited above to give the SMBWrite a chance to issue
+		   on the wire (so we do not get SMBWrite returning EBADF
+		   if writepages is racing with close.  Note that writepages
+		   does not specify a file handle, so it is possible for a file
+		   to be opened twice, and the application close the "wrong"
+		   file handle - in these cases we delay long enough to allow
+		   the SMBWrite to get on the wire before the SMB Close.
+		   We allow total wait here over 45 seconds, more than
+		   oplock break time, and more than enough to allow any write
+		   to complete on the server, or to time out on the client */
+		while ((atomic_read(&pSMBFile->wrtPending) != 0)
+				&& (timeout <= 50000)) {
+			cERROR(1, ("writes pending, delay free of handle"));
+			msleep(timeout);
+			timeout *= 8;
+		}
 		kfree(pSMBFile->search_resume_name);
 		kfree(file->private_data);
 		file->private_data = NULL;
 	} else
 		rc = -EBADF;
 
+	read_lock(&GlobalSMBSeslock);
 	if (list_empty(&(CIFS_I(inode)->openFileList))) {
 		cFYI(1, ("closing last open instance for inode %p", inode));
 		/* if the file is not open we do not know if we can cache info
@@ -535,6 +553,7 @@
 		CIFS_I(inode)->clientCanCacheRead = FALSE;
 		CIFS_I(inode)->clientCanCacheAll  = FALSE;
 	}
+	read_unlock(&GlobalSMBSeslock);
 	if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
 		rc = CIFS_I(inode)->write_behind_rc;
 	FreeXid(xid);
@@ -767,7 +786,8 @@
 			mutex_lock(&fid->lock_mutex);
 			list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
 				if (pfLock->fl_start <= li->offset &&
-						length >= li->length) {
+						(pfLock->fl_start + length) >=
+						(li->offset + li->length)) {
 					stored_rc = CIFSSMBLock(xid, pTcon,
 							netfid,
 							li->length, li->offset,
@@ -1022,6 +1042,7 @@
 	}
 
 	read_lock(&GlobalSMBSeslock);
+refind_writable:
 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
 		if (open_file->closePend)
 			continue;
@@ -1029,24 +1050,49 @@
 		    ((open_file->pfile->f_flags & O_RDWR) ||
 		     (open_file->pfile->f_flags & O_WRONLY))) {
 			atomic_inc(&open_file->wrtPending);
+
+			if (!open_file->invalidHandle) {
+				/* found a good writable file */
+				read_unlock(&GlobalSMBSeslock);
+				return open_file;
+			}
+	
 			read_unlock(&GlobalSMBSeslock);
-			if ((open_file->invalidHandle) &&
-			   (!open_file->closePend) /* BB fixme -since the second clause can not be true remove it BB */) {
-				rc = cifs_reopen_file(open_file->pfile, FALSE);
-				/* if it fails, try another handle - might be */
-				/* dangerous to hold up writepages with retry */
-				if (rc) {
-					cFYI(1,
-					      ("failed on reopen file in wp"));
+			/* Had to unlock since following call can block */
+			rc = cifs_reopen_file(open_file->pfile, FALSE);
+			if (!rc) { 
+				if (!open_file->closePend)
+					return open_file;
+				else { /* start over in case this was deleted */
+				       /* since the list could be modified */
 					read_lock(&GlobalSMBSeslock);
-					/* can not use this handle, no write
-					pending on this one after all */
-					atomic_dec
-					     (&open_file->wrtPending);
-					continue;
+					atomic_dec(&open_file->wrtPending);
+					goto refind_writable;
 				}
 			}
-			return open_file;
+
+			/* if it fails, try another handle if possible -
+			(we can not do this if closePending since
+			loop could be modified - in which case we
+			have to start at the beginning of the list
+			again. Note that it would be bad
+			to hold up writepages here (rather than
+			in caller) with continuous retries */
+			cFYI(1, ("wp failed on reopen file"));
+			read_lock(&GlobalSMBSeslock);
+			/* can not use this handle, no write
+			   pending on this one after all */
+			atomic_dec(&open_file->wrtPending);
+			
+			if (open_file->closePend) /* list could have changed */
+				goto refind_writable;
+			/* else we simply continue to the next entry. Thus
+			   we do not loop on reopen errors.  If we
+			   can not reopen the file, for example if we
+			   reconnected to a server with another client
+			   racing to delete or lock the file we would not
+			   make progress if we restarted before the beginning
+			   of the loop here. */
 		}
 	}
 	read_unlock(&GlobalSMBSeslock);
@@ -1709,7 +1755,7 @@
 	struct page *page;
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *pTcon;
-	int bytes_read = 0;
+	unsigned int bytes_read = 0;
 	unsigned int read_size, i;
 	char *smb_read_data = NULL;
 	struct smb_com_read_rsp *pSMBr;
@@ -1803,7 +1849,7 @@
 
 			i +=  bytes_read >> PAGE_CACHE_SHIFT;
 			cifs_stats_bytes_read(pTcon, bytes_read);
-			if ((int)(bytes_read & PAGE_CACHE_MASK) != bytes_read) {
+			if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
 				i++; /* account for partial page */
 
 				/* server copy of file can have smaller size
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 279f3c5..5e8b388 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -115,7 +115,7 @@
 		inode->i_mode = le64_to_cpu(findData.Permissions);
 		/* since we set the inode type below we need to mask off
 		   to avoid strange results if bits set above */
-			inode->i_mode &= ~S_IFMT;
+		inode->i_mode &= ~S_IFMT;
 		if (type == UNIX_FILE) {
 			inode->i_mode |= S_IFREG;
 		} else if (type == UNIX_SYMLINK) {
@@ -575,19 +575,33 @@
 	return rc;
 }
 
+static const struct inode_operations cifs_ipc_inode_ops = {
+	.lookup = cifs_lookup,
+};
+
 /* gets root inode */
 void cifs_read_inode(struct inode *inode)
 {
-	int xid;
+	int xid, rc;
 	struct cifs_sb_info *cifs_sb;
 
 	cifs_sb = CIFS_SB(inode->i_sb);
 	xid = GetXid();
 
 	if (cifs_sb->tcon->unix_ext)
-		cifs_get_inode_info_unix(&inode, "", inode->i_sb, xid);
+		rc = cifs_get_inode_info_unix(&inode, "", inode->i_sb, xid);
 	else
-		cifs_get_inode_info(&inode, "", NULL, inode->i_sb, xid);
+		rc = cifs_get_inode_info(&inode, "", NULL, inode->i_sb, xid);
+	if (rc && cifs_sb->tcon->ipc) {
+		cFYI(1, ("ipc connection - fake read inode"));
+		inode->i_mode |= S_IFDIR;
+		inode->i_nlink = 2;
+		inode->i_op = &cifs_ipc_inode_ops;
+		inode->i_fop = &simple_dir_operations;
+		inode->i_uid = cifs_sb->mnt_uid;
+		inode->i_gid = cifs_sb->mnt_gid;
+	}
+
 	/* can not call macro FreeXid here since in a void func */
 	_FreeXid(xid);
 }
@@ -919,18 +933,25 @@
 			goto mkdir_out;
 		}
 
+		mode &= ~current->fs->umask;
 		rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT,
 				mode, NULL /* netfid */, pInfo, &oplock,
 				full_path, cifs_sb->local_nls,
 				cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
-		if (rc) {
+		if (rc == -EOPNOTSUPP) {
+			kfree(pInfo);
+			goto mkdir_retry_old;
+		} else if (rc) {
 			cFYI(1, ("posix mkdir returned 0x%x", rc));
 			d_drop(direntry);
 		} else {
 			int obj_type;
-			if (pInfo->Type == -1) /* no return info - go query */
+			if (pInfo->Type == cpu_to_le32(-1)) {
+				/* no return info, go query for it */
+				kfree(pInfo);
 				goto mkdir_get_info;
+			}
 /*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need
 	to set uid/gid */
 			inc_nlink(inode);
@@ -940,8 +961,10 @@
 				direntry->d_op = &cifs_dentry_ops;
 
 			newinode = new_inode(inode->i_sb);
-			if (newinode == NULL)
+			if (newinode == NULL) {
+				kfree(pInfo);
 				goto mkdir_get_info;
+			}
 			/* Is an i_ino of zero legal? */
 			/* Are there sanity checks we can use to ensure that
 			   the server is really filling in that field? */
@@ -972,7 +995,7 @@
 		kfree(pInfo);
 		goto mkdir_out;
 	}
-
+mkdir_retry_old:
 	/* BB add setting the equivalent of mode via CreateX w/ACLs */
 	rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls,
 			  cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -1377,8 +1400,17 @@
 	}
 	i_size_write(inode, offset);
 	spin_unlock(&inode->i_lock);
+	/*
+	 * unmap_mapping_range is called twice, first simply for efficiency
+	 * so that truncate_inode_pages does fewer single-page unmaps. However
+	 * after this first call, and before truncate_inode_pages finishes,
+	 * it is possible for private pages to be COWed, which remain after
+	 * truncate_inode_pages finishes, hence the second unmap_mapping_range
+	 * call must be made for correctness.
+	 */
 	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
 	truncate_inode_pages(mapping, offset);
+	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
 	goto out_truncate;
 
 do_expand:
@@ -1469,7 +1501,7 @@
 			atomic_dec(&open_file->wrtPending);
 			cFYI(1, ("SetFSize for attrs rc = %d", rc));
 			if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
-				int bytes_written;
+				unsigned int bytes_written;
 				rc = CIFSSMBWrite(xid, pTcon,
 						  nfid, 0, attrs->ia_size,
 						  &bytes_written, NULL, NULL,
@@ -1502,7 +1534,7 @@
 					cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 				if (rc == 0) {
-					int bytes_written;
+					unsigned int bytes_written;
 					rc = CIFSSMBWrite(xid, pTcon,
 							netfid, 0,
 							attrs->ia_size,
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 6a85ef7..11f2657 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -237,7 +237,7 @@
 	char *tmp_path = NULL;
 	char *tmpbuffer;
 	unsigned char *referrals = NULL;
-	int num_referrals = 0;
+	unsigned int num_referrals = 0;
 	int len;
 	__u16 fid;
 
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 0bcec08..51ec681 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -169,7 +169,6 @@
 void
 cifs_buf_release(void *buf_to_free)
 {
-
 	if (buf_to_free == NULL) {
 		/* cFYI(1, ("Null buffer passed to cifs_buf_release"));*/
 		return;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 2bfed3f..f06359c 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -114,10 +114,16 @@
 	{ERRusempx, -EIO},
 	{ERRusestd, -EIO},
 	{ERR_NOTIFY_ENUM_DIR, -ENOBUFS},
-	{ERRaccountexpired, -EACCES},
+	{ERRnoSuchUser, -EACCES},
+/*	{ERRaccountexpired, -EACCES},
 	{ERRbadclient, -EACCES},
 	{ERRbadLogonTime, -EACCES},
-	{ERRpasswordExpired, -EACCES},
+	{ERRpasswordExpired, -EACCES},*/
+	{ERRaccountexpired, -EKEYEXPIRED},
+	{ERRbadclient, -EACCES},
+	{ERRbadLogonTime, -EACCES},
+	{ERRpasswordExpired, -EKEYEXPIRED},
+
 	{ERRnosupport, -EINVAL},
 	{0, 0}
 };
@@ -270,7 +276,7 @@
 	 from NT_STATUS_NO_SUCH_USER to NT_STATUS_LOGON_FAILURE
 	 during the session setup } */
 	{
-	ERRDOS, ERRnoaccess, NT_STATUS_NO_SUCH_USER}, {
+	ERRDOS, ERRnoaccess, NT_STATUS_NO_SUCH_USER}, { /* could map to 2238 */
 	ERRHRD, ERRgeneral, NT_STATUS_GROUP_EXISTS}, {
 	ERRHRD, ERRgeneral, NT_STATUS_NO_SUCH_GROUP}, {
 	ERRHRD, ERRgeneral, NT_STATUS_MEMBER_IN_GROUP}, {
@@ -285,10 +291,10 @@
 	ERRHRD, ERRgeneral, NT_STATUS_PASSWORD_RESTRICTION}, {
 	ERRDOS, ERRnoaccess, NT_STATUS_LOGON_FAILURE}, {
 	ERRHRD, ERRgeneral, NT_STATUS_ACCOUNT_RESTRICTION}, {
-	ERRSRV, 2241, NT_STATUS_INVALID_LOGON_HOURS}, {
-	ERRSRV, 2240, NT_STATUS_INVALID_WORKSTATION}, {
+	ERRSRV, ERRbadLogonTime, NT_STATUS_INVALID_LOGON_HOURS}, {
+	ERRSRV, ERRbadclient, NT_STATUS_INVALID_WORKSTATION}, {
 	ERRSRV, ERRpasswordExpired, NT_STATUS_PASSWORD_EXPIRED}, {
-	ERRSRV, 2239, NT_STATUS_ACCOUNT_DISABLED}, {
+	ERRSRV, ERRaccountexpired, NT_STATUS_ACCOUNT_DISABLED}, {
 	ERRHRD, ERRgeneral, NT_STATUS_NONE_MAPPED}, {
 	ERRHRD, ERRgeneral, NT_STATUS_TOO_MANY_LUIDS_REQUESTED}, {
 	ERRHRD, ERRgeneral, NT_STATUS_LUIDS_EXHAUSTED}, {
@@ -585,7 +591,7 @@
 	ERRDOS, ERRnoaccess, NT_STATUS_TRUST_FAILURE}, {
 	ERRHRD, ERRgeneral, NT_STATUS_MUTANT_LIMIT_EXCEEDED}, {
 	ERRDOS, ERRnetlogonNotStarted, NT_STATUS_NETLOGON_NOT_STARTED}, {
-	ERRSRV, 2239, NT_STATUS_ACCOUNT_EXPIRED}, {
+	ERRSRV, ERRaccountexpired, NT_STATUS_ACCOUNT_EXPIRED}, {
 	ERRHRD, ERRgeneral, NT_STATUS_POSSIBLE_DEADLOCK}, {
 	ERRHRD, ERRgeneral, NT_STATUS_NETWORK_CREDENTIAL_CONFLICT}, {
 	ERRHRD, ERRgeneral, NT_STATUS_REMOTE_SESSION_LIMIT}, {
@@ -754,7 +760,7 @@
 }
 
 int
-map_smb_to_linux_error(struct smb_hdr *smb)
+map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
 {
 	unsigned int i;
 	int rc = -EIO;	/* if transport error smb error may not be set */
@@ -771,7 +777,9 @@
 		/* translate the newer STATUS codes to old style SMB errors
 		 * and then to POSIX errors */
 		__u32 err = le32_to_cpu(smb->Status.CifsError);
-		if (cifsFYI & CIFS_RC)
+		if (logErr && (err != (NT_STATUS_MORE_PROCESSING_REQUIRED)))
+			cifs_print_status(err);
+		else if (cifsFYI & CIFS_RC)
 			cifs_print_status(err);
 		ntstatus_to_dos(err, &smberrclass, &smberrcode);
 	} else {
@@ -813,7 +821,7 @@
 	}
 	/* else ERRHRD class errors or junk  - return EIO */
 
-	cFYI(1, (" !!Mapping smb error code %d to POSIX err %d !!",
+	cFYI(1, ("Mapping smb error code %d to POSIX err %d",
 		 smberrcode, rc));
 
 	/* generic corrective action e.g. reconnect SMB session on
@@ -899,8 +907,11 @@
 		cERROR(1, ("illegal hours %d", st->Hours));
 	days = sd->Day;
 	month = sd->Month;
-	if ((days > 31) || (month > 12))
+	if ((days > 31) || (month > 12)) {
 		cERROR(1, ("illegal date, month %d day: %d", month, days));
+		if (month > 12)
+			month = 12;
+	}
 	month -= 1;
 	days += total_days_of_prev_months[month];
 	days += 3652; /* account for difference in days between 1980 and 1970 */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 916df94..3746580 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -121,7 +121,7 @@
 
 
 static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
-			  char *buf, int *pobject_type, int isNewInode)
+			  char *buf, unsigned int *pobject_type, int isNewInode)
 {
 	loff_t local_size;
 	struct timespec local_mtime;
@@ -294,7 +294,7 @@
 }
 
 static void unix_fill_in_inode(struct inode *tmp_inode,
-	FILE_UNIX_INFO *pfindData, int *pobject_type, int isNewInode)
+	FILE_UNIX_INFO *pfindData, unsigned int *pobject_type, int isNewInode)
 {
 	loff_t local_size;
 	struct timespec local_mtime;
@@ -826,7 +826,7 @@
 	int rc = 0;
 	struct qstr qstring;
 	struct cifsFileInfo *pCifsF;
-	unsigned obj_type;
+	unsigned int obj_type;
 	ino_t  inum;
 	struct cifs_sb_info *cifs_sb;
 	struct inode *tmp_inode;
@@ -1067,7 +1067,7 @@
 		for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
 			if (current_entry == NULL) {
 				/* evaluate whether this case is an error */
-				cERROR(1,("past end of SMB num to fill %d i %d",
+				cERROR(1, ("past SMB end,  num to fill %d i %d",
 					  num_to_fill, i));
 				break;
 			}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 892be9b..899dc60 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -67,14 +67,59 @@
 		pSMB->req.hdr.Flags2 |= SMBFLG2_DFS;
 		capabilities |= CAP_DFS;
 	}
-	if (ses->capabilities & CAP_UNIX) {
+	if (ses->capabilities & CAP_UNIX)
 		capabilities |= CAP_UNIX;
-	}
 
 	/* BB check whether to init vcnum BB */
 	return capabilities;
 }
 
+static void
+unicode_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp)
+{
+	char *bcc_ptr = *pbcc_area;
+	int bytes_ret = 0;
+
+	/* Copy OS version */
+	bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
+				  nls_cp);
+	bcc_ptr += 2 * bytes_ret;
+	bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release,
+				  32, nls_cp);
+	bcc_ptr += 2 * bytes_ret;
+	bcc_ptr += 2; /* trailing null */
+
+	bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
+				  32, nls_cp);
+	bcc_ptr += 2 * bytes_ret;
+	bcc_ptr += 2; /* trailing null */
+
+	*pbcc_area = bcc_ptr;
+}
+
+static void unicode_domain_string(char **pbcc_area, struct cifsSesInfo *ses,
+				   const struct nls_table *nls_cp)
+{
+	char *bcc_ptr = *pbcc_area;
+	int bytes_ret = 0;
+
+	/* copy domain */
+	if (ses->domainName == NULL) {
+		/* Sending null domain better than using a bogus domain name (as
+		we did briefly in 2.6.18) since server will use its default */
+		*bcc_ptr = 0;
+		*(bcc_ptr+1) = 0;
+		bytes_ret = 0;
+	} else
+		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName,
+					  256, nls_cp);
+	bcc_ptr += 2 * bytes_ret;
+	bcc_ptr += 2;  /* account for null terminator */
+
+	*pbcc_area = bcc_ptr;
+}
+
+
 static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
 				   const struct nls_table *nls_cp)
 {
@@ -100,32 +145,9 @@
 	}
 	bcc_ptr += 2 * bytes_ret;
 	bcc_ptr += 2; /* account for null termination */
-	/* copy domain */
-	if (ses->domainName == NULL) {
-		/* Sending null domain better than using a bogus domain name (as
-		we did briefly in 2.6.18) since server will use its default */
-		*bcc_ptr = 0;
-		*(bcc_ptr+1) = 0;
-		bytes_ret = 0;
-	} else
-		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName,
-					  256, nls_cp);
-	bcc_ptr += 2 * bytes_ret;
-	bcc_ptr += 2;  /* account for null terminator */
 
-	/* Copy OS version */
-	bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
-				  nls_cp);
-	bcc_ptr += 2 * bytes_ret;
-	bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release,
-				  32, nls_cp);
-	bcc_ptr += 2 * bytes_ret;
-	bcc_ptr += 2; /* trailing null */
-
-	bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
-				  32, nls_cp);
-	bcc_ptr += 2 * bytes_ret;
-	bcc_ptr += 2; /* trailing null */
+	unicode_domain_string(&bcc_ptr, ses, nls_cp);
+	unicode_oslm_strings(&bcc_ptr, nls_cp);
 
 	*pbcc_area = bcc_ptr;
 }
@@ -203,14 +225,11 @@
 	if (len >= words_left)
 		return rc;
 
-	if (ses->serverOS)
-		kfree(ses->serverOS);
+	kfree(ses->serverOS);
 	/* UTF-8 string will not grow more than four times as big as UCS-16 */
 	ses->serverOS = kzalloc(4 * len, GFP_KERNEL);
-	if (ses->serverOS != NULL) {
-		cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len,
-				   nls_cp);
-	}
+	if (ses->serverOS != NULL)
+		cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp);
 	data += 2 * (len + 1);
 	words_left -= len + 1;
 
@@ -220,8 +239,7 @@
 	if (len >= words_left)
 		return rc;
 
-	if (ses->serverNOS)
-		kfree(ses->serverNOS);
+	kfree(ses->serverNOS);
 	ses->serverNOS = kzalloc(4 * len, GFP_KERNEL); /* BB this is wrong length FIXME BB */
 	if (ses->serverNOS != NULL) {
 		cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len,
@@ -240,8 +258,7 @@
 	if (len > words_left)
 		return rc;
 
-	if (ses->serverDomain)
-		kfree(ses->serverDomain);
+	kfree(ses->serverDomain);
 	ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */
 	if (ses->serverDomain != NULL) {
 		cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
@@ -271,8 +288,7 @@
 	if (len >= bleft)
 		return rc;
 
-	if (ses->serverOS)
-		kfree(ses->serverOS);
+	kfree(ses->serverOS);
 
 	ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
 	if (ses->serverOS)
@@ -289,8 +305,7 @@
 	if (len >= bleft)
 		return rc;
 
-	if (ses->serverNOS)
-		kfree(ses->serverNOS);
+	kfree(ses->serverNOS);
 
 	ses->serverNOS = kzalloc(len + 1, GFP_KERNEL);
 	if (ses->serverNOS)
@@ -479,7 +494,8 @@
 		if (ses->capabilities & CAP_UNICODE) {
 			if (iov[0].iov_len % 2) {
 				*bcc_ptr = 0;
-			}	bcc_ptr++;
+				bcc_ptr++;
+			}
 			unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
 		} else
 			ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
@@ -497,7 +513,8 @@
 
 	iov[1].iov_base = str_area;
 	iov[1].iov_len = count;
-	rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type, 0);
+	rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type,
+			  0 /* not long op */, 1 /* log NT STATUS if any */ );
 	/* SMB request buf freed in SendReceive2 */
 
 	cFYI(1, ("ssetup rc from sendrecv2 is %d", rc));
diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h
index 2ef0be2..7f50e85 100644
--- a/fs/cifs/smberr.h
+++ b/fs/cifs/smberr.h
@@ -173,9 +173,10 @@
 #define ERRusestd		251	/* temporarily unable to use either raw
 					   or mpx */
 #define ERR_NOTIFY_ENUM_DIR	1024
+#define ERRnoSuchUser		2238	/* user account does not exist */
 #define ERRaccountexpired	2239
-#define ERRbadclient		2240
-#define ERRbadLogonTime		2241
+#define ERRbadclient		2240	/* can not logon from this client */
+#define ERRbadLogonTime		2241	/* logon hours do not allow this */
 #define ERRpasswordExpired	2242
 #define ERRnetlogonNotStarted	2455
 #define ERRnosupport		0xFFFF
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 746bc94..7ed32b3 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -55,7 +55,7 @@
 	if (temp == NULL)
 		return temp;
 	else {
-		memset(temp, 0, sizeof (struct mid_q_entry));
+		memset(temp, 0, sizeof(struct mid_q_entry));
 		temp->mid = smb_buffer->Mid;	/* always LE */
 		temp->pid = current->pid;
 		temp->command = smb_buffer->Command;
@@ -158,7 +158,7 @@
 	iov.iov_len = len;
 
 	smb_msg.msg_name = sin;
-	smb_msg.msg_namelen = sizeof (struct sockaddr);
+	smb_msg.msg_namelen = sizeof(struct sockaddr);
 	smb_msg.msg_control = NULL;
 	smb_msg.msg_controllen = 0;
 	smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/
@@ -228,7 +228,7 @@
 		return -ENOTSOCK; /* BB eventually add reconnect code here */
 
 	smb_msg.msg_name = sin;
-	smb_msg.msg_namelen = sizeof (struct sockaddr);
+	smb_msg.msg_namelen = sizeof(struct sockaddr);
 	smb_msg.msg_control = NULL;
 	smb_msg.msg_controllen = 0;
 	smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/
@@ -363,9 +363,8 @@
 		} /* else ok - we are setting up session */
 	}
 	*ppmidQ = AllocMidQEntry(in_buf, ses);
-	if (*ppmidQ == NULL) {
+	if (*ppmidQ == NULL)
 		return -ENOMEM;
-	}
 	return 0;
 }
 
@@ -419,7 +418,7 @@
 int
 SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
 	     struct kvec *iov, int n_vec, int *pRespBufType /* ret */,
-	     const int long_op)
+	     const int long_op, const int logError)
 {
 	int rc = 0;
 	unsigned int receive_len;
@@ -465,7 +464,6 @@
 		wake_up(&ses->server->request_q);
 		return rc;
 	}
-
 	rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number);
 
 	midQ->midState = MID_REQUEST_SUBMITTED;
@@ -568,13 +566,11 @@
 			}
 
 			/* BB special case reconnect tid and uid here? */
-			/* BB special case Errbadpassword and pwdexpired here */
-			rc = map_smb_to_linux_error(midQ->resp_buf);
+			rc = map_smb_to_linux_error(midQ->resp_buf, logError);
 
 			/* convert ByteCount if necessary */
-			if (receive_len >=
-			    sizeof (struct smb_hdr) -
-			    4 /* do not count RFC1001 header */  +
+			if (receive_len >= sizeof(struct smb_hdr) - 4
+			    /* do not count RFC1001 header */  +
 			    (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
 				BCC(midQ->resp_buf) =
 					le16_to_cpu(BCC_LE(midQ->resp_buf));
@@ -749,12 +745,11 @@
 			*pbytes_returned = out_buf->smb_buf_length;
 
 			/* BB special case reconnect tid and uid here? */
-			rc = map_smb_to_linux_error(out_buf);
+			rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
 
 			/* convert ByteCount if necessary */
-			if (receive_len >=
-			    sizeof (struct smb_hdr) -
-			    4 /* do not count RFC1001 header */  +
+			if (receive_len >= sizeof(struct smb_hdr) - 4
+			    /* do not count RFC1001 header */  +
 			    (2 * out_buf->WordCount) + 2 /* bcc */ )
 				BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
 		} else {
@@ -993,12 +988,11 @@
 			*pbytes_returned = out_buf->smb_buf_length;
 
 			/* BB special case reconnect tid and uid here? */
-			rc = map_smb_to_linux_error(out_buf);
+			rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
 
 			/* convert ByteCount if necessary */
-			if (receive_len >=
-			    sizeof (struct smb_hdr) -
-			    4 /* do not count RFC1001 header */  +
+			if (receive_len >= sizeof(struct smb_hdr) - 4
+			    /* do not count RFC1001 header */  +
 			    (2 * out_buf->WordCount) + 2 /* bcc */ )
 				BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
 		} else {
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index f61e433..369e838 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -261,21 +261,26 @@
 				cifs_sb->local_nls,
 				cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
-/*		else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+		else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
 			__u16 fid;
 			int oplock = FALSE;
-			rc = CIFSSMBOpen(xid, pTcon, full_path,
-					 FILE_OPEN, GENERIC_READ, 0, &fid,
-					 &oplock, NULL, cifs_sb->local_nls,
-					 cifs_sb->mnt_cifs_flags &
-					 CIFS_MOUNT_MAP_SPECIAL_CHR);
+			if (experimEnabled) 
+				rc = CIFSSMBOpen(xid, pTcon, full_path,
+					FILE_OPEN, GENERIC_READ, 0, &fid,
+					&oplock, NULL, cifs_sb->local_nls,
+					cifs_sb->mnt_cifs_flags &
+					CIFS_MOUNT_MAP_SPECIAL_CHR);
+			/* else rc is EOPNOTSUPP from above */
+
 			if(rc == 0) {
 				rc = CIFSSMBGetCIFSACL(xid, pTcon, fid,
 					ea_value, buf_size,
 					ACL_TYPE_ACCESS);
 				CIFSSMBClose(xid, pTcon, fid);
 			}
-		} */  /* BB enable after fixing up return data */
+		}
+#endif /* EXPERIMENTAL */
 #else
 		cFYI(1, ("query POSIX ACL not supported yet"));
 #endif /* CONFIG_CIFS_POSIX */
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index cdb4c07..359e531 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -51,7 +51,7 @@
 
         inp->ih.opcode = opcode;
 	inp->ih.pid = current->pid;
-	inp->ih.pgid = process_group(current);
+	inp->ih.pgid = task_pgrp_nr(current);
 #ifdef CONFIG_CODA_FS_OLD_API
 	memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
 	inp->ih.cred.cr_fsuid = current->fsuid;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 6438941..4f74154 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -456,7 +456,7 @@
 		printk(KERN_DEBUG "dlm: process %s (%d) version mismatch "
 		       "user (%d.%d.%d) kernel (%d.%d.%d)\n",
 		       current->comm,
-		       current->pid,
+		       task_pid_nr(current),
 		       req->version[0],
 		       req->version[1],
 		       req->version[2],
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index de61892..34f68f3 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -325,15 +325,14 @@
 	int wake_nests = 0;
 	unsigned long flags;
 	struct task_struct *this_task = current;
-	struct list_head *lsthead = &psw->wake_task_list, *lnk;
+	struct list_head *lsthead = &psw->wake_task_list;
 	struct wake_task_node *tncur;
 	struct wake_task_node tnode;
 
 	spin_lock_irqsave(&psw->lock, flags);
 
 	/* Try to see if the current task is already inside this wakeup call */
-	list_for_each(lnk, lsthead) {
-		tncur = list_entry(lnk, struct wake_task_node, llink);
+	list_for_each_entry(tncur, lsthead, llink) {
 
 		if (tncur->wq == wq ||
 		    (tncur->task == this_task && ++wake_nests > EP_MAX_POLLWAKE_NESTS)) {
diff --git a/fs/exec.c b/fs/exec.c
index 070ddf13..2c942e2 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -234,7 +234,7 @@
 	vma->vm_start = vma->vm_end - PAGE_SIZE;
 
 	vma->vm_flags = VM_STACK_FLAGS;
-	vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	err = insert_vm_struct(mm, vma);
 	if (err) {
 		up_write(&mm->mmap_sem);
@@ -775,8 +775,8 @@
 	 * Reparenting needs write_lock on tasklist_lock,
 	 * so it is safe to do it under read_lock.
 	 */
-	if (unlikely(tsk->group_leader == child_reaper(tsk)))
-		tsk->nsproxy->pid_ns->child_reaper = tsk;
+	if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
+		task_active_pid_ns(tsk)->child_reaper = tsk;
 
 	zap_other_threads(tsk);
 	read_unlock(&tasklist_lock);
@@ -841,8 +841,8 @@
 		 */
 		tsk->start_time = leader->start_time;
 
-		BUG_ON(leader->tgid != tsk->tgid);
-		BUG_ON(tsk->pid == tsk->tgid);
+		BUG_ON(!same_thread_group(leader, tsk));
+		BUG_ON(has_group_leader_pid(tsk));
 		/*
 		 * An exec() starts a new thread group with the
 		 * TGID of the previous thread group. Rehash the
@@ -857,7 +857,7 @@
 		 */
 		detach_pid(tsk, PIDTYPE_PID);
 		tsk->pid = leader->pid;
-		attach_pid(tsk, PIDTYPE_PID,  find_pid(tsk->pid));
+		attach_pid(tsk, PIDTYPE_PID,  task_pid(leader));
 		transfer_pid(leader, tsk, PIDTYPE_PGID);
 		transfer_pid(leader, tsk, PIDTYPE_SID);
 		list_replace_rcu(&leader->tasks, &tsk->tasks);
@@ -1433,7 +1433,7 @@
 			case 'p':
 				pid_in_pattern = 1;
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", current->tgid);
+					      "%d", task_tgid_vnr(current));
 				if (rc > out_end - out_ptr)
 					goto out;
 				out_ptr += rc;
@@ -1513,7 +1513,7 @@
 	if (!ispipe && !pid_in_pattern
             && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
 		rc = snprintf(out_ptr, out_end - out_ptr,
-			      ".%d", current->tgid);
+			      ".%d", task_tgid_vnr(current));
 		if (rc > out_end - out_ptr)
 			goto out;
 		out_ptr += rc;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3dec003..9b162cd 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2954,7 +2954,7 @@
 		return 0;
 
 	if (ext3_journal_current_handle()) {
-		jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n");
+		jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
 		dump_stack();
 		return -EIO;
 	}
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index f58cbb2..4083738 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -741,12 +741,11 @@
 		}
 	} else {
 		/* Allocate a buffer where we construct the new block. */
-		s->base = kmalloc(sb->s_blocksize, GFP_KERNEL);
+		s->base = kzalloc(sb->s_blocksize, GFP_KERNEL);
 		/* assert(header == s->base) */
 		error = -ENOMEM;
 		if (s->base == NULL)
 			goto cleanup;
-		memset(s->base, 0, sb->s_blocksize);
 		header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
 		header(s->base)->h_blocks = cpu_to_le32(1);
 		header(s->base)->h_refcount = cpu_to_le32(1);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index c9db73f..8685263 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -18,6 +18,7 @@
 #include <linux/ptrace.h>
 #include <linux/signal.h>
 #include <linux/rcupdate.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -292,7 +293,7 @@
 		who = -who;
 	}
 	rcu_read_lock();
-	pid = find_pid(who);
+	pid = find_vpid(who);
 	result = __f_setown(filp, pid, type, force);
 	rcu_read_unlock();
 	return result;
@@ -308,7 +309,7 @@
 {
 	pid_t pid;
 	read_lock(&filp->f_owner.lock);
-	pid = pid_nr(filp->f_owner.pid);
+	pid = pid_nr_ns(filp->f_owner.pid, current->nsproxy->pid_ns);
 	if (filp->f_owner.pid_type == PIDTYPE_PGID)
 		pid = -pid;
 	read_unlock(&filp->f_owner.lock);
diff --git a/fs/file_table.c b/fs/file_table.c
index 3176fef..664e3f2 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -323,12 +323,11 @@
 
 int fs_may_remount_ro(struct super_block *sb)
 {
-	struct list_head *p;
+	struct file *file;
 
 	/* Check that no files are currently opened for writing. */
 	file_list_lock();
-	list_for_each(p, &sb->s_files) {
-		struct file *file = list_entry(p, struct file, f_u.fu_list);
+	list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
 		struct inode *inode = file->f_path.dentry->d_inode;
 
 		/* File with pending delete? */
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 686734f..0fca820 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -89,7 +89,7 @@
 		if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev"))
 			printk(KERN_DEBUG
 			       "%s(%d): dirtied inode %lu (%s) on %s\n",
-			       current->comm, current->pid, inode->i_ino,
+			       current->comm, task_pid_nr(current), inode->i_ino,
 			       name, inode->i_sb->s_id);
 	}
 
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 10d2c21..d6ff77e 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -25,6 +25,7 @@
 #include <linux/capability.h>
 #include <linux/syscalls.h>
 #include <linux/security.h>
+#include <linux/pid_namespace.h>
 
 static int set_task_ioprio(struct task_struct *task, int ioprio)
 {
@@ -93,7 +94,7 @@
 			if (!who)
 				p = current;
 			else
-				p = find_task_by_pid(who);
+				p = find_task_by_vpid(who);
 			if (p)
 				ret = set_task_ioprio(p, ioprio);
 			break;
@@ -101,7 +102,7 @@
 			if (!who)
 				pgrp = task_pgrp(current);
 			else
-				pgrp = find_pid(who);
+				pgrp = find_vpid(who);
 			do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
@@ -180,7 +181,7 @@
 			if (!who)
 				p = current;
 			else
-				p = find_task_by_pid(who);
+				p = find_task_by_vpid(who);
 			if (p)
 				ret = get_task_ioprio(p);
 			break;
@@ -188,7 +189,7 @@
 			if (!who)
 				pgrp = task_pgrp(current);
 			else
-				pgrp = find_pid(who);
+				pgrp = find_vpid(who);
 			do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 				tmpio = get_task_ioprio(p);
 				if (tmpio < 0)
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a263d82..8f1f2aa 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -466,7 +466,7 @@
 	spin_unlock(&journal->j_list_lock);
 
 	if (err)
-		__journal_abort_hard(journal);
+		journal_abort(journal, err);
 
 	journal_write_revoke_records(journal, commit_transaction);
 
@@ -524,7 +524,7 @@
 
 			descriptor = journal_get_descriptor_buffer(journal);
 			if (!descriptor) {
-				__journal_abort_hard(journal);
+				journal_abort(journal, -EIO);
 				continue;
 			}
 
@@ -557,7 +557,7 @@
 		   and repeat this loop: we'll fall into the
 		   refile-on-abort condition above. */
 		if (err) {
-			__journal_abort_hard(journal);
+			journal_abort(journal, err);
 			continue;
 		}
 
@@ -748,7 +748,7 @@
 		err = -EIO;
 
 	if (err)
-		__journal_abort_hard(journal);
+		journal_abort(journal, err);
 
 	/* End of a transaction!  Finally, we can do checkpoint
            processing: any buffers committed as a result of this
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 5d9fec0..5d14243 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -35,6 +35,7 @@
 #include <linux/kthread.h>
 #include <linux/poison.h>
 #include <linux/proc_fs.h>
+#include <linux/debugfs.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -654,10 +655,9 @@
 	journal_t *journal;
 	int err;
 
-	journal = kmalloc(sizeof(*journal), GFP_KERNEL);
+	journal = kzalloc(sizeof(*journal), GFP_KERNEL);
 	if (!journal)
 		goto fail;
-	memset(journal, 0, sizeof(*journal));
 
 	init_waitqueue_head(&journal->j_wait_transaction_locked);
 	init_waitqueue_head(&journal->j_wait_logspace);
@@ -1852,63 +1852,40 @@
 }
 
 /*
- * /proc tunables
+ * debugfs tunables
  */
-#if defined(CONFIG_JBD_DEBUG)
-int journal_enable_debug;
+#ifdef CONFIG_JBD_DEBUG
+
+u8 journal_enable_debug __read_mostly;
 EXPORT_SYMBOL(journal_enable_debug);
-#endif
 
-#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
+static struct dentry *jbd_debugfs_dir;
+static struct dentry *jbd_debug;
 
-static struct proc_dir_entry *proc_jbd_debug;
-
-static int read_jbd_debug(char *page, char **start, off_t off,
-			  int count, int *eof, void *data)
+static void __init jbd_create_debugfs_entry(void)
 {
-	int ret;
-
-	ret = sprintf(page + off, "%d\n", journal_enable_debug);
-	*eof = 1;
-	return ret;
+	jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
+	if (jbd_debugfs_dir)
+		jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO,
+					       jbd_debugfs_dir,
+					       &journal_enable_debug);
 }
 
-static int write_jbd_debug(struct file *file, const char __user *buffer,
-			   unsigned long count, void *data)
+static void __exit jbd_remove_debugfs_entry(void)
 {
-	char buf[32];
-
-	if (count > ARRAY_SIZE(buf) - 1)
-		count = ARRAY_SIZE(buf) - 1;
-	if (copy_from_user(buf, buffer, count))
-		return -EFAULT;
-	buf[ARRAY_SIZE(buf) - 1] = '\0';
-	journal_enable_debug = simple_strtoul(buf, NULL, 10);
-	return count;
-}
-
-#define JBD_PROC_NAME "sys/fs/jbd-debug"
-
-static void __init create_jbd_proc_entry(void)
-{
-	proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
-	if (proc_jbd_debug) {
-		/* Why is this so hard? */
-		proc_jbd_debug->read_proc = read_jbd_debug;
-		proc_jbd_debug->write_proc = write_jbd_debug;
-	}
-}
-
-static void __exit remove_jbd_proc_entry(void)
-{
-	if (proc_jbd_debug)
-		remove_proc_entry(JBD_PROC_NAME, NULL);
+	debugfs_remove(jbd_debug);
+	debugfs_remove(jbd_debugfs_dir);
 }
 
 #else
 
-#define create_jbd_proc_entry() do {} while (0)
-#define remove_jbd_proc_entry() do {} while (0)
+static inline void jbd_create_debugfs_entry(void)
+{
+}
+
+static inline void jbd_remove_debugfs_entry(void)
+{
+}
 
 #endif
 
@@ -1966,7 +1943,7 @@
 	ret = journal_init_caches();
 	if (ret != 0)
 		journal_destroy_caches();
-	create_jbd_proc_entry();
+	jbd_create_debugfs_entry();
 	return ret;
 }
 
@@ -1977,7 +1954,7 @@
 	if (n)
 		printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
 #endif
-	remove_jbd_proc_entry();
+	jbd_remove_debugfs_entry();
 	journal_destroy_caches();
 }
 
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 2a5f4b8..c5d9694 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -250,10 +250,10 @@
 	if (!err)
 		err = do_one_pass(journal, &info, PASS_REPLAY);
 
-	jbd_debug(0, "JBD: recovery, exit status %d, "
+	jbd_debug(1, "JBD: recovery, exit status %d, "
 		  "recovered transactions %u to %u\n",
 		  err, info.start_transaction, info.end_transaction);
-	jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
+	jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
 		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
 
 	/* Restart the log at the next transaction ID, thus invalidating
@@ -297,7 +297,7 @@
 #ifdef CONFIG_JBD_DEBUG
 		int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
 #endif
-		jbd_debug(0,
+		jbd_debug(1,
 			  "JBD: ignoring %d transaction%s from the journal.\n",
 			  dropped, (dropped == 1) ? "" : "s");
 		journal->j_transaction_sequence = ++info.end_transaction;
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 9841b1e..08ff6c7 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -96,13 +96,12 @@
 
 alloc_transaction:
 	if (!journal->j_running_transaction) {
-		new_transaction = kmalloc(sizeof(*new_transaction),
+		new_transaction = kzalloc(sizeof(*new_transaction),
 						GFP_NOFS|__GFP_NOFAIL);
 		if (!new_transaction) {
 			ret = -ENOMEM;
 			goto out;
 		}
-		memset(new_transaction, 0, sizeof(*new_transaction));
 	}
 
 	jbd_debug(3, "New handle %p going live.\n", handle);
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index 2a49f2c..4130ada 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -80,28 +80,28 @@
 #define JFFS2_ERROR(fmt, ...)						\
 	do {								\
 		printk(JFFS2_ERR_MSG_PREFIX				\
-			" (%d) %s: " fmt, current->pid,			\
+			" (%d) %s: " fmt, task_pid_nr(current),		\
 			__FUNCTION__ , ##__VA_ARGS__);			\
 	} while(0)
 
 #define JFFS2_WARNING(fmt, ...)						\
 	do {								\
 		printk(JFFS2_WARN_MSG_PREFIX				\
-			" (%d) %s: " fmt, current->pid,			\
+			" (%d) %s: " fmt, task_pid_nr(current),		\
 			__FUNCTION__ , ##__VA_ARGS__);			\
 	} while(0)
 
 #define JFFS2_NOTICE(fmt, ...)						\
 	do {								\
 		printk(JFFS2_NOTICE_MSG_PREFIX				\
-			" (%d) %s: " fmt, current->pid,			\
+			" (%d) %s: " fmt, task_pid_nr(current),		\
 			__FUNCTION__ , ##__VA_ARGS__);			\
 	} while(0)
 
 #define JFFS2_DEBUG(fmt, ...)						\
 	do {								\
 		printk(JFFS2_DBG_MSG_PREFIX				\
-			" (%d) %s: " fmt, current->pid,			\
+			" (%d) %s: " fmt, task_pid_nr(current),		\
 			__FUNCTION__ , ##__VA_ARGS__);			\
 	} while(0)
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 07daa79..8607529 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1411,7 +1411,7 @@
 		mnt_flags |= MNT_RELATIME;
 
 	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
-		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME);
+		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
 
 	/* ... and get the mountpoint */
 	retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index af8b235..11833f4 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -168,7 +168,8 @@
 	spin_unlock(&inode->i_lock);
 
 	spin_unlock(&clp->cl_lock);
-	kfree(delegation);
+	if (delegation != NULL)
+		nfs_free_delegation(delegation);
 	return status;
 }
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8ec7fbd..3533453 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -562,6 +562,7 @@
 	nfs_fattr_init(&fattr);
 	desc->entry = &my_entry;
 
+	nfs_block_sillyrename(dentry);
 	while(!desc->entry->eof) {
 		res = readdir_search_pagecache(desc);
 
@@ -592,6 +593,7 @@
 			break;
 		}
 	}
+	nfs_unblock_sillyrename(dentry);
 	unlock_kernel();
 	if (res > 0)
 		res = 0;
@@ -866,6 +868,7 @@
 static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 {
 	struct dentry *res;
+	struct dentry *parent;
 	struct inode *inode = NULL;
 	int error;
 	struct nfs_fh fhandle;
@@ -894,26 +897,31 @@
 		goto out_unlock;
 	}
 
+	parent = dentry->d_parent;
+	/* Protect against concurrent sillydeletes */
+	nfs_block_sillyrename(parent);
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
 	if (error == -ENOENT)
 		goto no_entry;
 	if (error < 0) {
 		res = ERR_PTR(error);
-		goto out_unlock;
+		goto out_unblock_sillyrename;
 	}
 	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
 	res = (struct dentry *)inode;
 	if (IS_ERR(res))
-		goto out_unlock;
+		goto out_unblock_sillyrename;
 
 no_entry:
 	res = d_materialise_unique(dentry, inode);
 	if (res != NULL) {
 		if (IS_ERR(res))
-			goto out_unlock;
+			goto out_unblock_sillyrename;
 		dentry = res;
 	}
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+out_unblock_sillyrename:
+	nfs_unblock_sillyrename(parent);
 out_unlock:
 	unlock_kernel();
 out:
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d29f90d..b3bb89f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -131,7 +131,7 @@
 {
 	/* Ensure that dirty pages are flushed out with the right creds */
 	if (filp->f_mode & FMODE_WRITE)
-		filemap_fdatawrite(filp->f_mapping);
+		nfs_wb_all(filp->f_path.dentry->d_inode);
 	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
 	return NFS_PROTO(inode)->file_release(inode, filp);
 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6d2f2a3..db5d96d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -514,7 +514,7 @@
 	return ctx;
 }
 
-void put_nfs_open_context(struct nfs_open_context *ctx)
+static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait)
 {
 	struct inode *inode = ctx->path.dentry->d_inode;
 
@@ -522,8 +522,12 @@
 		return;
 	list_del(&ctx->list);
 	spin_unlock(&inode->i_lock);
-	if (ctx->state != NULL)
-		nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+	if (ctx->state != NULL) {
+		if (wait)
+			nfs4_close_sync(&ctx->path, ctx->state, ctx->mode);
+		else
+			nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+	}
 	if (ctx->cred != NULL)
 		put_rpccred(ctx->cred);
 	dput(ctx->path.dentry);
@@ -531,6 +535,16 @@
 	kfree(ctx);
 }
 
+void put_nfs_open_context(struct nfs_open_context *ctx)
+{
+	__put_nfs_open_context(ctx, 0);
+}
+
+static void put_nfs_open_context_sync(struct nfs_open_context *ctx)
+{
+	__put_nfs_open_context(ctx, 1);
+}
+
 /*
  * Ensure that mmap has a recent RPC credential for use when writing out
  * shared pages
@@ -577,7 +591,7 @@
 		spin_lock(&inode->i_lock);
 		list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
 		spin_unlock(&inode->i_lock);
-		put_nfs_open_context(ctx);
+		put_nfs_open_context_sync(ctx);
 	}
 }
 
@@ -1169,6 +1183,9 @@
 	INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
 	nfsi->ncommit = 0;
 	nfsi->npages = 0;
+	atomic_set(&nfsi->silly_count, 1);
+	INIT_HLIST_HEAD(&nfsi->silly_list);
+	init_waitqueue_head(&nfsi->waitqueue);
 	nfs4_init_once(nfsi);
 }
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index d2802b1..b35069a 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -178,7 +178,7 @@
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -209,6 +209,7 @@
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
+extern void nfs4_close_sync(struct path *, struct nfs4_state *, mode_t);
 extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
 extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
@@ -235,6 +236,7 @@
 #else
 
 #define nfs4_close_state(a, b, c) do { } while (0)
+#define nfs4_close_sync(a, b, c) do { } while (0)
 
 #endif /* CONFIG_NFS_V4 */
 #endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cb99fd9..f03d9d5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1305,7 +1305,7 @@
  *
  * NOTE: Caller must be holding the sp->so_owner semaphore!
  */
-int nfs4_do_close(struct path *path, struct nfs4_state *state)
+int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 {
 	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_closedata *calldata;
@@ -1333,8 +1333,11 @@
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
+	status = 0;
+	if (wait)
+		status = rpc_wait_for_completion_task(task);
 	rpc_put_task(task);
-	return 0;
+	return status;
 out_free_calldata:
 	kfree(calldata);
 out:
@@ -1365,13 +1368,14 @@
 	}
 	ret = PTR_ERR(filp);
 out_close:
-	nfs4_close_state(path, state, nd->intent.open.flags);
+	nfs4_close_sync(path, state, nd->intent.open.flags);
 	return ret;
 }
 
 struct dentry *
 nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
+	struct dentry *parent;
 	struct path path = {
 		.mnt = nd->mnt,
 		.dentry = dentry,
@@ -1394,6 +1398,9 @@
 	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return (struct dentry *)cred;
+	parent = dentry->d_parent;
+	/* Protect against concurrent sillydeletes */
+	nfs_block_sillyrename(parent);
 	state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
@@ -1401,12 +1408,14 @@
 			d_add(dentry, NULL);
 			nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 		}
+		nfs_unblock_sillyrename(parent);
 		return (struct dentry *)state;
 	}
 	res = d_add_unique(dentry, igrab(state->inode));
 	if (res != NULL)
 		path.dentry = res;
 	nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
+	nfs_unblock_sillyrename(parent);
 	nfs4_intent_set_file(nd, &path, state);
 	return res;
 }
@@ -1444,7 +1453,7 @@
 		nfs4_intent_set_file(nd, &path, state);
 		return 1;
 	}
-	nfs4_close_state(&path, state, openflags);
+	nfs4_close_sync(&path, state, openflags);
 out_drop:
 	d_drop(dentry);
 	return 0;
@@ -1898,7 +1907,7 @@
 	if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
 		status = nfs4_intent_set_file(nd, &path, state);
 	else
-		nfs4_close_state(&path, state, flags);
+		nfs4_close_sync(&path, state, flags);
 out:
 	return status;
 }
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index bfb3626..23a9a36 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -425,7 +425,7 @@
 /*
  * Close the current file.
  */
-void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
+static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mode, int wait)
 {
 	struct nfs4_state_owner *owner = state->owner;
 	int call_close = 0;
@@ -466,7 +466,17 @@
 		nfs4_put_open_state(state);
 		nfs4_put_state_owner(owner);
 	} else
-		nfs4_do_close(path, state);
+		nfs4_do_close(path, state, wait);
+}
+
+void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
+{
+	__nfs4_close(path, state, mode, 0);
+}
+
+void nfs4_close_sync(struct path *path, struct nfs4_state *state, mode_t mode)
+{
+	__nfs4_close(path, state, mode, 1);
 }
 
 /*
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 1aed850..ce558c2 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -11,9 +11,11 @@
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfs_fs.h>
-
+#include <linux/sched.h>
+#include <linux/wait.h>
 
 struct nfs_unlinkdata {
+	struct hlist_node list;
 	struct nfs_removeargs args;
 	struct nfs_removeres res;
 	struct inode *dir;
@@ -52,6 +54,20 @@
 	return 0;
 }
 
+static void nfs_free_dname(struct nfs_unlinkdata *data)
+{
+	kfree(data->args.name.name);
+	data->args.name.name = NULL;
+	data->args.name.len = 0;
+}
+
+static void nfs_dec_sillycount(struct inode *dir)
+{
+	struct nfs_inode *nfsi = NFS_I(dir);
+	if (atomic_dec_return(&nfsi->silly_count) == 1)
+		wake_up(&nfsi->waitqueue);
+}
+
 /**
  * nfs_async_unlink_init - Initialize the RPC info
  * task: rpc_task of the sillydelete
@@ -95,6 +111,8 @@
 static void nfs_async_unlink_release(void *calldata)
 {
 	struct nfs_unlinkdata	*data = calldata;
+
+	nfs_dec_sillycount(data->dir);
 	nfs_free_unlinkdata(data);
 }
 
@@ -104,24 +122,35 @@
 	.rpc_release = nfs_async_unlink_release,
 };
 
-static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
+static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
 {
 	struct rpc_task *task;
-	struct dentry *parent;
-	struct inode *dir;
+	struct dentry *alias;
 
-	if (nfs_copy_dname(dentry, data) < 0)
-		goto out_free;
-
-	parent = dget_parent(dentry);
-	if (parent == NULL)
-		goto out_free;
-	dir = igrab(parent->d_inode);
-	dput(parent);
-	if (dir == NULL)
-		goto out_free;
-
-	data->dir = dir;
+	alias = d_lookup(parent, &data->args.name);
+	if (alias != NULL) {
+		int ret = 0;
+		/*
+		 * Hey, we raced with lookup... See if we need to transfer
+		 * the sillyrename information to the aliased dentry.
+		 */
+		nfs_free_dname(data);
+		spin_lock(&alias->d_lock);
+		if (!(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
+			alias->d_fsdata = data;
+			alias->d_flags ^= DCACHE_NFSFS_RENAMED;
+			ret = 1;
+		}
+		spin_unlock(&alias->d_lock);
+		nfs_dec_sillycount(dir);
+		dput(alias);
+		return ret;
+	}
+	data->dir = igrab(dir);
+	if (!data->dir) {
+		nfs_dec_sillycount(dir);
+		return 0;
+	}
 	data->args.fh = NFS_FH(dir);
 	nfs_fattr_init(&data->res.dir_attr);
 
@@ -129,8 +158,64 @@
 	if (!IS_ERR(task))
 		rpc_put_task(task);
 	return 1;
+}
+
+static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
+{
+	struct dentry *parent;
+	struct inode *dir;
+	int ret = 0;
+
+
+	parent = dget_parent(dentry);
+	if (parent == NULL)
+		goto out_free;
+	dir = parent->d_inode;
+	if (nfs_copy_dname(dentry, data) == 0)
+		goto out_dput;
+	/* Non-exclusive lock protects against concurrent lookup() calls */
+	spin_lock(&dir->i_lock);
+	if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
+		/* Deferred delete */
+		hlist_add_head(&data->list, &NFS_I(dir)->silly_list);
+		spin_unlock(&dir->i_lock);
+		ret = 1;
+		goto out_dput;
+	}
+	spin_unlock(&dir->i_lock);
+	ret = nfs_do_call_unlink(parent, dir, data);
+out_dput:
+	dput(parent);
 out_free:
-	return 0;
+	return ret;
+}
+
+void nfs_block_sillyrename(struct dentry *dentry)
+{
+	struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
+
+	wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1);
+}
+
+void nfs_unblock_sillyrename(struct dentry *dentry)
+{
+	struct inode *dir = dentry->d_inode;
+	struct nfs_inode *nfsi = NFS_I(dir);
+	struct nfs_unlinkdata *data;
+
+	atomic_inc(&nfsi->silly_count);
+	spin_lock(&dir->i_lock);
+	while (!hlist_empty(&nfsi->silly_list)) {
+		if (!atomic_inc_not_zero(&nfsi->silly_count))
+			break;
+		data = hlist_entry(nfsi->silly_list.first, struct nfs_unlinkdata, list);
+		hlist_del(&data->list);
+		spin_unlock(&dir->i_lock);
+		if (nfs_do_call_unlink(dentry, dir, data) == 0)
+			nfs_free_unlinkdata(data);
+		spin_lock(&dir->i_lock);
+	}
+	spin_unlock(&dir->i_lock);
 }
 
 /**
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0cf9d1c..89527a4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -174,8 +174,6 @@
 		return;
 	if (count != nfs_page_length(page))
 		return;
-	if (count != PAGE_CACHE_SIZE)
-		zero_user_page(page, count, PAGE_CACHE_SIZE - count, KM_USER0);
 	SetPageUptodate(page);
 }
 
@@ -627,7 +625,8 @@
 				return ERR_PTR(error);
 			}
 			spin_unlock(&inode->i_lock);
-			return new;
+			req = new;
+			goto zero_page;
 		}
 		spin_unlock(&inode->i_lock);
 
@@ -655,13 +654,23 @@
 	if (offset < req->wb_offset) {
 		req->wb_offset = offset;
 		req->wb_pgbase = offset;
-		req->wb_bytes = rqend - req->wb_offset;
+		req->wb_bytes = max(end, rqend) - req->wb_offset;
+		goto zero_page;
 	}
 
 	if (end > rqend)
 		req->wb_bytes = end - req->wb_offset;
 
 	return req;
+zero_page:
+	/* If this page might potentially be marked as up to date,
+	 * then we need to zero any uninitalised data. */
+	if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE
+			&& !PageUptodate(req->wb_page))
+		zero_user_page(req->wb_page, req->wb_bytes,
+				PAGE_CACHE_SIZE - req->wb_bytes,
+				KM_USER0);
+	return req;
 }
 
 int nfs_flush_incompatible(struct file *file, struct page *page)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 46934c9..d019918 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1029,13 +1029,13 @@
 		if (EX_WGATHER(exp)) {
 			if (atomic_read(&inode->i_writecount) > 1
 			    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
-				dprintk("nfsd: write defer %d\n", current->pid);
+				dprintk("nfsd: write defer %d\n", task_pid_nr(current));
 				msleep(10);
-				dprintk("nfsd: write resume %d\n", current->pid);
+				dprintk("nfsd: write resume %d\n", task_pid_nr(current));
 			}
 
 			if (inode->i_state & I_DIRTY) {
-				dprintk("nfsd: write sync %d\n", current->pid);
+				dprintk("nfsd: write sync %d\n", task_pid_nr(current));
 				host_err=nfsd_sync(file);
 			}
 #if 0
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f14b541..9cc7c04 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1372,7 +1372,7 @@
 
 	spin_lock(&o2hb_live_lock);
 	if (reg->hr_task)
-		pid = reg->hr_task->pid;
+		pid = task_pid_nr(reg->hr_task);
 	spin_unlock(&o2hb_live_lock);
 
 	if (!pid)
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 75cd877..cd04606 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -192,7 +192,7 @@
  * previous token if args expands to nothing.
  */
 #define __mlog_printk(level, fmt, args...)				\
-	printk(level "(%u,%lu):%s:%d " fmt, current->pid,		\
+	printk(level "(%u,%lu):%s:%d " fmt, task_pid_nr(current),	\
 	       __mlog_cpu_guess, __PRETTY_FUNCTION__, __LINE__ ,	\
 	       ##args)
 
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index a2c3316..2fde7bf 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -259,7 +259,7 @@
 	struct dlm_lock_resource *res;
 
 	mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n",
-	     dlm->name, dlm->dlm_reco_thread_task->pid,
+	     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
 	     dlm->reco.state & DLM_RECO_STATE_ACTIVE ? "ACTIVE" : "inactive",
 	     dlm->reco.dead_node, dlm->reco.new_master);
 
@@ -420,7 +420,7 @@
 	if (dlm_in_recovery(dlm)) {
 		mlog(0, "%s: reco thread %d in recovery: "
 		     "state=%d, master=%u, dead=%u\n",
-		     dlm->name, dlm->dlm_reco_thread_task->pid,
+		     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
 		     dlm->reco.state, dlm->reco.new_master,
 		     dlm->reco.dead_node);
 	}
@@ -483,7 +483,7 @@
 		return 0;
 	}
 	mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
-	     dlm->name, dlm->dlm_reco_thread_task->pid,
+	     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
 	     dlm->reco.dead_node);
 	spin_unlock(&dlm->spinlock);
 
@@ -507,7 +507,7 @@
 		mlog(0, "another node will master this recovery session.\n");
 	}
 	mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n",
-	     dlm->name, dlm->dlm_reco_thread_task->pid, dlm->reco.new_master,
+	     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.new_master,
 	     dlm->node_num, dlm->reco.dead_node);
 
 	/* it is safe to start everything back up here
@@ -520,7 +520,7 @@
 
 master_here:
 	mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n",
-	     dlm->dlm_reco_thread_task->pid,
+	     task_pid_nr(dlm->dlm_reco_thread_task),
 	     dlm->name, dlm->reco.dead_node, dlm->node_num);
 
 	status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 27b59f5..63c95af 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -77,6 +77,7 @@
 #include <linux/cpuset.h>
 #include <linux/rcupdate.h>
 #include <linux/delayacct.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -145,8 +146,7 @@
 					    TASK_UNINTERRUPTIBLE |
 					    TASK_STOPPED |
 					    TASK_TRACED)) |
-			(tsk->exit_state & (EXIT_ZOMBIE |
-					    EXIT_DEAD));
+					   tsk->exit_state;
 	const char **p = &task_state_array[0];
 
 	while (state) {
@@ -161,8 +161,15 @@
 	struct group_info *group_info;
 	int g;
 	struct fdtable *fdt = NULL;
+	struct pid_namespace *ns;
+	pid_t ppid, tpid;
 
+	ns = current->nsproxy->pid_ns;
 	rcu_read_lock();
+	ppid = pid_alive(p) ?
+		task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
+	tpid = pid_alive(p) && p->ptrace ?
+		task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0;
 	buffer += sprintf(buffer,
 		"State:\t%s\n"
 		"Tgid:\t%d\n"
@@ -172,9 +179,9 @@
 		"Uid:\t%d\t%d\t%d\t%d\n"
 		"Gid:\t%d\t%d\t%d\t%d\n",
 		get_task_state(p),
-		p->tgid, p->pid,
-		pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
-		pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
+		task_tgid_nr_ns(p, ns),
+		task_pid_nr_ns(p, ns),
+		ppid, tpid,
 		p->uid, p->euid, p->suid, p->fsuid,
 		p->gid, p->egid, p->sgid, p->fsgid);
 
@@ -394,6 +401,9 @@
 	unsigned long rsslim = 0;
 	char tcomm[sizeof(task->comm)];
 	unsigned long flags;
+	struct pid_namespace *ns;
+
+	ns = current->nsproxy->pid_ns;
 
 	state = *get_task_state(task);
 	vsize = eip = esp = 0;
@@ -416,7 +426,7 @@
 		struct signal_struct *sig = task->signal;
 
 		if (sig->tty) {
-			tty_pgrp = pid_nr(sig->tty->pgrp);
+			tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns);
 			tty_nr = new_encode_dev(tty_devnum(sig->tty));
 		}
 
@@ -446,12 +456,12 @@
 			maj_flt += sig->maj_flt;
 			utime = cputime_add(utime, sig->utime);
 			stime = cputime_add(stime, sig->stime);
-			gtime += cputime_add(gtime, sig->gtime);
+			gtime = cputime_add(gtime, sig->gtime);
 		}
 
-		sid = signal_session(sig);
-		pgid = process_group(task);
-		ppid = rcu_dereference(task->real_parent)->tgid;
+		sid = task_session_nr_ns(task, ns);
+		pgid = task_pgrp_nr_ns(task, ns);
+		ppid = task_ppid_nr_ns(task, ns);
 
 		unlock_task_sighand(task, &flags);
 	}
@@ -483,7 +493,7 @@
 	res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
 %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
-		task->pid,
+		task_pid_nr_ns(task, ns),
 		tcomm,
 		state,
 		ppid,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4fe74d1..39a3d7c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -63,16 +63,19 @@
 #include <linux/mm.h>
 #include <linux/rcupdate.h>
 #include <linux/kallsyms.h>
+#include <linux/resource.h>
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
+#include <linux/cgroup.h>
 #include <linux/cpuset.h>
 #include <linux/audit.h>
 #include <linux/poll.h>
 #include <linux/nsproxy.h>
 #include <linux/oom.h>
 #include <linux/elf.h>
+#include <linux/pid_namespace.h>
 #include "internal.h"
 
 /* NOTE:
@@ -301,6 +304,78 @@
 	return sprintf(buffer, "%lu\n", points);
 }
 
+struct limit_names {
+	char *name;
+	char *unit;
+};
+
+static const struct limit_names lnames[RLIM_NLIMITS] = {
+	[RLIMIT_CPU] = {"Max cpu time", "ms"},
+	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
+	[RLIMIT_DATA] = {"Max data size", "bytes"},
+	[RLIMIT_STACK] = {"Max stack size", "bytes"},
+	[RLIMIT_CORE] = {"Max core file size", "bytes"},
+	[RLIMIT_RSS] = {"Max resident set", "bytes"},
+	[RLIMIT_NPROC] = {"Max processes", "processes"},
+	[RLIMIT_NOFILE] = {"Max open files", "files"},
+	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
+	[RLIMIT_AS] = {"Max address space", "bytes"},
+	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
+	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
+	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
+	[RLIMIT_NICE] = {"Max nice priority", NULL},
+	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
+};
+
+/* Display limits for a process */
+static int proc_pid_limits(struct task_struct *task, char *buffer)
+{
+	unsigned int i;
+	int count = 0;
+	unsigned long flags;
+	char *bufptr = buffer;
+
+	struct rlimit rlim[RLIM_NLIMITS];
+
+	rcu_read_lock();
+	if (!lock_task_sighand(task,&flags)) {
+		rcu_read_unlock();
+		return 0;
+	}
+	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
+	unlock_task_sighand(task, &flags);
+	rcu_read_unlock();
+
+	/*
+	 * print the file header
+	 */
+	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
+			"Limit", "Soft Limit", "Hard Limit", "Units");
+
+	for (i = 0; i < RLIM_NLIMITS; i++) {
+		if (rlim[i].rlim_cur == RLIM_INFINITY)
+			count += sprintf(&bufptr[count], "%-25s %-20s ",
+					 lnames[i].name, "unlimited");
+		else
+			count += sprintf(&bufptr[count], "%-25s %-20lu ",
+					 lnames[i].name, rlim[i].rlim_cur);
+
+		if (rlim[i].rlim_max == RLIM_INFINITY)
+			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+		else
+			count += sprintf(&bufptr[count], "%-20lu ",
+					 rlim[i].rlim_max);
+
+		if (lnames[i].unit)
+			count += sprintf(&bufptr[count], "%-10s\n",
+					 lnames[i].unit);
+		else
+			count += sprintf(&bufptr[count], "\n");
+	}
+
+	return count;
+}
+
 /************************************************************************/
 /*                       Here the fs part begins                        */
 /************************************************************************/
@@ -349,18 +424,21 @@
 static int mounts_open(struct inode *inode, struct file *file)
 {
 	struct task_struct *task = get_proc_task(inode);
+	struct nsproxy *nsp;
 	struct mnt_namespace *ns = NULL;
 	struct proc_mounts *p;
 	int ret = -EINVAL;
 
 	if (task) {
-		task_lock(task);
-		if (task->nsproxy) {
-			ns = task->nsproxy->mnt_ns;
+		rcu_read_lock();
+		nsp = task_nsproxy(task);
+		if (nsp) {
+			ns = nsp->mnt_ns;
 			if (ns)
 				get_mnt_ns(ns);
 		}
-		task_unlock(task);
+		rcu_read_unlock();
+
 		put_task_struct(task);
 	}
 
@@ -423,16 +501,20 @@
 
 	if (!ret) {
 		struct seq_file *m = file->private_data;
+		struct nsproxy *nsp;
 		struct mnt_namespace *mnt_ns = NULL;
 		struct task_struct *task = get_proc_task(inode);
 
 		if (task) {
-			task_lock(task);
-			if (task->nsproxy)
-				mnt_ns = task->nsproxy->mnt_ns;
-			if (mnt_ns)
-				get_mnt_ns(mnt_ns);
-			task_unlock(task);
+			rcu_read_lock();
+			nsp = task_nsproxy(task);
+			if (nsp) {
+				mnt_ns = nsp->mnt_ns;
+				if (mnt_ns)
+					get_mnt_ns(mnt_ns);
+			}
+			rcu_read_unlock();
+
 			put_task_struct(task);
 		}
 
@@ -1437,7 +1519,7 @@
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *p = get_proc_task(inode);
-	unsigned int fd, tid, ino;
+	unsigned int fd, ino;
 	int retval;
 	struct files_struct * files;
 	struct fdtable *fdt;
@@ -1446,7 +1528,6 @@
 	if (!p)
 		goto out_no_task;
 	retval = 0;
-	tid = p->pid;
 
 	fd = filp->f_pos;
 	switch (fd) {
@@ -1681,7 +1762,6 @@
 		const struct pid_entry *ents, unsigned int nents)
 {
 	int i;
-	int pid;
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *task = get_proc_task(inode);
@@ -1694,7 +1774,6 @@
 		goto out_no_task;
 
 	ret = 0;
-	pid = task->pid;
 	i = filp->f_pos;
 	switch (i) {
 	case 0:
@@ -1928,14 +2007,14 @@
 			      int buflen)
 {
 	char tmp[PROC_NUMBUF];
-	sprintf(tmp, "%d", current->tgid);
+	sprintf(tmp, "%d", task_tgid_vnr(current));
 	return vfs_readlink(dentry,buffer,buflen,tmp);
 }
 
 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	char tmp[PROC_NUMBUF];
-	sprintf(tmp, "%d", current->tgid);
+	sprintf(tmp, "%d", task_tgid_vnr(current));
 	return ERR_PTR(vfs_follow_link(nd,tmp));
 }
 
@@ -2101,6 +2180,7 @@
 	REG("environ",    S_IRUSR, environ),
 	INF("auxv",       S_IRUSR, pid_auxv),
 	INF("status",     S_IRUGO, pid_status),
+	INF("limits",	  S_IRUSR, pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",      S_IRUGO|S_IWUSR, pid_sched),
 #endif
@@ -2130,9 +2210,12 @@
 #ifdef CONFIG_SCHEDSTATS
 	INF("schedstat",  S_IRUGO, pid_schedstat),
 #endif
-#ifdef CONFIG_CPUSETS
+#ifdef CONFIG_PROC_PID_CPUSET
 	REG("cpuset",     S_IRUGO, cpuset),
 #endif
+#ifdef CONFIG_CGROUPS
+	REG("cgroup",  S_IRUGO, cgroup),
+#endif
 	INF("oom_score",  S_IRUGO, oom_score),
 	REG("oom_adj",    S_IRUGO|S_IWUSR, oom_adjust),
 #ifdef CONFIG_AUDITSYSCALL
@@ -2193,27 +2276,27 @@
  *       that no dcache entries will exist at process exit time it
  *       just makes it very unlikely that any will persist.
  */
-void proc_flush_task(struct task_struct *task)
+static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 {
 	struct dentry *dentry, *leader, *dir;
 	char buf[PROC_NUMBUF];
 	struct qstr name;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
-	dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
+	name.len = snprintf(buf, sizeof(buf), "%d", pid);
+	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
 	if (dentry) {
 		shrink_dcache_parent(dentry);
 		d_drop(dentry);
 		dput(dentry);
 	}
 
-	if (thread_group_leader(task))
+	if (tgid == 0)
 		goto out;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
-	leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
+	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
+	leader = d_hash_and_lookup(mnt->mnt_root, &name);
 	if (!leader)
 		goto out;
 
@@ -2224,7 +2307,7 @@
 		goto out_put_leader;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
+	name.len = snprintf(buf, sizeof(buf), "%d", pid);
 	dentry = d_hash_and_lookup(dir, &name);
 	if (dentry) {
 		shrink_dcache_parent(dentry);
@@ -2239,6 +2322,36 @@
 	return;
 }
 
+/*
+ * when flushing dentries from proc one need to flush them from global
+ * proc (proc_mnt) and from all the namespaces' procs this task was seen
+ * in. this call is supposed to make all this job.
+ */
+
+void proc_flush_task(struct task_struct *task)
+{
+	int i, leader;
+	struct pid *pid, *tgid;
+	struct upid *upid;
+
+	leader = thread_group_leader(task);
+	proc_flush_task_mnt(proc_mnt, task->pid, leader ? task->tgid : 0);
+	pid = task_pid(task);
+	if (pid->level == 0)
+		return;
+
+	tgid = task_tgid(task);
+	for (i = 1; i <= pid->level; i++) {
+		upid = &pid->numbers[i];
+		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
+				leader ? 0 : tgid->numbers[i].nr);
+	}
+
+	upid = &pid->numbers[pid->level];
+	if (upid->nr == 1)
+		pid_ns_release_proc(upid->ns);
+}
+
 static struct dentry *proc_pid_instantiate(struct inode *dir,
 					   struct dentry * dentry,
 					   struct task_struct *task, const void *ptr)
@@ -2274,6 +2387,7 @@
 	struct dentry *result = ERR_PTR(-ENOENT);
 	struct task_struct *task;
 	unsigned tgid;
+	struct pid_namespace *ns;
 
 	result = proc_base_lookup(dir, dentry);
 	if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
@@ -2283,8 +2397,9 @@
 	if (tgid == ~0U)
 		goto out;
 
+	ns = dentry->d_sb->s_fs_info;
 	rcu_read_lock();
-	task = find_task_by_pid(tgid);
+	task = find_task_by_pid_ns(tgid, ns);
 	if (task)
 		get_task_struct(task);
 	rcu_read_unlock();
@@ -2301,7 +2416,8 @@
  * Find the first task with tgid >= tgid
  *
  */
-static struct task_struct *next_tgid(unsigned int tgid)
+static struct task_struct *next_tgid(unsigned int tgid,
+		struct pid_namespace *ns)
 {
 	struct task_struct *task;
 	struct pid *pid;
@@ -2309,9 +2425,9 @@
 	rcu_read_lock();
 retry:
 	task = NULL;
-	pid = find_ge_pid(tgid);
+	pid = find_ge_pid(tgid, ns);
 	if (pid) {
-		tgid = pid->nr + 1;
+		tgid = pid_nr_ns(pid, ns) + 1;
 		task = pid_task(pid, PIDTYPE_PID);
 		/* What we to know is if the pid we have find is the
 		 * pid of a thread_group_leader.  Testing for task
@@ -2351,6 +2467,7 @@
 	struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
 	struct task_struct *task;
 	int tgid;
+	struct pid_namespace *ns;
 
 	if (!reaper)
 		goto out_no_task;
@@ -2361,11 +2478,12 @@
 			goto out;
 	}
 
+	ns = filp->f_dentry->d_sb->s_fs_info;
 	tgid = filp->f_pos - TGID_OFFSET;
-	for (task = next_tgid(tgid);
+	for (task = next_tgid(tgid, ns);
 	     task;
-	     put_task_struct(task), task = next_tgid(tgid + 1)) {
-		tgid = task->pid;
+	     put_task_struct(task), task = next_tgid(tgid + 1, ns)) {
+		tgid = task_pid_nr_ns(task, ns);
 		filp->f_pos = tgid + TGID_OFFSET;
 		if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) {
 			put_task_struct(task);
@@ -2388,6 +2506,7 @@
 	REG("environ",   S_IRUSR, environ),
 	INF("auxv",      S_IRUSR, pid_auxv),
 	INF("status",    S_IRUGO, pid_status),
+	INF("limits",	 S_IRUSR, pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",     S_IRUGO|S_IWUSR, pid_sched),
 #endif
@@ -2416,9 +2535,12 @@
 #ifdef CONFIG_SCHEDSTATS
 	INF("schedstat", S_IRUGO, pid_schedstat),
 #endif
-#ifdef CONFIG_CPUSETS
+#ifdef CONFIG_PROC_PID_CPUSET
 	REG("cpuset",    S_IRUGO, cpuset),
 #endif
+#ifdef CONFIG_CGROUPS
+	REG("cgroup",  S_IRUGO, cgroup),
+#endif
 	INF("oom_score", S_IRUGO, oom_score),
 	REG("oom_adj",   S_IRUGO|S_IWUSR, oom_adjust),
 #ifdef CONFIG_AUDITSYSCALL
@@ -2486,6 +2608,7 @@
 	struct task_struct *task;
 	struct task_struct *leader = get_proc_task(dir);
 	unsigned tid;
+	struct pid_namespace *ns;
 
 	if (!leader)
 		goto out_no_task;
@@ -2494,14 +2617,15 @@
 	if (tid == ~0U)
 		goto out;
 
+	ns = dentry->d_sb->s_fs_info;
 	rcu_read_lock();
-	task = find_task_by_pid(tid);
+	task = find_task_by_pid_ns(tid, ns);
 	if (task)
 		get_task_struct(task);
 	rcu_read_unlock();
 	if (!task)
 		goto out;
-	if (leader->tgid != task->tgid)
+	if (!same_thread_group(leader, task))
 		goto out_drop_task;
 
 	result = proc_task_instantiate(dir, dentry, task, NULL);
@@ -2526,14 +2650,14 @@
  * threads past it.
  */
 static struct task_struct *first_tid(struct task_struct *leader,
-					int tid, int nr)
+		int tid, int nr, struct pid_namespace *ns)
 {
 	struct task_struct *pos;
 
 	rcu_read_lock();
 	/* Attempt to start with the pid of a thread */
 	if (tid && (nr > 0)) {
-		pos = find_task_by_pid(tid);
+		pos = find_task_by_pid_ns(tid, ns);
 		if (pos && (pos->group_leader == leader))
 			goto found;
 	}
@@ -2602,6 +2726,7 @@
 	ino_t ino;
 	int tid;
 	unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
+	struct pid_namespace *ns;
 
 	task = get_proc_task(inode);
 	if (!task)
@@ -2635,12 +2760,13 @@
 	/* f_version caches the tgid value that the last readdir call couldn't
 	 * return. lseek aka telldir automagically resets f_version to 0.
 	 */
+	ns = filp->f_dentry->d_sb->s_fs_info;
 	tid = (int)filp->f_version;
 	filp->f_version = 0;
-	for (task = first_tid(leader, tid, pos - 2);
+	for (task = first_tid(leader, tid, pos - 2, ns);
 	     task;
 	     task = next_tid(task), pos++) {
-		tid = task->pid;
+		tid = task_pid_nr_ns(task, ns);
 		if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
 			/* returning this tgid failed, save it as the first
 			 * pid for the next readir call */
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 99ca004..abe6a3f 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -448,7 +448,7 @@
 	return NULL;
 }			
 
-int proc_fill_super(struct super_block *s, void *data, int silent)
+int proc_fill_super(struct super_block *s)
 {
 	struct inode * root_inode;
 
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index d6dc72c..e0d064e 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -91,7 +91,8 @@
 		LOAD_INT(a), LOAD_FRAC(a),
 		LOAD_INT(b), LOAD_FRAC(b),
 		LOAD_INT(c), LOAD_FRAC(c),
-		nr_running(), nr_threads, current->nsproxy->pid_ns->last_pid);
+		nr_running(), nr_threads,
+		task_active_pid_ns(current)->last_pid);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
diff --git a/fs/proc/root.c b/fs/proc/root.c
index cf30466..ec9cb3b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,32 +18,90 @@
 #include <linux/bitops.h>
 #include <linux/smp_lock.h>
 #include <linux/mount.h>
+#include <linux/pid_namespace.h>
 
 #include "internal.h"
 
 struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
 
+static int proc_test_super(struct super_block *sb, void *data)
+{
+	return sb->s_fs_info == data;
+}
+
+static int proc_set_super(struct super_block *sb, void *data)
+{
+	struct pid_namespace *ns;
+
+	ns = (struct pid_namespace *)data;
+	sb->s_fs_info = get_pid_ns(ns);
+	return set_anon_super(sb, NULL);
+}
+
 static int proc_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
+	int err;
+	struct super_block *sb;
+	struct pid_namespace *ns;
+	struct proc_inode *ei;
+
 	if (proc_mnt) {
 		/* Seed the root directory with a pid so it doesn't need
 		 * to be special in base.c.  I would do this earlier but
 		 * the only task alive when /proc is mounted the first time
 		 * is the init_task and it doesn't have any pids.
 		 */
-		struct proc_inode *ei;
 		ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
 		if (!ei->pid)
 			ei->pid = find_get_pid(1);
 	}
-	return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
+
+	if (flags & MS_KERNMOUNT)
+		ns = (struct pid_namespace *)data;
+	else
+		ns = current->nsproxy->pid_ns;
+
+	sb = sget(fs_type, proc_test_super, proc_set_super, ns);
+	if (IS_ERR(sb))
+		return PTR_ERR(sb);
+
+	if (!sb->s_root) {
+		sb->s_flags = flags;
+		err = proc_fill_super(sb);
+		if (err) {
+			up_write(&sb->s_umount);
+			deactivate_super(sb);
+			return err;
+		}
+
+		ei = PROC_I(sb->s_root->d_inode);
+		if (!ei->pid) {
+			rcu_read_lock();
+			ei->pid = get_pid(find_pid_ns(1, ns));
+			rcu_read_unlock();
+		}
+
+		sb->s_flags |= MS_ACTIVE;
+		ns->proc_mnt = mnt;
+	}
+
+	return simple_set_mnt(mnt, sb);
+}
+
+static void proc_kill_sb(struct super_block *sb)
+{
+	struct pid_namespace *ns;
+
+	ns = (struct pid_namespace *)sb->s_fs_info;
+	kill_anon_super(sb);
+	put_pid_ns(ns);
 }
 
 static struct file_system_type proc_fs_type = {
 	.name		= "proc",
 	.get_sb		= proc_get_sb,
-	.kill_sb	= kill_anon_super,
+	.kill_sb	= proc_kill_sb,
 };
 
 void __init proc_root_init(void)
@@ -54,12 +112,13 @@
 	err = register_filesystem(&proc_fs_type);
 	if (err)
 		return;
-	proc_mnt = kern_mount(&proc_fs_type);
+	proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
 	err = PTR_ERR(proc_mnt);
 	if (IS_ERR(proc_mnt)) {
 		unregister_filesystem(&proc_fs_type);
 		return;
 	}
+
 	proc_misc_init();
 
 	proc_net_init();
@@ -153,6 +212,22 @@
 	.parent		= &proc_root,
 };
 
+int pid_ns_prepare_proc(struct pid_namespace *ns)
+{
+	struct vfsmount *mnt;
+
+	mnt = kern_mount_data(&proc_fs_type, ns);
+	if (IS_ERR(mnt))
+		return PTR_ERR(mnt);
+
+	return 0;
+}
+
+void pid_ns_release_proc(struct pid_namespace *ns)
+{
+	mntput(ns->proc_mnt);
+}
+
 EXPORT_SYMBOL(proc_symlink);
 EXPORT_SYMBOL(proc_mkdir);
 EXPORT_SYMBOL(create_proc_entry);
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 2a5dd34..16b331d 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -47,7 +47,9 @@
     test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
 
 static inline void get_bit_address(struct super_block *s,
-				   b_blocknr_t block, int *bmap_nr, int *offset)
+				   b_blocknr_t block,
+				   unsigned int *bmap_nr,
+				   unsigned int *offset)
 {
 	/* It is in the bitmap block number equal to the block
 	 * number divided by the number of bits in a block. */
@@ -56,10 +58,10 @@
 	*offset = block & ((s->s_blocksize << 3) - 1);
 }
 
-#ifdef CONFIG_REISERFS_CHECK
 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
 {
-	int bmap, offset;
+	unsigned int bmap, offset;
+	unsigned int bmap_count = reiserfs_bmap_count(s);
 
 	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
 		reiserfs_warning(s,
@@ -75,25 +77,26 @@
 	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
 			      &(REISERFS_SB(s)->s_properties)))) {
 		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
-		if (block >= bmap1 && block <= bmap1 + SB_BMAP_NR(s)) {
+		if (block >= bmap1 &&
+		    block <= bmap1 + bmap_count) {
 			reiserfs_warning(s, "vs: 4019: is_reusable: "
 					 "bitmap block %lu(%u) can't be freed or reused",
-					 block, SB_BMAP_NR(s));
+					 block, bmap_count);
 			return 0;
 		}
 	} else {
 		if (offset == 0) {
 			reiserfs_warning(s, "vs: 4020: is_reusable: "
 					 "bitmap block %lu(%u) can't be freed or reused",
-					 block, SB_BMAP_NR(s));
+					 block, bmap_count);
 			return 0;
 		}
 	}
 
-	if (bmap >= SB_BMAP_NR(s)) {
+	if (bmap >= bmap_count) {
 		reiserfs_warning(s,
 				 "vs-4030: is_reusable: there is no so many bitmap blocks: "
-				 "block=%lu, bitmap_nr=%d", block, bmap);
+				 "block=%lu, bitmap_nr=%u", block, bmap);
 		return 0;
 	}
 
@@ -106,12 +109,11 @@
 
 	return 1;
 }
-#endif				/* CONFIG_REISERFS_CHECK */
 
 /* searches in journal structures for a given block number (bmap, off). If block
    is found in reiserfs journal it suggests next free block candidate to test. */
-static inline int is_block_in_journal(struct super_block *s, int bmap, int
-				      off, int *next)
+static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
+				      int off, int *next)
 {
 	b_blocknr_t tmp;
 
@@ -132,8 +134,8 @@
 /* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
  * block; */
 static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
-			     int bmap_n, int *beg, int boundary, int min,
-			     int max, int unfm)
+			     unsigned int bmap_n, int *beg, int boundary,
+			     int min, int max, int unfm)
 {
 	struct super_block *s = th->t_super;
 	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
@@ -143,8 +145,8 @@
 
 	BUG_ON(!th->t_trans_id);
 
-	RFALSE(bmap_n >= SB_BMAP_NR(s), "Bitmap %d is out of range (0..%d)",
-	       bmap_n, SB_BMAP_NR(s) - 1);
+	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
+	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
 	PROC_INFO_INC(s, scan_bitmap.bmap);
 /* this is unclear and lacks comments, explain how journal bitmaps
    work here for the reader.  Convey a sense of the design here. What
@@ -249,12 +251,12 @@
 	} else {
 		hash_in = (char *)(&id);
 		hash = keyed_hash(hash_in, 4);
-		bm = hash % SB_BMAP_NR(s);
+		bm = hash % reiserfs_bmap_count(s);
 		if (!bm)
 			bm = 1;
 	}
 	/* this can only be true when SB_BMAP_NR = 1 */
-	if (bm >= SB_BMAP_NR(s))
+	if (bm >= reiserfs_bmap_count(s))
 		bm = 0;
 	return bm;
 }
@@ -273,7 +275,7 @@
 	 * to make a better decision. This favors long-term performace gain
 	 * with a better on-disk layout vs. a short term gain of skipping the
 	 * read and potentially having a bad placement. */
-	if (info->first_zero_hint == 0) {
+	if (info->free_count == UINT_MAX) {
 		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
 		brelse(bh);
 	}
@@ -309,16 +311,16 @@
  * bitmap and place new blocks there. Returns number of allocated blocks. */
 static int scan_bitmap(struct reiserfs_transaction_handle *th,
 		       b_blocknr_t * start, b_blocknr_t finish,
-		       int min, int max, int unfm, unsigned long file_block)
+		       int min, int max, int unfm, sector_t file_block)
 {
 	int nr_allocated = 0;
 	struct super_block *s = th->t_super;
 	/* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
 	 * - Hans, it is not a block number - Zam. */
 
-	int bm, off;
-	int end_bm, end_off;
-	int off_max = s->s_blocksize << 3;
+	unsigned int bm, off;
+	unsigned int end_bm, end_off;
+	unsigned int off_max = s->s_blocksize << 3;
 
 	BUG_ON(!th->t_trans_id);
 
@@ -328,10 +330,10 @@
 
 	get_bit_address(s, *start, &bm, &off);
 	get_bit_address(s, finish, &end_bm, &end_off);
-	if (bm > SB_BMAP_NR(s))
+	if (bm > reiserfs_bmap_count(s))
 		return 0;
-	if (end_bm > SB_BMAP_NR(s))
-		end_bm = SB_BMAP_NR(s);
+	if (end_bm > reiserfs_bmap_count(s))
+		end_bm = reiserfs_bmap_count(s);
 
 	/* When the bitmap is more than 10% free, anyone can allocate.
 	 * When it's less than 10% free, only files that already use the
@@ -385,7 +387,7 @@
 	struct reiserfs_super_block *rs;
 	struct buffer_head *sbh, *bmbh;
 	struct reiserfs_bitmap_info *apbi;
-	int nr, offset;
+	unsigned int nr, offset;
 
 	BUG_ON(!th->t_trans_id);
 
@@ -397,10 +399,12 @@
 
 	get_bit_address(s, block, &nr, &offset);
 
-	if (nr >= sb_bmap_nr(rs)) {
+	if (nr >= reiserfs_bmap_count(s)) {
 		reiserfs_warning(s, "vs-4075: reiserfs_free_block: "
-				 "block %lu is out of range on %s",
-				 block, reiserfs_bdevname(s));
+				 "block %lu is out of range on %s "
+				 "(nr=%u,max=%u)", block,
+				 reiserfs_bdevname(s), nr,
+				 reiserfs_bmap_count(s));
 		return;
 	}
 
@@ -434,12 +438,19 @@
 			 int for_unformatted)
 {
 	struct super_block *s = th->t_super;
-
 	BUG_ON(!th->t_trans_id);
 
 	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
-	RFALSE(is_reusable(s, block, 1) == 0,
-	       "vs-4071: can not free such block");
+	if (!is_reusable(s, block, 1))
+		return;
+
+	if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
+		reiserfs_panic(th->t_super, "bitmap-4072",
+			       "Trying to free block outside file system "
+			       "boundaries (%lu > %lu)",
+			       block, sb_block_count(REISERFS_SB(s)->s_rs));
+		return;
+	}
 	/* mark it before we clear it, just in case */
 	journal_mark_freed(th, s, block);
 	_reiserfs_free_block(th, inode, block, for_unformatted);
@@ -449,11 +460,11 @@
 static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
 					 struct inode *inode, b_blocknr_t block)
 {
+	BUG_ON(!th->t_trans_id);
 	RFALSE(!th->t_super,
 	       "vs-4060: trying to free block on nonexistent device");
-	RFALSE(is_reusable(th->t_super, block, 1) == 0,
-	       "vs-4070: can not free such block");
-	BUG_ON(!th->t_trans_id);
+	if (!is_reusable(th->t_super, block, 1))
+		return;
 	_reiserfs_free_block(th, inode, block, 1);
 }
 
@@ -1207,27 +1218,22 @@
 {
 	unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
 
-	info->first_zero_hint = 1 << (sb->s_blocksize_bits + 3);
+	/* The first bit must ALWAYS be 1 */
+	BUG_ON(!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data));
+
+	info->free_count = 0;
 
 	while (--cur >= (unsigned long *)bh->b_data) {
-		int base = ((char *)cur - bh->b_data) << 3;
+		int i;
 
 		/* 0 and ~0 are special, we can optimize for them */
-		if (*cur == 0) {
-			info->first_zero_hint = base;
+		if (*cur == 0)
 			info->free_count += BITS_PER_LONG;
-		} else if (*cur != ~0L) {       /* A mix, investigate */
-			int b;
-			for (b = BITS_PER_LONG - 1; b >= 0; b--) {
-				if (!reiserfs_test_le_bit(b, cur)) {
-					info->first_zero_hint = base + b;
+		else if (*cur != ~0L)	/* A mix, investigate */
+			for (i = BITS_PER_LONG - 1; i >= 0; i--)
+				if (!reiserfs_test_le_bit(i, cur))
 					info->free_count++;
-				}
-			}
-		}
 	}
-	/* The first bit must ALWAYS be 1 */
-	BUG_ON(info->first_zero_hint == 0);
 }
 
 struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
@@ -1257,7 +1263,7 @@
 		BUG_ON(!buffer_uptodate(bh));
 		BUG_ON(atomic_read(&bh->b_count) == 0);
 
-		if (info->first_zero_hint == 0)
+		if (info->free_count == UINT_MAX)
 			reiserfs_cache_bitmap_metadata(sb, bh, info);
 	}
 
@@ -1267,12 +1273,13 @@
 int reiserfs_init_bitmap_cache(struct super_block *sb)
 {
 	struct reiserfs_bitmap_info *bitmap;
+	unsigned int bmap_nr = reiserfs_bmap_count(sb);
 
-	bitmap = vmalloc(sizeof (*bitmap) * SB_BMAP_NR(sb));
+	bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
 	if (bitmap == NULL)
 		return -ENOMEM;
 
-	memset(bitmap, 0, sizeof (*bitmap) * SB_BMAP_NR(sb));
+	memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
 
 	SB_AP_BITMAP(sb) = bitmap;
 
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0804289..a991af9 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -199,7 +199,7 @@
 // files which were created in the earlier version can not be longer,
 // than 2 gb
 //
-static int file_capable(struct inode *inode, long block)
+static int file_capable(struct inode *inode, sector_t block)
 {
 	if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||	// it is new file.
 	    block < (1 << (31 - inode->i_sb->s_blocksize_bits)))	// old file, but 'block' is inside of 2gb
@@ -242,7 +242,7 @@
 // Please improve the english/clarity in the comment above, as it is
 // hard to understand.
 
-static int _get_block_create_0(struct inode *inode, long block,
+static int _get_block_create_0(struct inode *inode, sector_t block,
 			       struct buffer_head *bh_result, int args)
 {
 	INITIALIZE_PATH(path);
@@ -250,7 +250,7 @@
 	struct buffer_head *bh;
 	struct item_head *ih, tmp_ih;
 	int fs_gen;
-	int blocknr;
+	b_blocknr_t blocknr;
 	char *p = NULL;
 	int chars;
 	int ret;
@@ -569,7 +569,7 @@
 }
 
 static inline int _allocate_block(struct reiserfs_transaction_handle *th,
-				  long block,
+				  sector_t block,
 				  struct inode *inode,
 				  b_blocknr_t * allocated_block_nr,
 				  struct treepath *path, int flags)
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 4cad9e7..bb05a3e 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -219,11 +219,12 @@
 	}
 }
 
-static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block,
+static int set_bit_in_list_bitmap(struct super_block *p_s_sb,
+				  b_blocknr_t block,
 				  struct reiserfs_list_bitmap *jb)
 {
-	int bmap_nr = block / (p_s_sb->s_blocksize << 3);
-	int bit_nr = block % (p_s_sb->s_blocksize << 3);
+	unsigned int bmap_nr = block / (p_s_sb->s_blocksize << 3);
+	unsigned int bit_nr = block % (p_s_sb->s_blocksize << 3);
 
 	if (!jb->bitmaps[bmap_nr]) {
 		jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb);
@@ -239,7 +240,7 @@
 	if (jb->bitmaps == NULL)
 		return;
 
-	for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) {
+	for (i = 0; i < reiserfs_bmap_count(p_s_sb); i++) {
 		if (jb->bitmaps[i]) {
 			free_bitmap_node(p_s_sb, jb->bitmaps[i]);
 			jb->bitmaps[i] = NULL;
@@ -289,7 +290,7 @@
 */
 int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
 				   struct reiserfs_list_bitmap *jb_array,
-				   int bmap_nr)
+				   unsigned int bmap_nr)
 {
 	int i;
 	int failed = 0;
@@ -483,7 +484,7 @@
 **
 */
 int reiserfs_in_journal(struct super_block *p_s_sb,
-			int bmap_nr, int bit_nr, int search_all,
+			unsigned int bmap_nr, int bit_nr, int search_all,
 			b_blocknr_t * next_zero_bit)
 {
 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
@@ -1013,7 +1014,7 @@
 			     struct reiserfs_journal_list *jl, int flushall)
 {
 	int i;
-	int bn;
+	b_blocknr_t bn;
 	struct buffer_head *tbh = NULL;
 	unsigned long trans_id = jl->j_trans_id;
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -2307,8 +2308,9 @@
    Right now it is only used from journal code. But later we might use it
    from other places.
    Note: Do not use journal_getblk/sb_getblk functions here! */
-static struct buffer_head *reiserfs_breada(struct block_device *dev, int block,
-					   int bufsize, unsigned int max_block)
+static struct buffer_head *reiserfs_breada(struct block_device *dev,
+					   b_blocknr_t block, int bufsize,
+					   b_blocknr_t max_block)
 {
 	struct buffer_head *bhlist[BUFNR];
 	unsigned int blocks = BUFNR;
@@ -2732,7 +2734,7 @@
 	journal->j_persistent_trans = 0;
 	if (reiserfs_allocate_list_bitmaps(p_s_sb,
 					   journal->j_list_bitmap,
-					   SB_BMAP_NR(p_s_sb)))
+					   reiserfs_bmap_count(p_s_sb)))
 		goto free_and_return;
 	allocate_bitmap_nodes(p_s_sb);
 
@@ -2740,7 +2742,7 @@
 	SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ?
 						 REISERFS_OLD_DISK_OFFSET_IN_BYTES
 						 / p_s_sb->s_blocksize +
-						 SB_BMAP_NR(p_s_sb) +
+						 reiserfs_bmap_count(p_s_sb) +
 						 1 :
 						 REISERFS_DISK_OFFSET_IN_BYTES /
 						 p_s_sb->s_blocksize + 2);
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index bc808a9..5e7388b 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -356,13 +356,11 @@
 void reiserfs_panic(struct super_block *sb, const char *fmt, ...)
 {
 	do_reiserfs_warning(fmt);
-	printk(KERN_EMERG "REISERFS: panic (device %s): %s\n",
-	       reiserfs_bdevname(sb), error_buf);
-	BUG();
 
-	/* this is not actually called, but makes reiserfs_panic() "noreturn" */
-	panic("REISERFS: panic (device %s): %s\n",
-	      reiserfs_bdevname(sb), error_buf);
+	dump_stack();
+
+	panic(KERN_EMERG "REISERFS: panic (device %s): %s\n",
+	       reiserfs_bdevname(sb), error_buf);
 }
 
 void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 976cc78..f71c394 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -61,7 +61,8 @@
 	}
 
 	/* count used bits in last bitmap block */
-	block_r = SB_BLOCK_COUNT(s) - (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8;
+	block_r = SB_BLOCK_COUNT(s) -
+			(reiserfs_bmap_count(s) - 1) * s->s_blocksize * 8;
 
 	/* count bitmap blocks in new fs */
 	bmap_nr_new = block_count_new / (s->s_blocksize * 8);
@@ -73,7 +74,7 @@
 
 	/* save old values */
 	block_count = SB_BLOCK_COUNT(s);
-	bmap_nr = SB_BMAP_NR(s);
+	bmap_nr = reiserfs_bmap_count(s);
 
 	/* resizing of reiserfs bitmaps (journal and real), if needed */
 	if (bmap_nr_new > bmap_nr) {
@@ -119,7 +120,7 @@
 			return -ENOMEM;
 		}
 		memset(bitmap, 0,
-		       sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s));
+		       sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
 		for (i = 0; i < bmap_nr; i++)
 			bitmap[i] = old_bitmap[i];
 
@@ -143,7 +144,6 @@
 			mark_buffer_dirty(bh);
 			sync_dirty_buffer(bh);
 			// update bitmap_info stuff
-			bitmap[i].first_zero_hint = 1;
 			bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
 			brelse(bh);
 		}
@@ -173,8 +173,6 @@
 	for (i = block_r; i < s->s_blocksize * 8; i++)
 		reiserfs_test_and_clear_le_bit(i, bh->b_data);
 	info->free_count += s->s_blocksize * 8 - block_r;
-	if (!info->first_zero_hint)
-		info->first_zero_hint = block_r;
 
 	journal_mark_dirty(&th, s, bh);
 	brelse(bh);
@@ -196,9 +194,6 @@
 	brelse(bh);
 
 	info->free_count -= s->s_blocksize * 8 - block_r_new;
-	/* Extreme case where last bitmap is the only valid block in itself. */
-	if (!info->free_count)
-		info->first_zero_hint = 0;
 	/* update super */
 	reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
 	free_blocks = SB_FREE_BLOCKS(s);
@@ -206,7 +201,7 @@
 			   free_blocks + (block_count_new - block_count -
 					  (bmap_nr_new - bmap_nr)));
 	PUT_SB_BLOCK_COUNT(s, block_count_new);
-	PUT_SB_BMAP_NR(s, bmap_nr_new);
+	PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new);
 	s->s_dirt = 1;
 
 	journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 981027d..ca41567 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -559,7 +559,7 @@
 /* The function is NOT SCHEDULE-SAFE! */
 static void search_by_key_reada(struct super_block *s,
 				struct buffer_head **bh,
-				unsigned long *b, int num)
+				b_blocknr_t *b, int num)
 {
 	int i, j;
 
@@ -611,7 +611,7 @@
 					   DISK_LEAF_NODE_LEVEL */
     )
 {
-	int n_block_number;
+	b_blocknr_t n_block_number;
 	int expected_level;
 	struct buffer_head *p_s_bh;
 	struct path_element *p_s_last_element;
@@ -619,7 +619,7 @@
 	int right_neighbor_of_leaf_node;
 	int fs_gen;
 	struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
-	unsigned long reada_blocks[SEARCH_BY_KEY_READA];
+	b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
 	int reada_count = 0;
 
 #ifdef CONFIG_REISERFS_CHECK
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b82897a..57adfe9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1725,6 +1725,21 @@
 		set_sb_umount_state(rs, REISERFS_ERROR_FS);
 		set_sb_fs_state(rs, 0);
 
+		/* Clear out s_bmap_nr if it would wrap. We can handle this
+		 * case, but older revisions can't. This will cause the
+		 * file system to fail mount on those older implementations,
+		 * avoiding corruption. -jeffm */
+		if (bmap_would_wrap(reiserfs_bmap_count(s)) &&
+		    sb_bmap_nr(rs) != 0) {
+			reiserfs_warning(s, "super-2030: This file system "
+					"claims to use %u bitmap blocks in "
+					"its super block, but requires %u. "
+					"Clearing to zero.", sb_bmap_nr(rs),
+					reiserfs_bmap_count(s));
+
+			set_sb_bmap_nr(rs, 0);
+		}
+
 		if (old_format_only(s)) {
 			/* filesystem of format 3.5 either with standard or non-standard
 			   journal */
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index fab4b9b..1597f6b 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -484,7 +484,7 @@
 	/* Resize it so we're ok to write there */
 	newattrs.ia_size = buffer_size;
 	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
-	mutex_lock(&xinode->i_mutex);
+	mutex_lock_nested(&xinode->i_mutex, I_MUTEX_XATTR);
 	err = notify_change(fp->f_path.dentry, &newattrs);
 	if (err)
 		goto out_filp;
@@ -1223,7 +1223,8 @@
 		if (!IS_ERR(dentry)) {
 			if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) {
 				struct inode *inode = dentry->d_parent->d_inode;
-				mutex_lock(&inode->i_mutex);
+				mutex_lock_nested(&inode->i_mutex,
+						  I_MUTEX_XATTR);
 				err = inode->i_op->mkdir(inode, dentry, 0700);
 				mutex_unlock(&inode->i_mutex);
 				if (err) {
diff --git a/fs/select.c b/fs/select.c
index 7dede89..47f4792 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -177,11 +177,6 @@
 	return max;
 }
 
-#define BIT(i)		(1UL << ((i)&(__NFDBITS-1)))
-#define MEM(i,m)	((m)+(unsigned)(i)/__NFDBITS)
-#define ISSET(i,m)	(((i)&*(m)) != 0)
-#define SET(i,m)	(*(m) |= (i))
-
 #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
 #define POLLEX_SET (POLLPRI)
diff --git a/fs/super.c b/fs/super.c
index 1bfcca2..d28fde7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -40,10 +40,6 @@
 #include <asm/uaccess.h>
 
 
-void get_filesystem(struct file_system_type *fs);
-void put_filesystem(struct file_system_type *fs);
-struct file_system_type *get_fs_type(const char *name);
-
 LIST_HEAD(super_blocks);
 DEFINE_SPINLOCK(sb_lock);
 
@@ -336,21 +332,21 @@
 			void *data)
 {
 	struct super_block *s = NULL;
-	struct list_head *p;
+	struct super_block *old;
 	int err;
 
 retry:
 	spin_lock(&sb_lock);
-	if (test) list_for_each(p, &type->fs_supers) {
-		struct super_block *old;
-		old = list_entry(p, struct super_block, s_instances);
-		if (!test(old, data))
-			continue;
-		if (!grab_super(old))
-			goto retry;
-		if (s)
-			destroy_super(s);
-		return old;
+	if (test) {
+		list_for_each_entry(old, &type->fs_supers, s_instances) {
+			if (!test(old, data))
+				continue;
+			if (!grab_super(old))
+				goto retry;
+			if (s)
+				destroy_super(s);
+			return old;
+		}
 	}
 	if (!s) {
 		spin_unlock(&sb_lock);
@@ -948,9 +944,9 @@
 	return mnt;
 }
 
-struct vfsmount *kern_mount(struct file_system_type *type)
+struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
 {
-	return vfs_kern_mount(type, 0, type->name, NULL);
+	return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
 }
 
-EXPORT_SYMBOL(kern_mount);
+EXPORT_SYMBOL_GPL(kern_mount_data);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 726449d..3586c7a 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -54,8 +54,8 @@
 		struct dentry	*de),
 	void			*context)
 {
-	xfs_fid2_t		ifid;
-	xfs_fid2_t		pfid;
+	xfs_fid_t		ifid;
+	xfs_fid_t		pfid;
 	void			*parent = NULL;
 	int			is64 = 0;
 	__u32			*p = fh;
@@ -144,7 +144,7 @@
 	struct dentry		*result;
 	int			error;
 
-	error = xfs_vget(XFS_M(sb), &vp, (fid_t *)data);
+	error = xfs_vget(XFS_M(sb), &vp, data);
 	if (error || vp == NULL)
 		return ERR_PTR(-ESTALE) ;
 
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
index e794ca4..2f36071 100644
--- a/fs/xfs/linux-2.6/xfs_export.h
+++ b/fs/xfs/linux-2.6/xfs_export.h
@@ -71,13 +71,13 @@
 
 /*
  * Decode encoded inode information (either for the inode itself
- * or the parent) into an xfs_fid2_t structure.  Advances and
+ * or the parent) into an xfs_fid_t structure.  Advances and
  * returns the new data pointer
  */
 static inline __u32 *
-xfs_fileid_decode_fid2(__u32 *p, xfs_fid2_t *fid, int is64)
+xfs_fileid_decode_fid2(__u32 *p, xfs_fid_t *fid, int is64)
 {
-	fid->fid_len = sizeof(xfs_fid2_t) - sizeof(fid->fid_len);
+	fid->fid_len = sizeof(xfs_fid_t) - sizeof(fid->fid_len);
 	fid->fid_pad = 0;
 	fid->fid_ino = *p++;
 #if XFS_BIG_INUMS
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index ffec630..2b34bad 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -152,11 +152,11 @@
 		lock_mode = xfs_ilock_map_shared(ip);
 
 		/* fill in fid section of handle from inode */
-		handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) -
-					    sizeof(handle.ha_fid.xfs_fid_len);
-		handle.ha_fid.xfs_fid_pad = 0;
-		handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen;
-		handle.ha_fid.xfs_fid_ino = ip->i_ino;
+		handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
+					sizeof(handle.ha_fid.fid_len);
+		handle.ha_fid.fid_pad = 0;
+		handle.ha_fid.fid_gen = ip->i_d.di_gen;
+		handle.ha_fid.fid_ino = ip->i_ino;
 
 		xfs_iunlock_map_shared(ip, lock_mode);
 
@@ -222,10 +222,10 @@
 	if (hlen < sizeof(*handlep))
 		memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
 	if (hlen > sizeof(handlep->ha_fsid)) {
-		if (handlep->ha_fid.xfs_fid_len !=
-				(hlen - sizeof(handlep->ha_fsid)
-					- sizeof(handlep->ha_fid.xfs_fid_len))
-		    || handlep->ha_fid.xfs_fid_pad)
+		if (handlep->ha_fid.fid_len !=
+		    (hlen - sizeof(handlep->ha_fsid) -
+		            sizeof(handlep->ha_fid.fid_len)) ||
+		    handlep->ha_fid.fid_pad)
 			return XFS_ERROR(EINVAL);
 	}
 
@@ -233,9 +233,9 @@
 	 * Crack the handle, obtain the inode # & generation #
 	 */
 	xfid = (struct xfs_fid *)&handlep->ha_fid;
-	if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) {
-		ino  = xfid->xfs_fid_ino;
-		igen = xfid->xfs_fid_gen;
+	if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) {
+		ino  = xfid->fid_ino;
+		igen = xfid->fid_gen;
 	} else {
 		return XFS_ERROR(EINVAL);
 	}
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
index 6cd5704..a1e55fb 100644
--- a/fs/xfs/xfs_dmops.c
+++ b/fs/xfs/xfs_dmops.c
@@ -41,29 +41,16 @@
 xfs_dmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
 {
 	if (args->flags & XFSMNT_DMAPI) {
-		struct xfs_dmops *ops;
-
-		ops = symbol_get(xfs_dmcore_xfs);
-		if (!ops) {
-			request_module("xfs_dmapi");
-			ops = symbol_get(xfs_dmcore_xfs);
-		}
-
-		if (!ops) {
-			cmn_err(CE_WARN, "XFS: no dmapi support available.");
-			return EINVAL;
-		}
-		mp->m_dm_ops = ops;
-	} else {
-		mp->m_dm_ops = &xfs_dmcore_stub;
+		cmn_err(CE_WARN,
+			"XFS: dmapi support not available in this kernel.");
+		return EINVAL;
 	}
 
+	mp->m_dm_ops = &xfs_dmcore_stub;
 	return 0;
 }
 
 void
 xfs_dmops_put(struct xfs_mount *mp)
 {
-	if (mp->m_dm_ops != &xfs_dmcore_stub)
-		symbol_put(xfs_dmcore_xfs);
 }
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index ec3c9c2..aab9662 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -389,30 +389,13 @@
  */
 typedef struct { __u32 val[2]; } xfs_fsid_t; /* file system id type */
 
-
-#ifndef HAVE_FID
-#define MAXFIDSZ	46
-
-typedef struct fid {
-	__u16		fid_len;		/* length of data in bytes */
-	unsigned char	fid_data[MAXFIDSZ];	/* data (fid_len worth)  */
-} fid_t;
-#endif
-
 typedef struct xfs_fid {
-	__u16	xfs_fid_len;		/* length of remainder	*/
-	__u16	xfs_fid_pad;
-	__u32	xfs_fid_gen;		/* generation number	*/
-	__u64	xfs_fid_ino;		/* 64 bits inode number */
+	__u16	fid_len;		/* length of remainder	*/
+	__u16	fid_pad;
+	__u32	fid_gen;		/* generation number	*/
+	__u64	fid_ino;		/* 64 bits inode number */
 } xfs_fid_t;
 
-typedef struct xfs_fid2 {
-	__u16	fid_len;	/* length of remainder */
-	__u16	fid_pad;	/* padding, must be zero */
-	__u32	fid_gen;	/* generation number */
-	__u64	fid_ino;	/* inode number */
-} xfs_fid2_t;
-
 typedef struct xfs_handle {
 	union {
 		__s64	    align;	/* force alignment of ha_fid	 */
@@ -422,9 +405,9 @@
 } xfs_handle_t;
 #define ha_fsid ha_u._ha_fsid
 
-#define XFS_HSIZE(handle)	(((char *) &(handle).ha_fid.xfs_fid_pad	 \
+#define XFS_HSIZE(handle)	(((char *) &(handle).ha_fid.fid_pad	 \
 				 - (char *) &(handle))			  \
-				 + (handle).ha_fid.xfs_fid_len)
+				 + (handle).ha_fid.fid_len)
 
 /*
  * Flags for going down operation
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index c266a01..2ec1d8a 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -135,19 +135,13 @@
 xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
 {
 	if (args->flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)) {
-		struct xfs_qmops *ops;
-
-		ops = symbol_get(xfs_qmcore_xfs);
-		if (!ops) {
-			request_module("xfs_quota");
-			ops = symbol_get(xfs_qmcore_xfs);
-		}
-
-		if (!ops) {
-			cmn_err(CE_WARN, "XFS: no quota support available.");
-			return EINVAL;
-		}
-		mp->m_qm_ops = ops;
+#ifdef CONFIG_XFS_QUOTA
+		mp->m_qm_ops = &xfs_qmcore_xfs;
+#else
+		cmn_err(CE_WARN,
+			"XFS: qouta support not available in this kernel.");
+		return EINVAL;
+#endif
 	} else {
 		mp->m_qm_ops = &xfs_qmcore_stub;
 	}
@@ -158,6 +152,4 @@
 void
 xfs_qmops_put(struct xfs_mount *mp)
 {
-	if (mp->m_qm_ops != &xfs_qmcore_stub)
-		symbol_put(xfs_qmcore_xfs);
 }
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index a5a8454..a154459 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1635,9 +1635,8 @@
 xfs_vget(
 	xfs_mount_t	*mp,
 	bhv_vnode_t	**vpp,
-	fid_t		*fidp)
+	xfs_fid_t	*xfid)
 {
-	xfs_fid_t	*xfid = (struct xfs_fid *)fidp;
 	xfs_inode_t	*ip;
 	int		error;
 	xfs_ino_t	ino;
@@ -1647,11 +1646,11 @@
 	 * Invalid.  Since handles can be created in user space and passed in
 	 * via gethandle(), this is not cause for a panic.
 	 */
-	if (xfid->xfs_fid_len != sizeof(*xfid) - sizeof(xfid->xfs_fid_len))
+	if (xfid->fid_len != sizeof(*xfid) - sizeof(xfid->fid_len))
 		return XFS_ERROR(EINVAL);
 
-	ino  = xfid->xfs_fid_ino;
-	igen = xfid->xfs_fid_gen;
+	ino  = xfid->fid_ino;
+	igen = xfid->fid_gen;
 
 	/*
 	 * NFS can sometimes send requests for ino 0.  Fail them gracefully.
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index bc99e3e..a592fe0 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -2,7 +2,7 @@
 #define _XFS_VFSOPS_H 1
 
 struct cred;
-struct fid;
+struct xfs_fid;
 struct inode;
 struct kstatfs;
 struct xfs_mount;
@@ -17,7 +17,7 @@
 int xfs_statvfs(struct xfs_mount *mp, struct kstatfs *statp,
 		bhv_vnode_t *vp);
 int xfs_sync(struct xfs_mount *mp, int flags);
-int xfs_vget(struct xfs_mount *mp, bhv_vnode_t **vpp, struct fid *fidp);
+int xfs_vget(struct xfs_mount *mp, bhv_vnode_t **vpp, struct xfs_fid *xfid);
 int xfs_parseargs(struct xfs_mount *mp, char *options,
 		struct xfs_mount_args *args, int update);
 int xfs_showargs(struct xfs_mount *mp, struct seq_file *m);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 5e3c57c..efd5aff 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -3466,23 +3466,14 @@
 }
 
 
-/*
- * xfs_fid2
- *
- * A fid routine that takes a pointer to a previously allocated
- * fid structure (like xfs_fast_fid) but uses a 64 bit inode number.
- */
 int
 xfs_fid2(
 	xfs_inode_t	*ip,
-	fid_t		*fidp)
+	xfs_fid_t	*xfid)
 {
-	xfs_fid2_t	*xfid = (xfs_fid2_t *)fidp;
-
 	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
-	ASSERT(sizeof(fid_t) >= sizeof(xfs_fid2_t));
 
-	xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len);
+	xfid->fid_len = sizeof(xfs_fid_t) - sizeof(xfid->fid_len);
 	xfid->fid_pad = 0;
 	/*
 	 * use memcpy because the inode is a long long and there's no
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index f36e74f..b7e461c 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -39,7 +39,7 @@
 int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry,
 		char *target_path, mode_t mode, bhv_vnode_t **vpp,
 		struct cred *credp);
-int xfs_fid2(struct xfs_inode *ip, fid_t	*fidp);
+int xfs_fid2(struct xfs_inode *ip, struct xfs_fid *xfid);
 int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
 void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
 int xfs_inode_flush(struct xfs_inode *ip, int flags);
diff --git a/include/acpi/achware.h b/include/acpi/achware.h
index 9df275c..4053df9 100644
--- a/include/acpi/achware.h
+++ b/include/acpi/achware.h
@@ -71,9 +71,9 @@
 struct acpi_bit_register_info *acpi_hw_get_bit_register_info(u32 register_id);
 
 acpi_status
-acpi_hw_register_read(u8 use_lock, u32 register_id, u32 * return_value);
+acpi_hw_register_read(u32 register_id, u32 * return_value);
 
-acpi_status acpi_hw_register_write(u8 use_lock, u32 register_id, u32 value);
+acpi_status acpi_hw_register_write(u32 register_id, u32 value);
 
 acpi_status
 acpi_hw_low_level_read(u32 width,
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 86aea44..7b74b60 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -264,7 +264,6 @@
 
 struct acpi_device_wakeup_state {
 	u8 enabled:1;
-	u8 active:1;
 };
 
 struct acpi_device_wakeup {
@@ -333,6 +332,7 @@
 int acpi_bus_set_power(acpi_handle handle, int state);
 #ifdef CONFIG_ACPI_PROC_EVENT
 int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data);
+int acpi_bus_generate_proc_event4(const char *class, const char *bid, u8 type, int data);
 int acpi_bus_receive_event(struct acpi_bus_event *event);
 #else
 static inline int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data)
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 3d7ab9e..9512f04 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -314,6 +314,8 @@
  */
 acpi_status acpi_get_register(u32 register_id, u32 * return_value);
 
+acpi_status acpi_get_register_unlocked(u32 register_id, u32 *return_value);
+
 acpi_status acpi_set_register(u32 register_id, u32 value);
 
 acpi_status
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 99934a9..26d79f6 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -3,6 +3,7 @@
 
 #include <linux/kernel.h>
 #include <linux/cpu.h>
+#include <linux/cpuidle.h>
 
 #include <asm/acpi.h>
 
@@ -75,7 +76,9 @@
 };
 
 struct acpi_processor_power {
+	struct cpuidle_device dev;
 	struct acpi_processor_cx *state;
+	struct acpi_processor_cx *bm_state;
 	unsigned long bm_check_timestamp;
 	u32 default_state;
 	u32 bm_activity;
@@ -199,6 +202,7 @@
 	u8 bm_check:1;
 	u8 has_cst:1;
 	u8 power_setup_done:1;
+	u8 bm_rld_set:1;
 };
 
 struct acpi_processor {
@@ -322,6 +326,7 @@
 			      struct acpi_device *device);
 int acpi_processor_suspend(struct acpi_device * device, pm_message_t state);
 int acpi_processor_resume(struct acpi_device * device);
+extern struct cpuidle_driver acpi_idle_driver;
 
 /* in processor_thermal.c */
 int acpi_processor_get_limit_info(struct acpi_processor *pr);
diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h
index 381b4f5..9e19a70 100644
--- a/include/asm-alpha/bitops.h
+++ b/include/asm-alpha/bitops.h
@@ -1,6 +1,10 @@
 #ifndef _ALPHA_BITOPS_H
 #define _ALPHA_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/compiler.h>
 #include <asm/barrier.h>
 
diff --git a/include/asm-alpha/ide.h b/include/asm-alpha/ide.h
index 2a5cc0b..b7bf68d 100644
--- a/include/asm-alpha/ide.h
+++ b/include/asm-alpha/ide.h
@@ -40,7 +40,6 @@
 	}
 }
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #ifdef CONFIG_PCI
diff --git a/include/asm-alpha/tlbflush.h b/include/asm-alpha/tlbflush.h
index 1ca3ed3..eefab3f 100644
--- a/include/asm-alpha/tlbflush.h
+++ b/include/asm-alpha/tlbflush.h
@@ -92,17 +92,6 @@
 	if (*mmc) *mmc = 0;
 }
 
-/* Flush a specified range of user mapping page tables from TLB.
-   Although Alpha uses VPTE caches, this can be a nop, as Alpha does
-   not have finegrained tlb flushing, so it will flush VPTE stuff
-   during next flush_tlb_range.  */
-
-static inline void
-flush_tlb_pgtables(struct mm_struct *mm, unsigned long start,
-		   unsigned long end)
-{
-}
-
 #ifndef CONFIG_SMP
 /* Flush everything (kernel mapping may also have changed
    due to vmalloc/vfree).  */
diff --git a/include/asm-arm/arch-ixp4xx/io.h b/include/asm-arm/arch-ixp4xx/io.h
index c72f9d7..eeeea90 100644
--- a/include/asm-arm/arch-ixp4xx/io.h
+++ b/include/asm-arm/arch-ixp4xx/io.h
@@ -17,9 +17,6 @@
 
 #define IO_SPACE_LIMIT 0xffff0000
 
-#define	BIT(x)	((1)<<(x))
-
-
 extern int (*ixp4xx_pci_read)(u32 addr, u32 cmd, u32* data);
 extern int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data);
 
diff --git a/include/asm-arm/bitops.h b/include/asm-arm/bitops.h
index 52fe058..47a6b08 100644
--- a/include/asm-arm/bitops.h
+++ b/include/asm-arm/bitops.h
@@ -19,6 +19,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/system.h>
 
diff --git a/include/asm-arm/ide.h b/include/asm-arm/ide.h
index 4f68c8a..f348fcf 100644
--- a/include/asm-arm/ide.h
+++ b/include/asm-arm/ide.h
@@ -18,7 +18,6 @@
 #endif
 
 #if !defined(CONFIG_ARCH_L7200)
-# define IDE_ARCH_OBSOLETE_INIT
 # ifdef CONFIG_ARCH_CLPS7500
 #  define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 # else
diff --git a/include/asm-arm/tlbflush.h b/include/asm-arm/tlbflush.h
index 71be4fd..8c6bc1b 100644
--- a/include/asm-arm/tlbflush.h
+++ b/include/asm-arm/tlbflush.h
@@ -463,11 +463,6 @@
  */
 extern void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte);
 
-/*
- * ARM processors do not cache TLB tables in RAM.
- */
-#define flush_tlb_pgtables(mm,start,end)	do { } while (0)
-
 #endif
 
 #endif /* CONFIG_MMU */
diff --git a/include/asm-avr32/bitops.h b/include/asm-avr32/bitops.h
index f3faddf..1a50b69 100644
--- a/include/asm-avr32/bitops.h
+++ b/include/asm-avr32/bitops.h
@@ -8,6 +8,10 @@
 #ifndef __ASM_AVR32_BITOPS_H
 #define __ASM_AVR32_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/byteorder.h>
 #include <asm/system.h>
 
diff --git a/include/asm-avr32/tlbflush.h b/include/asm-avr32/tlbflush.h
index 730e268..5bc7c88 100644
--- a/include/asm-avr32/tlbflush.h
+++ b/include/asm-avr32/tlbflush.h
@@ -19,7 +19,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 extern void flush_tlb(void);
 extern void flush_tlb_all(void);
@@ -29,12 +28,6 @@
 extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
 extern void __flush_tlb_page(unsigned long asid, unsigned long page);
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	/* Nothing to do */
-}
-
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 #endif /* __ASM_AVR32_TLBFLUSH_H */
diff --git a/include/asm-blackfin/bitops.h b/include/asm-blackfin/bitops.h
index 03ecedc..b39a175 100644
--- a/include/asm-blackfin/bitops.h
+++ b/include/asm-blackfin/bitops.h
@@ -11,6 +11,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/sched.h>
diff --git a/include/asm-blackfin/ide.h b/include/asm-blackfin/ide.h
index 41b2db4..121e272 100644
--- a/include/asm-blackfin/ide.h
+++ b/include/asm-blackfin/ide.h
@@ -20,7 +20,6 @@
 #define MAX_HWIFS	1
 
 /* Legacy ... BLK_DEV_IDECS */
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 
diff --git a/include/asm-blackfin/tlbflush.h b/include/asm-blackfin/tlbflush.h
index 10a07ba..277b400 100644
--- a/include/asm-blackfin/tlbflush.h
+++ b/include/asm-blackfin/tlbflush.h
@@ -53,10 +53,4 @@
 	BUG();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	BUG();
-}
-
 #endif
diff --git a/include/asm-cris/arch-v32/ide.h b/include/asm-cris/arch-v32/ide.h
index 6590f65..1129617 100644
--- a/include/asm-cris/arch-v32/ide.h
+++ b/include/asm-cris/arch-v32/ide.h
@@ -54,7 +54,7 @@
 #define SUPPORT_VLB_SYNC 0
 
 #define IDE_ARCH_ACK_INTR
-#define ide_ack_intr(hwif)	(hwif)->hw.ack_intr(hwif)
+#define ide_ack_intr(hwif)	((hwif)->ack_intr(hwif))
 
 #endif /* __KERNEL__ */
 
diff --git a/include/asm-cris/bitops.h b/include/asm-cris/bitops.h
index 617151b..e2f49c2 100644
--- a/include/asm-cris/bitops.h
+++ b/include/asm-cris/bitops.h
@@ -14,6 +14,10 @@
 /* Currently this is unsuitable for consumption outside the kernel.  */
 #ifdef __KERNEL__ 
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/arch/bitops.h>
 #include <asm/system.h>
 #include <asm/atomic.h>
diff --git a/include/asm-cris/posix_types.h b/include/asm-cris/posix_types.h
index 7b9ed22..92000d0 100644
--- a/include/asm-cris/posix_types.h
+++ b/include/asm-cris/posix_types.h
@@ -52,7 +52,7 @@
 } __kernel_fsid_t;
 
 #ifdef __KERNEL__
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 #undef	__FD_SET
 #define __FD_SET(fd,fdsetp) set_bit(fd, (void *)(fdsetp))
diff --git a/include/asm-cris/tlbflush.h b/include/asm-cris/tlbflush.h
index 0569612..20697e7 100644
--- a/include/asm-cris/tlbflush.h
+++ b/include/asm-cris/tlbflush.h
@@ -38,13 +38,6 @@
 	flush_tlb_mm(vma->vm_mm);
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                      unsigned long start, unsigned long end)
-{
-        /* CRIS does not keep any page table caches in TLB */
-}
-
-
 static inline void flush_tlb(void)
 {
 	flush_tlb_mm(current->mm);
diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h
index 8dba74b..e29de71 100644
--- a/include/asm-frv/bitops.h
+++ b/include/asm-frv/bitops.h
@@ -21,6 +21,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm-generic/bitops/ffz.h>
 
 /*
diff --git a/include/asm-frv/tlbflush.h b/include/asm-frv/tlbflush.h
index 8370f97..7ac5eaf 100644
--- a/include/asm-frv/tlbflush.h
+++ b/include/asm-frv/tlbflush.h
@@ -57,7 +57,6 @@
 #define __flush_tlb_global()			flush_tlb_all()
 #define flush_tlb()				flush_tlb_all()
 #define flush_tlb_kernel_range(start, end)	flush_tlb_all()
-#define flush_tlb_pgtables(mm,start,end)	do { } while(0)
 
 #else
 
@@ -66,7 +65,6 @@
 #define flush_tlb_mm(mm)			BUG()
 #define flush_tlb_page(vma,addr)		BUG()
 #define flush_tlb_range(mm,start,end)		BUG()
-#define flush_tlb_pgtables(mm,start,end)	BUG()
 #define flush_tlb_kernel_range(start, end)	BUG()
 
 #endif
diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index e022a0f..15e6f25 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -19,6 +19,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/hweight.h>
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index cd8a964..4657f3e 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -3,9 +3,6 @@
 
 #include <asm/types.h>
 
-#define BITOP_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
-
 #ifdef CONFIG_SMP
 #include <asm/spinlock.h>
 #include <asm/cache.h>		/* we use L1_CACHE_BYTES */
@@ -66,8 +63,8 @@
  */
 static inline void set_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long flags;
 
 	_atomic_spin_lock_irqsave(p, flags);
@@ -87,8 +84,8 @@
  */
 static inline void clear_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long flags;
 
 	_atomic_spin_lock_irqsave(p, flags);
@@ -108,8 +105,8 @@
  */
 static inline void change_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long flags;
 
 	_atomic_spin_lock_irqsave(p, flags);
@@ -128,8 +125,8 @@
  */
 static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long old;
 	unsigned long flags;
 
@@ -152,8 +149,8 @@
  */
 static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long old;
 	unsigned long flags;
 
@@ -175,8 +172,8 @@
  */
 static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long old;
 	unsigned long flags;
 
diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h
index 46a825c..697cc2b 100644
--- a/include/asm-generic/bitops/non-atomic.h
+++ b/include/asm-generic/bitops/non-atomic.h
@@ -3,9 +3,6 @@
 
 #include <asm/types.h>
 
-#define BITOP_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
-
 /**
  * __set_bit - Set a bit in memory
  * @nr: the bit to set
@@ -17,16 +14,16 @@
  */
 static inline void __set_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 
 	*p  |= mask;
 }
 
 static inline void __clear_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 
 	*p &= ~mask;
 }
@@ -42,8 +39,8 @@
  */
 static inline void __change_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 
 	*p ^= mask;
 }
@@ -59,8 +56,8 @@
  */
 static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long old = *p;
 
 	*p = old | mask;
@@ -78,8 +75,8 @@
  */
 static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long old = *p;
 
 	*p = old & ~mask;
@@ -90,8 +87,8 @@
 static inline int __test_and_change_bit(int nr,
 					    volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long old = *p;
 
 	*p = old ^ mask;
@@ -105,7 +102,7 @@
  */
 static inline int test_bit(int nr, const volatile unsigned long *addr)
 {
-	return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
 }
 
 #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5615440..9f584cc 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -12,7 +12,11 @@
 /* .data section */
 #define DATA_DATA							\
 	*(.data)							\
-	*(.data.init.refok)
+	*(.data.init.refok)						\
+	. = ALIGN(8);							\
+	VMLINUX_SYMBOL(__start___markers) = .;				\
+	*(__markers)							\
+	VMLINUX_SYMBOL(__stop___markers) = .;
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
@@ -20,6 +24,7 @@
 		VMLINUX_SYMBOL(__start_rodata) = .;			\
 		*(.rodata) *(.rodata.*)					\
 		*(__vermagic)		/* Kernel version magic */	\
+		*(__markers_strings)	/* Markers: strings */		\
 	}								\
 									\
 	.rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {		\
diff --git a/include/asm-h8300/bitops.h b/include/asm-h8300/bitops.h
index e64ad31..cb18e3b 100644
--- a/include/asm-h8300/bitops.h
+++ b/include/asm-h8300/bitops.h
@@ -10,6 +10,11 @@
 #include <asm/system.h>
 
 #ifdef __KERNEL__
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 /*
  * Function prototypes to keep gcc -Wall happy
  */
diff --git a/include/asm-h8300/tlbflush.h b/include/asm-h8300/tlbflush.h
index 9a2c5c9..41c148a 100644
--- a/include/asm-h8300/tlbflush.h
+++ b/include/asm-h8300/tlbflush.h
@@ -52,10 +52,4 @@
 	BUG();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	BUG();
-}
-
 #endif /* _H8300_TLBFLUSH_H */
diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h
index 2144f1a..a977aff 100644
--- a/include/asm-ia64/bitops.h
+++ b/include/asm-ia64/bitops.h
@@ -9,6 +9,10 @@
  * O(1) scheduler patch
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <asm/intrinsics.h>
diff --git a/include/asm-ia64/cacheflush.h b/include/asm-ia64/cacheflush.h
index 4906916..afcfbda 100644
--- a/include/asm-ia64/cacheflush.h
+++ b/include/asm-ia64/cacheflush.h
@@ -7,8 +7,8 @@
  */
 
 #include <linux/page-flags.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/page.h>
 
 /*
diff --git a/include/asm-ia64/ide.h b/include/asm-ia64/ide.h
index e928675..1ccf238 100644
--- a/include/asm-ia64/ide.h
+++ b/include/asm-ia64/ide.h
@@ -46,7 +46,6 @@
 	}
 }
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #ifdef CONFIG_PCI
diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
index 3a62878..f93308f 100644
--- a/include/asm-ia64/meminit.h
+++ b/include/asm-ia64/meminit.h
@@ -35,7 +35,7 @@
 extern void reserve_memory (void);
 extern void find_initrd (void);
 extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
-extern void efi_memmap_init(unsigned long *, unsigned long *);
+extern unsigned long efi_memmap_init(unsigned long *s, unsigned long *e);
 extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
 
 extern unsigned long vmcore_find_descriptor_size(unsigned long address);
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h
index 0971ec9..e6204f14 100644
--- a/include/asm-ia64/pgtable.h
+++ b/include/asm-ia64/pgtable.h
@@ -150,7 +150,7 @@
 # ifndef __ASSEMBLY__
 
 #include <linux/sched.h>	/* for mm_struct */
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
index 1703c9d..471cc2e 100644
--- a/include/asm-ia64/smp.h
+++ b/include/asm-ia64/smp.h
@@ -14,8 +14,8 @@
 #include <linux/threads.h>
 #include <linux/kernel.h>
 #include <linux/cpumask.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/param.h>
 #include <asm/processor.h>
diff --git a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h
index ff857e3..0229fb9 100644
--- a/include/asm-ia64/spinlock.h
+++ b/include/asm-ia64/spinlock.h
@@ -11,9 +11,9 @@
 
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/bitops.h>
 
 #include <asm/atomic.h>
-#include <asm/bitops.h>
 #include <asm/intrinsics.h>
 #include <asm/system.h>
 
diff --git a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h
index e37f9fb..80bcb0a 100644
--- a/include/asm-ia64/tlbflush.h
+++ b/include/asm-ia64/tlbflush.h
@@ -84,19 +84,6 @@
 }
 
 /*
- * Flush the TLB entries mapping the virtually mapped linear page
- * table corresponding to address range [START-END).
- */
-static inline void
-flush_tlb_pgtables (struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-	/*
-	 * Deprecated.  The virtual page table is now flushed via the normal gather/flush
-	 * interface (see tlb.h).
-	 */
-}
-
-/*
  * Flush the local TLB. Invoked from another cpu using an IPI.
  */
 #ifdef CONFIG_SMP
diff --git a/include/asm-m32r/bitops.h b/include/asm-m32r/bitops.h
index 313a02c4..6dc9b81 100644
--- a/include/asm-m32r/bitops.h
+++ b/include/asm-m32r/bitops.h
@@ -11,6 +11,10 @@
  *    Copyright (C) 2004  Hirokazu Takata <takata at linux-m32r.org>
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/assembler.h>
 #include <asm/system.h>
diff --git a/include/asm-m32r/ide.h b/include/asm-m32r/ide.h
index 4672a49..5d2044e 100644
--- a/include/asm-m32r/ide.h
+++ b/include/asm-m32r/ide.h
@@ -65,7 +65,6 @@
 	}
 }
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #ifdef CONFIG_BLK_DEV_IDEPCI
diff --git a/include/asm-m32r/pgtable.h b/include/asm-m32r/pgtable.h
index 92d7266..8650538 100644
--- a/include/asm-m32r/pgtable.h
+++ b/include/asm-m32r/pgtable.h
@@ -21,9 +21,9 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/threads.h>
+#include <linux/bitops.h>
 #include <asm/processor.h>
 #include <asm/addrspace.h>
-#include <asm/bitops.h>
 #include <asm/page.h>
 
 struct mm_struct;
diff --git a/include/asm-m32r/tlbflush.h b/include/asm-m32r/tlbflush.h
index 3d37ac0..0ef9530 100644
--- a/include/asm-m32r/tlbflush.h
+++ b/include/asm-m32r/tlbflush.h
@@ -12,7 +12,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 
 extern void local_flush_tlb_all(void);
@@ -93,8 +92,6 @@
 	);
 }
 
-#define flush_tlb_pgtables(mm, start, end)	do { } while (0)
-
 extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
 
 #endif	/* _ASM_M32R_TLBFLUSH_H */
diff --git a/include/asm-m68k/bitops.h b/include/asm-m68k/bitops.h
index da151f7..2976b5d 100644
--- a/include/asm-m68k/bitops.h
+++ b/include/asm-m68k/bitops.h
@@ -8,6 +8,10 @@
  * for more details.
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 
 /*
diff --git a/include/asm-m68k/ide.h b/include/asm-m68k/ide.h
index f9ffb2c..909c6df 100644
--- a/include/asm-m68k/ide.h
+++ b/include/asm-m68k/ide.h
@@ -137,7 +137,7 @@
 #endif /* CONFIG_BLK_DEV_FALCON_IDE */
 
 #define IDE_ARCH_ACK_INTR
-#define ide_ack_intr(hwif)	((hwif)->hw.ack_intr ? (hwif)->hw.ack_intr(hwif) : 1)
+#define ide_ack_intr(hwif)	((hwif)->ack_intr ? (hwif)->ack_intr(hwif) : 1)
 
 #endif /* __KERNEL__ */
 #endif /* _M68K_IDE_H */
diff --git a/include/asm-m68k/tlbflush.h b/include/asm-m68k/tlbflush.h
index 3167883..17707ec 100644
--- a/include/asm-m68k/tlbflush.h
+++ b/include/asm-m68k/tlbflush.h
@@ -92,11 +92,6 @@
 	flush_tlb_all();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-}
-
 #else
 
 
@@ -219,11 +214,6 @@
 	sun3_put_segmap (addr & ~(SUN3_PMEG_SIZE - 1), SUN3_INVALID_PMEG);
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-}
-
 #endif
 
 #endif /* _M68K_TLBFLUSH_H */
diff --git a/include/asm-m68knommu/bitops.h b/include/asm-m68knommu/bitops.h
index b8b2770..f8dfb7b 100644
--- a/include/asm-m68knommu/bitops.h
+++ b/include/asm-m68knommu/bitops.h
@@ -10,6 +10,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/sched.h>
diff --git a/include/asm-m68knommu/tlbflush.h b/include/asm-m68knommu/tlbflush.h
index de858db..a470cfb 100644
--- a/include/asm-m68knommu/tlbflush.h
+++ b/include/asm-m68knommu/tlbflush.h
@@ -52,10 +52,4 @@
 	BUG();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	BUG();
-}
-
 #endif /* _M68KNOMMU_TLBFLUSH_H */
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index 77ed0c7..ec75ce4 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -9,6 +9,10 @@
 #ifndef _ASM_BITOPS_H
 #define _ASM_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <linux/irqflags.h>
 #include <linux/types.h>
diff --git a/include/asm-mips/fpu.h b/include/asm-mips/fpu.h
index 483685b..e59d4c0 100644
--- a/include/asm-mips/fpu.h
+++ b/include/asm-mips/fpu.h
@@ -12,12 +12,12 @@
 
 #include <linux/sched.h>
 #include <linux/thread_info.h>
+#include <linux/bitops.h>
 
 #include <asm/mipsregs.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
 #include <asm/hazards.h>
-#include <asm/bitops.h>
 #include <asm/processor.h>
 #include <asm/current.h>
 
diff --git a/include/asm-mips/ip32/crime.h b/include/asm-mips/ip32/crime.h
index a13702f..7c36b0e 100644
--- a/include/asm-mips/ip32/crime.h
+++ b/include/asm-mips/ip32/crime.h
@@ -17,9 +17,6 @@
  */
 #define CRIME_BASE	0x14000000	/* physical */
 
-#undef BIT
-#define BIT(x)	(1UL << (x))
-
 struct sgi_crime {
 	volatile unsigned long id;
 #define CRIME_ID_MASK			0xff
diff --git a/include/asm-mips/ip32/mace.h b/include/asm-mips/ip32/mace.h
index 990082c..d08d7c6 100644
--- a/include/asm-mips/ip32/mace.h
+++ b/include/asm-mips/ip32/mace.h
@@ -17,9 +17,6 @@
  */
 #define MACE_BASE	0x1f000000	/* physical */
 
-#undef BIT
-#define BIT(x) (1UL << (x))
-
 /*
  * PCI interface
  */
diff --git a/include/asm-mips/mach-generic/ide.h b/include/asm-mips/mach-generic/ide.h
index a771283..4ec2b93 100644
--- a/include/asm-mips/mach-generic/ide.h
+++ b/include/asm-mips/mach-generic/ide.h
@@ -98,7 +98,6 @@
 	}
 }
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #ifdef CONFIG_BLK_DEV_IDEPCI
diff --git a/include/asm-mips/sni.h b/include/asm-mips/sni.h
index 4d43dbb..af08145 100644
--- a/include/asm-mips/sni.h
+++ b/include/asm-mips/sni.h
@@ -141,8 +141,6 @@
 #define A20R_PT_TIM0_ACK        0xbc050000
 #define A20R_PT_TIM1_ACK        0xbc060000
 
-#define SNI_MIPS_IRQ_CPU_TIMER  (MIPS_CPU_IRQ_BASE+7)
-
 #define SNI_A20R_IRQ_BASE       MIPS_CPU_IRQ_BASE
 #define SNI_A20R_IRQ_TIMER      (SNI_A20R_IRQ_BASE+5)
 
diff --git a/include/asm-mips/time.h b/include/asm-mips/time.h
index cf76f4f..bc47af3 100644
--- a/include/asm-mips/time.h
+++ b/include/asm-mips/time.h
@@ -21,6 +21,7 @@
 #include <linux/ptrace.h>
 #include <linux/rtc.h>
 #include <linux/spinlock.h>
+#include <linux/clockchips.h>
 #include <linux/clocksource.h>
 
 extern spinlock_t rtc_lock;
@@ -83,4 +84,8 @@
 }
 #endif
 
+extern void clocksource_set_clock(struct clocksource *cs, unsigned int clock);
+extern void clockevent_set_clock(struct clock_event_device *cd,
+		unsigned int clock);
+
 #endif /* _ASM_TIME_H */
diff --git a/include/asm-mips/tlbflush.h b/include/asm-mips/tlbflush.h
index 730e841f..86b21de 100644
--- a/include/asm-mips/tlbflush.h
+++ b/include/asm-mips/tlbflush.h
@@ -11,7 +11,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 extern void local_flush_tlb_all(void);
 extern void local_flush_tlb_mm(struct mm_struct *mm);
@@ -45,10 +44,4 @@
 
 #endif /* CONFIG_SMP */
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-	unsigned long start, unsigned long end)
-{
-	/* Nothing to do on MIPS.  */
-}
-
 #endif /* __ASM_TLBFLUSH_H */
diff --git a/include/asm-parisc/bitops.h b/include/asm-parisc/bitops.h
index 03ae287..f8eebcb 100644
--- a/include/asm-parisc/bitops.h
+++ b/include/asm-parisc/bitops.h
@@ -1,6 +1,10 @@
 #ifndef _PARISC_BITOPS_H
 #define _PARISC_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/types.h>		/* for BITS_PER_LONG/SHIFT_PER_LONG */
 #include <asm/byteorder.h>
diff --git a/include/asm-parisc/ide.h b/include/asm-parisc/ide.h
index b27bf7a..be8760f 100644
--- a/include/asm-parisc/ide.h
+++ b/include/asm-parisc/ide.h
@@ -17,7 +17,6 @@
 #define MAX_HWIFS	2
 #endif
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #define ide_request_irq(irq,hand,flg,dev,id)	request_irq((irq),(hand),(flg),(dev),(id))
diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h
index e88cacd..9ab79c8 100644
--- a/include/asm-parisc/pgtable.h
+++ b/include/asm-parisc/pgtable.h
@@ -11,9 +11,9 @@
  */
 
 #include <linux/mm.h>		/* for vm_area_struct */
+#include <linux/bitops.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
-#include <asm/bitops.h>
 
 /*
  * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
diff --git a/include/asm-parisc/tlbflush.h b/include/asm-parisc/tlbflush.h
index 270cf30..b72ec66 100644
--- a/include/asm-parisc/tlbflush.h
+++ b/include/asm-parisc/tlbflush.h
@@ -57,10 +57,6 @@
 #endif
 }
 
-extern __inline__ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-}
- 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
 	unsigned long addr)
 {
diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h
index e85c3e0..733b4af 100644
--- a/include/asm-powerpc/bitops.h
+++ b/include/asm-powerpc/bitops.h
@@ -38,6 +38,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/asm-compat.h>
 #include <asm/synch.h>
diff --git a/include/asm-powerpc/ide.h b/include/asm-powerpc/ide.h
index 1644e44..fd7f5a4 100644
--- a/include/asm-powerpc/ide.h
+++ b/include/asm-powerpc/ide.h
@@ -69,12 +69,11 @@
 
 #ifdef CONFIG_BLK_DEV_MPC8xx_IDE
 #define IDE_ARCH_ACK_INTR  1
-#define ide_ack_intr(hwif) (hwif->hw.ack_intr ? hwif->hw.ack_intr(hwif) : 1)
+#define ide_ack_intr(hwif) ((hwif)->ack_intr ? (hwif)->ack_intr(hwif) : 1)
 #endif
 
 #endif /* __powerpc64__ */
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-powerpc/iommu.h b/include/asm-powerpc/iommu.h
index 870967e..4a82fdc 100644
--- a/include/asm-powerpc/iommu.h
+++ b/include/asm-powerpc/iommu.h
@@ -26,9 +26,9 @@
 #include <linux/spinlock.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/bitops.h>
 #include <asm/machdep.h>
 #include <asm/types.h>
-#include <asm/bitops.h>
 
 #define IOMMU_PAGE_SHIFT      12
 #define IOMMU_PAGE_SIZE       (ASM_CONST(1) << IOMMU_PAGE_SHIFT)
diff --git a/include/asm-powerpc/mmu_context.h b/include/asm-powerpc/mmu_context.h
index f863ac2..9102b8b 100644
--- a/include/asm-powerpc/mmu_context.h
+++ b/include/asm-powerpc/mmu_context.h
@@ -8,7 +8,7 @@
 
 #ifndef CONFIG_PPC64
 #include <asm/atomic.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 /*
  * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h
index a022f80..b6b036c 100644
--- a/include/asm-powerpc/tlbflush.h
+++ b/include/asm-powerpc/tlbflush.h
@@ -8,7 +8,6 @@
  *  - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  *
  *  This program is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU General Public License
@@ -174,15 +173,5 @@
  */
 extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
 
-/*
- * This is called in munmap when we have freed up some page-table
- * pages.  We don't need to do anything here, there's nothing special
- * about our page-table pages.  -- paulus
- */
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-}
-
 #endif /*__KERNEL__ */
 #endif /* _ASM_POWERPC_TLBFLUSH_H */
diff --git a/include/asm-ppc/mmu_context.h b/include/asm-ppc/mmu_context.h
index a6441a0..b2e25d8 100644
--- a/include/asm-ppc/mmu_context.h
+++ b/include/asm-ppc/mmu_context.h
@@ -2,8 +2,9 @@
 #ifndef __PPC_MMU_CONTEXT_H
 #define __PPC_MMU_CONTEXT_H
 
+#include <linux/bitops.h>
+
 #include <asm/atomic.h>
-#include <asm/bitops.h>
 #include <asm/mmu.h>
 #include <asm/cputable.h>
 #include <asm-generic/mm_hooks.h>
diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
index d756b34..34d9a63 100644
--- a/include/asm-s390/bitops.h
+++ b/include/asm-s390/bitops.h
@@ -15,6 +15,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 
 /*
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h
index 66793f5..6de2632 100644
--- a/include/asm-s390/tlbflush.h
+++ b/include/asm-s390/tlbflush.h
@@ -14,7 +14,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 
 /*
@@ -152,10 +151,4 @@
 
 #endif
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                      unsigned long start, unsigned long end)
-{
-        /* S/390 does not keep any page table caches in TLB */
-}
-
 #endif /* _S390_TLBFLUSH_H */
diff --git a/include/asm-sh/bitops.h b/include/asm-sh/bitops.h
index 9d70217..df805f2 100644
--- a/include/asm-sh/bitops.h
+++ b/include/asm-sh/bitops.h
@@ -2,6 +2,11 @@
 #define __ASM_SH_BITOPS_H
 
 #ifdef __KERNEL__
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/system.h>
 /* For __swab32 */
 #include <asm/byteorder.h>
diff --git a/include/asm-sh/tlbflush.h b/include/asm-sh/tlbflush.h
index 455fb8d..e0ac972 100644
--- a/include/asm-sh/tlbflush.h
+++ b/include/asm-sh/tlbflush.h
@@ -9,7 +9,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 extern void local_flush_tlb_all(void);
 extern void local_flush_tlb_mm(struct mm_struct *mm);
@@ -47,9 +46,4 @@
 
 #endif /* CONFIG_SMP */
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	/* Nothing to do */
-}
 #endif /* __ASM_SH_TLBFLUSH_H */
diff --git a/include/asm-sh64/bitops.h b/include/asm-sh64/bitops.h
index 444d5ea..600c59e 100644
--- a/include/asm-sh64/bitops.h
+++ b/include/asm-sh64/bitops.h
@@ -13,6 +13,11 @@
  */
 
 #ifdef __KERNEL__
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/system.h>
 /* For __swab32 */
diff --git a/include/asm-sh64/ide.h b/include/asm-sh64/ide.h
index c9d84d5..b6e31e8 100644
--- a/include/asm-sh64/ide.h
+++ b/include/asm-sh64/ide.h
@@ -19,7 +19,6 @@
 /* Without this, the initialisation of PCI IDE cards end up calling
  * ide_init_hwif_ports, which won't work. */
 #ifdef CONFIG_BLK_DEV_IDEPCI
-#define IDE_ARCH_OBSOLETE_INIT 1
 #define ide_default_io_ctl(base)	(0)
 #endif
 
diff --git a/include/asm-sh64/tlbflush.h b/include/asm-sh64/tlbflush.h
index e45bead..16a164a 100644
--- a/include/asm-sh64/tlbflush.h
+++ b/include/asm-sh64/tlbflush.h
@@ -20,10 +20,6 @@
 extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			    unsigned long end);
 extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-}
 
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
diff --git a/include/asm-sparc/bitops.h b/include/asm-sparc/bitops.h
index 00bd0a6..cb3cefa 100644
--- a/include/asm-sparc/bitops.h
+++ b/include/asm-sparc/bitops.h
@@ -14,6 +14,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 extern unsigned long ___set_bit(unsigned long *addr, unsigned long mask);
 extern unsigned long ___clear_bit(unsigned long *addr, unsigned long mask);
 extern unsigned long ___change_bit(unsigned long *addr, unsigned long mask);
diff --git a/include/asm-sparc/ide.h b/include/asm-sparc/ide.h
index a6d735a..4040227 100644
--- a/include/asm-sparc/ide.h
+++ b/include/asm-sparc/ide.h
@@ -18,7 +18,6 @@
 #undef  MAX_HWIFS
 #define MAX_HWIFS	2
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #define __ide_insl(data_reg, buffer, wcount) \
diff --git a/include/asm-sparc/tlbflush.h b/include/asm-sparc/tlbflush.h
index a619da5..b957e29 100644
--- a/include/asm-sparc/tlbflush.h
+++ b/include/asm-sparc/tlbflush.h
@@ -13,7 +13,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 
 #ifdef CONFIG_SMP
@@ -42,11 +41,6 @@
 BTFIXUPDEF_CALL(void, flush_tlb_range, struct vm_area_struct *, unsigned long, unsigned long)
 BTFIXUPDEF_CALL(void, flush_tlb_page, struct vm_area_struct *, unsigned long)
 
-// Thanks to Anton Blanchard, our pagetables became uncached in 2.4. Wee!
-// extern void flush_tlb_pgtables(struct mm_struct *mm,
-//     unsigned long start, unsigned long end);
-#define flush_tlb_pgtables(mm, start, end)	do{ }while(0)
-
 #define flush_tlb_all() BTFIXUP_CALL(flush_tlb_all)()
 #define flush_tlb_mm(mm) BTFIXUP_CALL(flush_tlb_mm)(mm)
 #define flush_tlb_range(vma,start,end) BTFIXUP_CALL(flush_tlb_range)(vma,start,end)
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h
index dd4bfe9..982ce89 100644
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -7,6 +7,10 @@
 #ifndef _SPARC64_BITOPS_H
 #define _SPARC64_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/byteorder.h>
 
diff --git a/include/asm-sparc64/ide.h b/include/asm-sparc64/ide.h
index 55149cf..ac7eb21 100644
--- a/include/asm-sparc64/ide.h
+++ b/include/asm-sparc64/ide.h
@@ -24,7 +24,6 @@
 # endif
 #endif
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #define __ide_insl(data_reg, buffer, wcount) \
diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h
index 42c0994..1c1c5ea 100644
--- a/include/asm-sparc64/smp.h
+++ b/include/asm-sparc64/smp.h
@@ -26,7 +26,7 @@
  *	Private routines/data
  */
  
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/atomic.h>
 #include <asm/percpu.h>
 
diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h
index 3487328..fbb675d 100644
--- a/include/asm-sparc64/tlbflush.h
+++ b/include/asm-sparc64/tlbflush.h
@@ -41,11 +41,4 @@
 
 #endif /* ! CONFIG_SMP */
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-	/* We don't use virtual page tables for TLB miss processing
-	 * any more.  Nowadays we use the TSB.
-	 */
-}
-
 #endif /* _SPARC64_TLBFLUSH_H */
diff --git a/include/asm-um/bitops.h b/include/asm-um/bitops.h
index 46d7819..e4d38d4 100644
--- a/include/asm-um/bitops.h
+++ b/include/asm-um/bitops.h
@@ -1,6 +1,10 @@
 #ifndef __UM_BITOPS_H
 #define __UM_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include "asm/arch/bitops.h"
 
 #endif
diff --git a/include/asm-um/tlbflush.h b/include/asm-um/tlbflush.h
index 9d647c5..614f2c0 100644
--- a/include/asm-um/tlbflush.h
+++ b/include/asm-um/tlbflush.h
@@ -17,7 +17,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_kernel_vm() flushes the kernel vm area
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 
 extern void flush_tlb_all(void);
@@ -29,9 +28,4 @@
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 extern void __flush_tlb_one(unsigned long addr);
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-}
-
 #endif
diff --git a/include/asm-v850/bitops.h b/include/asm-v850/bitops.h
index 8eafdb1..f82f5b4 100644
--- a/include/asm-v850/bitops.h
+++ b/include/asm-v850/bitops.h
@@ -13,6 +13,9 @@
 #ifndef __V850_BITOPS_H__
 #define __V850_BITOPS_H__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
 
 #include <linux/compiler.h>	/* unlikely  */
 #include <asm/byteorder.h>	/* swab32 */
diff --git a/include/asm-v850/tlbflush.h b/include/asm-v850/tlbflush.h
index 5f2f85f..c44aa64 100644
--- a/include/asm-v850/tlbflush.h
+++ b/include/asm-v850/tlbflush.h
@@ -61,10 +61,4 @@
 	BUG ();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	BUG ();
-}
-
 #endif /* __V850_TLBFLUSH_H__ */
diff --git a/include/asm-x86/acpi_32.h b/include/asm-x86/acpi_32.h
index 125179ad..723493e 100644
--- a/include/asm-x86/acpi_32.h
+++ b/include/asm-x86/acpi_32.h
@@ -81,11 +81,7 @@
         :"=r"(n_hi), "=r"(n_lo)     \
         :"0"(n_hi), "1"(n_lo))
 
-#ifdef CONFIG_X86_IO_APIC
-extern void check_acpi_pci(void);
-#else
-static inline void check_acpi_pci(void) { }
-#endif
+extern void early_quirks(void);
 
 #ifdef CONFIG_ACPI
 extern int acpi_lapic;
diff --git a/include/asm-x86/bitops_32.h b/include/asm-x86/bitops_32.h
index c96641f..3268a34 100644
--- a/include/asm-x86/bitops_32.h
+++ b/include/asm-x86/bitops_32.h
@@ -5,6 +5,10 @@
  * Copyright 1992, Linus Torvalds.
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/alternative.h>
 
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index 525edf2..dacaa5f 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -5,6 +5,10 @@
  * Copyright 1992, Linus Torvalds.
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/alternative.h>
 
 #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
diff --git a/include/asm-x86/compat.h b/include/asm-x86/compat.h
index 53cb96b..66ba798 100644
--- a/include/asm-x86/compat.h
+++ b/include/asm-x86/compat.h
@@ -6,6 +6,7 @@
  */
 #include <linux/types.h>
 #include <linux/sched.h>
+#include <asm/user32.h>
 
 #define COMPAT_USER_HZ	100
 
@@ -181,6 +182,11 @@
 };
 
 /*
+ * The type of struct elf_prstatus.pr_reg in compatible core dumps.
+ */
+typedef struct user_regs_struct32 compat_elf_gregset_t;
+
+/*
  * A pointer passed in from user mode. This should not
  * be used for syscall parameters, just declare them
  * as pointers because the syscall entry code will have
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index ac991b5..7d9c938 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -20,6 +20,16 @@
 #define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8))
 #define clear_LDT()  asm volatile("lldt %w0"::"r" (0))
 
+static inline unsigned long __store_tr(void)
+{
+       unsigned long tr;
+
+       asm volatile ("str %w0":"=r" (tr));
+       return tr;
+}
+
+#define store_tr(tr) (tr) = __store_tr()
+
 /*
  * This is the ldt that every process will get unless we need
  * something other than this.
@@ -31,6 +41,16 @@
 /* the cpu gdt accessor */
 #define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address)
 
+static inline void load_gdt(const struct desc_ptr *ptr)
+{
+	asm volatile("lgdt %w0"::"m" (*ptr));
+}
+
+static inline void store_gdt(struct desc_ptr *ptr)
+{
+       asm("sgdt %w0":"=m" (*ptr));
+}
+
 static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist)  
 {
 	struct gate_struct s; 	
@@ -71,6 +91,16 @@
 	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist);
 }
 
+static inline void load_idt(const struct desc_ptr *ptr)
+{
+	asm volatile("lidt %w0"::"m" (*ptr));
+}
+
+static inline void store_idt(struct desc_ptr *dtr)
+{
+       asm("sidt %w0":"=m" (*dtr));
+}
+
 static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, 
 					 unsigned size) 
 { 
diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h
index d948988..771af33 100644
--- a/include/asm-x86/geode.h
+++ b/include/asm-x86/geode.h
@@ -38,6 +38,8 @@
 #define MSR_LBAR_ACPI		0x5140000E
 #define MSR_LBAR_PMS		0x5140000F
 
+#define MSR_DIVIL_SOFT_RESET	0x51400017
+
 #define MSR_PIC_YSEL_LOW	0x51400020
 #define MSR_PIC_YSEL_HIGH	0x51400021
 #define MSR_PIC_ZSEL_LOW	0x51400022
diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index d4ab6db..4f51519 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -64,6 +64,7 @@
 /* hpet memory map physical address */
 extern unsigned long hpet_address;
 extern unsigned long force_hpet_address;
+extern int hpet_force_user;
 extern int is_hpet_enabled(void);
 extern int hpet_enable(void);
 extern unsigned long hpet_readl(unsigned long a);
diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
index e7817a3..42130ad 100644
--- a/include/asm-x86/ide.h
+++ b/include/asm-x86/ide.h
@@ -62,7 +62,6 @@
 	}
 }
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #ifdef CONFIG_BLK_DEV_IDEPCI
diff --git a/include/asm-x86/io_apic_64.h b/include/asm-x86/io_apic_64.h
index d9f2e54..e2c1367 100644
--- a/include/asm-x86/io_apic_64.h
+++ b/include/asm-x86/io_apic_64.h
@@ -133,4 +133,6 @@
 
 extern spinlock_t i8259A_lock;
 
+extern int timer_over_8254;
+
 #endif
diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index a7c75ea..6d011bd 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h
@@ -119,7 +119,7 @@
 	 */
 	local_irq_save(flags);
 	for_each_cpu_mask(query_cpu, mask) {
-		__send_IPI_dest_field(x86_cpu_to_apicid[query_cpu],
+		__send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
 				      vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
diff --git a/include/asm-x86/irq_32.h b/include/asm-x86/irq_32.h
index 36f3106..aca9c96 100644
--- a/include/asm-x86/irq_32.h
+++ b/include/asm-x86/irq_32.h
@@ -45,4 +45,7 @@
 void init_IRQ(void);
 void __init native_init_IRQ(void);
 
+/* Interrupt vector management */
+extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
+
 #endif /* _ASM_IRQ_H */
diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index a02eb29..a494473 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -73,8 +73,32 @@
 #define MSR_P6_EVNTSEL0			0x00000186
 #define MSR_P6_EVNTSEL1			0x00000187
 
-/* K7/K8 MSRs. Not complete. See the architecture manual for a more
+/* AMD64 MSRs. Not complete. See the architecture manual for a more
    complete list. */
+
+#define MSR_AMD64_IBSFETCHCTL		0xc0011030
+#define MSR_AMD64_IBSFETCHLINAD		0xc0011031
+#define MSR_AMD64_IBSFETCHPHYSAD	0xc0011032
+#define MSR_AMD64_IBSOPCTL		0xc0011033
+#define MSR_AMD64_IBSOPRIP		0xc0011034
+#define MSR_AMD64_IBSOPDATA		0xc0011035
+#define MSR_AMD64_IBSOPDATA2		0xc0011036
+#define MSR_AMD64_IBSOPDATA3		0xc0011037
+#define MSR_AMD64_IBSDCLINAD		0xc0011038
+#define MSR_AMD64_IBSDCPHYSAD		0xc0011039
+#define MSR_AMD64_IBSCTL		0xc001103a
+
+/* K8 MSRs */
+#define MSR_K8_TOP_MEM1			0xc001001a
+#define MSR_K8_TOP_MEM2			0xc001001d
+#define MSR_K8_SYSCFG			0xc0010010
+#define MSR_K8_HWCR			0xc0010015
+#define MSR_K8_ENABLE_C1E		0xc0010055
+#define K8_MTRRFIXRANGE_DRAM_ENABLE	0x00040000 /* MtrrFixDramEn bit    */
+#define K8_MTRRFIXRANGE_DRAM_MODIFY	0x00080000 /* MtrrFixDramModEn bit */
+#define K8_MTRR_RDMEM_WRMEM_MASK	0x18181818 /* Mask: RdMem|WrMem    */
+
+/* K7 MSRs */
 #define MSR_K7_EVNTSEL0			0xc0010000
 #define MSR_K7_PERFCTR0			0xc0010004
 #define MSR_K7_EVNTSEL1			0xc0010001
@@ -83,20 +107,10 @@
 #define MSR_K7_PERFCTR2			0xc0010006
 #define MSR_K7_EVNTSEL3			0xc0010003
 #define MSR_K7_PERFCTR3			0xc0010007
-#define MSR_K8_TOP_MEM1			0xc001001a
 #define MSR_K7_CLK_CTL			0xc001001b
-#define MSR_K8_TOP_MEM2			0xc001001d
-#define MSR_K8_SYSCFG			0xc0010010
-
-#define K8_MTRRFIXRANGE_DRAM_ENABLE	0x00040000 /* MtrrFixDramEn bit    */
-#define K8_MTRRFIXRANGE_DRAM_MODIFY	0x00080000 /* MtrrFixDramModEn bit */
-#define K8_MTRR_RDMEM_WRMEM_MASK	0x18181818 /* Mask: RdMem|WrMem    */
-
 #define MSR_K7_HWCR			0xc0010015
-#define MSR_K8_HWCR			0xc0010015
 #define MSR_K7_FID_VID_CTL		0xc0010041
 #define MSR_K7_FID_VID_STATUS		0xc0010042
-#define MSR_K8_ENABLE_C1E		0xc0010055
 
 /* K6 MSRs */
 #define MSR_K6_EFER			0xc0000080
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index acd4b33..ed3e70d 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -17,10 +17,7 @@
 #include <linux/threads.h>
 #include <asm/paravirt.h>
 
-#ifndef _I386_BITOPS_H
-#include <asm/bitops.h>
-#endif
-
+#include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index a79f535..9b0ff47 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -9,7 +9,7 @@
  * the x86-64 page table tree.
  */
 #include <asm/processor.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <linux/threads.h>
 #include <asm/pda.h>
 
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 83800e7..13976b0 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -79,6 +79,7 @@
 	unsigned char booted_cores;	/* number of cores as seen by OS */
 	__u8 phys_proc_id; 		/* Physical processor id. */
 	__u8 cpu_core_id;  		/* Core id */
+	__u8 cpu_index;			/* index into per_cpu list */
 #endif
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
@@ -103,14 +104,19 @@
 DECLARE_PER_CPU(struct tss_struct, init_tss);
 
 #ifdef CONFIG_SMP
-extern struct cpuinfo_x86 cpu_data[];
-#define current_cpu_data cpu_data[smp_processor_id()]
+DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
+#define cpu_data(cpu)		per_cpu(cpu_info, cpu)
+#define current_cpu_data	cpu_data(smp_processor_id())
 #else
-#define cpu_data (&boot_cpu_data)
-#define current_cpu_data boot_cpu_data
+#define cpu_data(cpu)		boot_cpu_data
+#define current_cpu_data	boot_cpu_data
 #endif
 
-extern	int cpu_llc_id[NR_CPUS];
+/*
+ * the following now lives in the per cpu area:
+ * extern	int cpu_llc_id[NR_CPUS];
+ */
+DECLARE_PER_CPU(u8, cpu_llc_id);
 extern char ignore_fpu_irq;
 
 void __init cpu_detect(struct cpuinfo_x86 *c);
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index f422bec..e4f1997 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -74,6 +74,7 @@
 	__u8	booted_cores;	/* number of cores as seen by OS */
 	__u8	phys_proc_id;	/* Physical Processor id. */
 	__u8	cpu_core_id;	/* Core id. */
+	__u8	cpu_index;	/* index into per_cpu list */
 #endif
 } ____cacheline_aligned;
 
@@ -88,11 +89,12 @@
 #define X86_VENDOR_UNKNOWN 0xff
 
 #ifdef CONFIG_SMP
-extern struct cpuinfo_x86 cpu_data[];
-#define current_cpu_data cpu_data[smp_processor_id()]
+DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
+#define cpu_data(cpu)		per_cpu(cpu_info, cpu)
+#define current_cpu_data	cpu_data(smp_processor_id())
 #else
-#define cpu_data (&boot_cpu_data)
-#define current_cpu_data boot_cpu_data
+#define cpu_data(cpu)		boot_cpu_data
+#define current_cpu_data	boot_cpu_data
 #endif
 
 extern char ignore_irq13;
@@ -390,12 +392,6 @@
 	asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
 } 
 
-#define ARCH_HAS_PREFETCH
-static inline void prefetch(void *x) 
-{ 
-	asm volatile("prefetcht0 (%0)" :: "r" (x));
-} 
-
 #define ARCH_HAS_PREFETCHW 1
 static inline void prefetchw(void *x) 
 { 
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index c44a3a9..dabba55 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -83,8 +83,6 @@
 extern int reboot_force;
 extern int notsc_setup(char *);
 
-extern int timer_over_8254;
-
 extern int gsi_irq_sharing(int gsi);
 
 extern int force_mwait;
diff --git a/include/asm-x86/ptrace_32.h b/include/asm-x86/ptrace_32.h
index 6002597..78d063d 100644
--- a/include/asm-x86/ptrace_32.h
+++ b/include/asm-x86/ptrace_32.h
@@ -55,6 +55,8 @@
 }
 
 #define instruction_pointer(regs) ((regs)->eip)
+#define frame_pointer(regs) ((regs)->ebp)
+#define stack_pointer(regs) ((regs)->esp)
 #define regs_return_value(regs) ((regs)->eax)
 
 extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/include/asm-x86/ptrace_64.h b/include/asm-x86/ptrace_64.h
index 7f166cc..7bfe61e 100644
--- a/include/asm-x86/ptrace_64.h
+++ b/include/asm-x86/ptrace_64.h
@@ -40,6 +40,8 @@
 #define user_mode(regs) (!!((regs)->cs & 3))
 #define user_mode_vm(regs) user_mode(regs)
 #define instruction_pointer(regs) ((regs)->rip)
+#define frame_pointer(regs) ((regs)->rbp)
+#define stack_pointer(regs) ((regs)->rsp)
 #define regs_return_value(regs) ((regs)->rax)
 
 extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/include/asm-x86/smp_32.h b/include/asm-x86/smp_32.h
index ee46038..7056d86 100644
--- a/include/asm-x86/smp_32.h
+++ b/include/asm-x86/smp_32.h
@@ -11,7 +11,7 @@
 #endif
 
 #if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/mpspec.h>
 #include <asm/apic.h>
 #ifdef CONFIG_X86_IO_APIC
@@ -39,9 +39,11 @@
 extern void unlock_ipi_call_lock(void);
 
 #define MAX_APICID 256
-extern u8 x86_cpu_to_apicid[];
+extern u8 __initdata x86_cpu_to_apicid_init[];
+extern void *x86_cpu_to_apicid_ptr;
+DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
 
-#define cpu_physical_id(cpu)	x86_cpu_to_apicid[cpu]
+#define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)
 
 extern void set_cpu_sibling_map(int cpu);
 
diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h
index d30e9b6..6f0e027 100644
--- a/include/asm-x86/smp_64.h
+++ b/include/asm-x86/smp_64.h
@@ -37,6 +37,8 @@
 extern void unlock_ipi_call_lock(void);
 extern int smp_num_siblings;
 extern void smp_send_reschedule(int cpu);
+extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
+				  void *info, int wait);
 
 /*
  * cpu_sibling_map and cpu_core_map now live
@@ -47,7 +49,7 @@
  */
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
-extern u8 cpu_llc_id[NR_CPUS];
+DECLARE_PER_CPU(u8, cpu_llc_id);
 
 #define SMP_TRAMPOLINE_BASE 0x6000
 
@@ -84,7 +86,9 @@
  * Some lowlevel functions might want to know about
  * the real APIC ID <-> CPU # mapping.
  */
-extern u8 x86_cpu_to_apicid[NR_CPUS];	/* physical ID */
+extern u8 __initdata x86_cpu_to_apicid_init[];
+extern void *x86_cpu_to_apicid_ptr;
+DECLARE_PER_CPU(u8, x86_cpu_to_apicid);	/* physical ID */
 extern u8 bios_cpu_apicid[];
 
 static inline int cpu_present_to_apicid(int mps_cpu)
@@ -115,8 +119,9 @@
 }
 
 #ifdef CONFIG_SMP
-#define cpu_physical_id(cpu)		x86_cpu_to_apicid[cpu]
+#define cpu_physical_id(cpu)		per_cpu(x86_cpu_to_apicid, cpu)
 #else
+extern unsigned int boot_cpu_id;
 #define cpu_physical_id(cpu)		boot_cpu_id
 #endif /* !CONFIG_SMP */
 #endif
diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h
index db6283e..ef84688 100644
--- a/include/asm-x86/system_32.h
+++ b/include/asm-x86/system_32.h
@@ -315,5 +315,6 @@
 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 void default_idle(void);
+void __show_registers(struct pt_regs *, int all);
 
 #endif
diff --git a/include/asm-x86/tlbflush_32.h b/include/asm-x86/tlbflush_32.h
index a50fa67..2bd5b95 100644
--- a/include/asm-x86/tlbflush_32.h
+++ b/include/asm-x86/tlbflush_32.h
@@ -78,7 +78,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  *  - flush_tlb_others(cpumask, mm, va) flushes a TLBs on other cpus
  *
  * ..but the i386 has somewhat limited tlb flushing capabilities,
@@ -166,10 +165,4 @@
 	flush_tlb_all();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	/* i386 does not keep any page table caches in TLB */
-}
-
 #endif /* _I386_TLBFLUSH_H */
diff --git a/include/asm-x86/tlbflush_64.h b/include/asm-x86/tlbflush_64.h
index 888eb4a..7731fd2 100644
--- a/include/asm-x86/tlbflush_64.h
+++ b/include/asm-x86/tlbflush_64.h
@@ -31,7 +31,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  *
  * x86-64 can only flush individual pages or full VMs. For a range flush
  * we always do the full VM. Might be worth trying if for a small
@@ -98,12 +97,4 @@
 	flush_tlb_all();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	/* x86_64 does not keep any page table caches in a software TLB.
-	   The CPUs do in their hardware TLBs, but they are handled
-	   by the normal TLB flushing algorithms. */
-}
-
 #endif /* _X8664_TLBFLUSH_H */
diff --git a/include/asm-x86/topology_32.h b/include/asm-x86/topology_32.h
index ae10746..9040f5a 100644
--- a/include/asm-x86/topology_32.h
+++ b/include/asm-x86/topology_32.h
@@ -28,8 +28,8 @@
 #define _ASM_I386_TOPOLOGY_H
 
 #ifdef CONFIG_X86_HT
-#define topology_physical_package_id(cpu)	(cpu_data[cpu].phys_proc_id)
-#define topology_core_id(cpu)			(cpu_data[cpu].cpu_core_id)
+#define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
+#define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 #define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
 #endif
diff --git a/include/asm-x86/topology_64.h b/include/asm-x86/topology_64.h
index 848c17f..a718dda 100644
--- a/include/asm-x86/topology_64.h
+++ b/include/asm-x86/topology_64.h
@@ -5,7 +5,7 @@
 #ifdef CONFIG_NUMA
 
 #include <asm/mpspec.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 extern cpumask_t cpu_online_map;
 
@@ -56,8 +56,8 @@
 #endif
 
 #ifdef CONFIG_SMP
-#define topology_physical_package_id(cpu)	(cpu_data[cpu].phys_proc_id)
-#define topology_core_id(cpu)			(cpu_data[cpu].cpu_core_id)
+#define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
+#define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 #define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
 #define mc_capable()			(boot_cpu_data.x86_max_cores > 1)
diff --git a/include/asm-xtensa/bitops.h b/include/asm-xtensa/bitops.h
index 78db04c..23261e8 100644
--- a/include/asm-xtensa/bitops.h
+++ b/include/asm-xtensa/bitops.h
@@ -15,6 +15,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/processor.h>
 #include <asm/byteorder.h>
 #include <asm/system.h>
diff --git a/include/asm-xtensa/tlbflush.h b/include/asm-xtensa/tlbflush.h
index 7c637b3..46d2400 100644
--- a/include/asm-xtensa/tlbflush.h
+++ b/include/asm-xtensa/tlbflush.h
@@ -41,17 +41,6 @@
 
 #define flush_tlb_kernel_range(start,end) flush_tlb_all()
 
-
-/* This is calld in munmap when we have freed up some page-table pages.
- * We don't need to do anything here, there's nothing special about our
- * page-table pages.
- */
-
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                      unsigned long start, unsigned long end)
-{
-}
-
 /* TLB operations. */
 
 static inline unsigned long itlb_probe(unsigned long addr)
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 7ac8303..e3ffd14 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -47,6 +47,7 @@
 header-y += coff.h
 header-y += comstats.h
 header-y += const.h
+header-y += cgroupstats.h
 header-y += cycx_cfm.h
 header-y += dlm_device.h
 header-y += dlm_netlink.h
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index bf5e000..8ccedf7 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -189,32 +189,6 @@
 extern int acpi_blacklisted(void);
 extern void acpi_bios_year(char *s);
 
-#define	ACPI_CSTATE_LIMIT_DEFINED	/* for driver builds */
-#ifdef	CONFIG_ACPI
-
-/*
- * Set highest legal C-state
- * 0: C0 okay, but not C1
- * 1: C1 okay, but not C2
- * 2: C2 okay, but not C3 etc.
- */
-
-extern unsigned int max_cstate;
-
-static inline unsigned int acpi_get_cstate_limit(void)
-{
-	return max_cstate;
-}
-static inline void acpi_set_cstate_limit(unsigned int new_limit)
-{
-	max_cstate = new_limit;
-	return;
-}
-#else
-static inline unsigned int acpi_get_cstate_limit(void) { return 0; }
-static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; }
-#endif
-
 #ifdef CONFIG_ACPI_NUMA
 int acpi_get_pxm(acpi_handle handle);
 int acpi_get_node(acpi_handle *handle);
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 64b4641..acad110 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <linux/string.h>
+#include <linux/kernel.h>
 
 /*
  * bitmaps provide bit arrays that consume one or more unsigned
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index b9fb8ee..69c1edb 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -2,6 +2,14 @@
 #define _LINUX_BITOPS_H
 #include <asm/types.h>
 
+#ifdef	__KERNEL__
+#define BIT(nr)			(1UL << (nr))
+#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
+#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)
+#define BITS_PER_BYTE		8
+#endif
+
 /*
  * Include this here because some architectures need generic_ffs/fls in
  * scope
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
new file mode 100644
index 0000000..8747932
--- /dev/null
+++ b/include/linux/cgroup.h
@@ -0,0 +1,327 @@
+#ifndef _LINUX_CGROUP_H
+#define _LINUX_CGROUP_H
+/*
+ *  cgroup interface
+ *
+ *  Copyright (C) 2003 BULL SA
+ *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/kref.h>
+#include <linux/cpumask.h>
+#include <linux/nodemask.h>
+#include <linux/rcupdate.h>
+#include <linux/cgroupstats.h>
+
+#ifdef CONFIG_CGROUPS
+
+struct cgroupfs_root;
+struct cgroup_subsys;
+struct inode;
+
+extern int cgroup_init_early(void);
+extern int cgroup_init(void);
+extern void cgroup_init_smp(void);
+extern void cgroup_lock(void);
+extern void cgroup_unlock(void);
+extern void cgroup_fork(struct task_struct *p);
+extern void cgroup_fork_callbacks(struct task_struct *p);
+extern void cgroup_post_fork(struct task_struct *p);
+extern void cgroup_exit(struct task_struct *p, int run_callbacks);
+extern int cgroupstats_build(struct cgroupstats *stats,
+				struct dentry *dentry);
+
+extern struct file_operations proc_cgroup_operations;
+
+/* Define the enumeration of all cgroup subsystems */
+#define SUBSYS(_x) _x ## _subsys_id,
+enum cgroup_subsys_id {
+#include <linux/cgroup_subsys.h>
+	CGROUP_SUBSYS_COUNT
+};
+#undef SUBSYS
+
+/* Per-subsystem/per-cgroup state maintained by the system. */
+struct cgroup_subsys_state {
+	/* The cgroup that this subsystem is attached to. Useful
+	 * for subsystems that want to know about the cgroup
+	 * hierarchy structure */
+	struct cgroup *cgroup;
+
+	/* State maintained by the cgroup system to allow
+	 * subsystems to be "busy". Should be accessed via css_get()
+	 * and css_put() */
+
+	atomic_t refcnt;
+
+	unsigned long flags;
+};
+
+/* bits in struct cgroup_subsys_state flags field */
+enum {
+	CSS_ROOT, /* This CSS is the root of the subsystem */
+};
+
+/*
+ * Call css_get() to hold a reference on the cgroup;
+ *
+ */
+
+static inline void css_get(struct cgroup_subsys_state *css)
+{
+	/* We don't need to reference count the root state */
+	if (!test_bit(CSS_ROOT, &css->flags))
+		atomic_inc(&css->refcnt);
+}
+/*
+ * css_put() should be called to release a reference taken by
+ * css_get()
+ */
+
+extern void __css_put(struct cgroup_subsys_state *css);
+static inline void css_put(struct cgroup_subsys_state *css)
+{
+	if (!test_bit(CSS_ROOT, &css->flags))
+		__css_put(css);
+}
+
+struct cgroup {
+	unsigned long flags;		/* "unsigned long" so bitops work */
+
+	/* count users of this cgroup. >0 means busy, but doesn't
+	 * necessarily indicate the number of tasks in the
+	 * cgroup */
+	atomic_t count;
+
+	/*
+	 * We link our 'sibling' struct into our parent's 'children'.
+	 * Our children link their 'sibling' into our 'children'.
+	 */
+	struct list_head sibling;	/* my parent's children */
+	struct list_head children;	/* my children */
+
+	struct cgroup *parent;	/* my parent */
+	struct dentry *dentry;	  	/* cgroup fs entry */
+
+	/* Private pointers for each registered subsystem */
+	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+	struct cgroupfs_root *root;
+	struct cgroup *top_cgroup;
+
+	/*
+	 * List of cg_cgroup_links pointing at css_sets with
+	 * tasks in this cgroup. Protected by css_set_lock
+	 */
+	struct list_head css_sets;
+
+	/*
+	 * Linked list running through all cgroups that can
+	 * potentially be reaped by the release agent. Protected by
+	 * release_list_lock
+	 */
+	struct list_head release_list;
+};
+
+/* A css_set is a structure holding pointers to a set of
+ * cgroup_subsys_state objects. This saves space in the task struct
+ * object and speeds up fork()/exit(), since a single inc/dec and a
+ * list_add()/del() can bump the reference count on the entire
+ * cgroup set for a task.
+ */
+
+struct css_set {
+
+	/* Reference count */
+	struct kref ref;
+
+	/*
+	 * List running through all cgroup groups. Protected by
+	 * css_set_lock
+	 */
+	struct list_head list;
+
+	/*
+	 * List running through all tasks using this cgroup
+	 * group. Protected by css_set_lock
+	 */
+	struct list_head tasks;
+
+	/*
+	 * List of cg_cgroup_link objects on link chains from
+	 * cgroups referenced from this css_set. Protected by
+	 * css_set_lock
+	 */
+	struct list_head cg_links;
+
+	/*
+	 * Set of subsystem states, one for each subsystem. This array
+	 * is immutable after creation apart from the init_css_set
+	 * during subsystem registration (at boot time).
+	 */
+	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+};
+
+/* struct cftype:
+ *
+ * The files in the cgroup filesystem mostly have a very simple read/write
+ * handling, some common function will take care of it. Nevertheless some cases
+ * (read tasks) are special and therefore I define this structure for every
+ * kind of file.
+ *
+ *
+ * When reading/writing to a file:
+ *	- the cgroup to use in file->f_dentry->d_parent->d_fsdata
+ *	- the 'cftype' of the file is file->f_dentry->d_fsdata
+ */
+
+#define MAX_CFTYPE_NAME 64
+struct cftype {
+	/* By convention, the name should begin with the name of the
+	 * subsystem, followed by a period */
+	char name[MAX_CFTYPE_NAME];
+	int private;
+	int (*open) (struct inode *inode, struct file *file);
+	ssize_t (*read) (struct cgroup *cont, struct cftype *cft,
+			 struct file *file,
+			 char __user *buf, size_t nbytes, loff_t *ppos);
+	/*
+	 * read_uint() is a shortcut for the common case of returning a
+	 * single integer. Use it in place of read()
+	 */
+	u64 (*read_uint) (struct cgroup *cont, struct cftype *cft);
+	ssize_t (*write) (struct cgroup *cont, struct cftype *cft,
+			  struct file *file,
+			  const char __user *buf, size_t nbytes, loff_t *ppos);
+
+	/*
+	 * write_uint() is a shortcut for the common case of accepting
+	 * a single integer (as parsed by simple_strtoull) from
+	 * userspace. Use in place of write(); return 0 or error.
+	 */
+	int (*write_uint) (struct cgroup *cont, struct cftype *cft, u64 val);
+
+	int (*release) (struct inode *inode, struct file *file);
+};
+
+/* Add a new file to the given cgroup directory. Should only be
+ * called by subsystems from within a populate() method */
+int cgroup_add_file(struct cgroup *cont, struct cgroup_subsys *subsys,
+		       const struct cftype *cft);
+
+/* Add a set of new files to the given cgroup directory. Should
+ * only be called by subsystems from within a populate() method */
+int cgroup_add_files(struct cgroup *cont,
+			struct cgroup_subsys *subsys,
+			const struct cftype cft[],
+			int count);
+
+int cgroup_is_removed(const struct cgroup *cont);
+
+int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
+
+int cgroup_task_count(const struct cgroup *cont);
+
+/* Return true if the cgroup is a descendant of the current cgroup */
+int cgroup_is_descendant(const struct cgroup *cont);
+
+/* Control Group subsystem type. See Documentation/cgroups.txt for details */
+
+struct cgroup_subsys {
+	struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss,
+						  struct cgroup *cont);
+	void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cont);
+	int (*can_attach)(struct cgroup_subsys *ss,
+			  struct cgroup *cont, struct task_struct *tsk);
+	void (*attach)(struct cgroup_subsys *ss, struct cgroup *cont,
+			struct cgroup *old_cont, struct task_struct *tsk);
+	void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
+	void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
+	int (*populate)(struct cgroup_subsys *ss,
+			struct cgroup *cont);
+	void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cont);
+	void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
+	int subsys_id;
+	int active;
+	int early_init;
+#define MAX_CGROUP_TYPE_NAMELEN 32
+	const char *name;
+
+	/* Protected by RCU */
+	struct cgroupfs_root *root;
+
+	struct list_head sibling;
+
+	void *private;
+};
+
+#define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
+#include <linux/cgroup_subsys.h>
+#undef SUBSYS
+
+static inline struct cgroup_subsys_state *cgroup_subsys_state(
+	struct cgroup *cont, int subsys_id)
+{
+	return cont->subsys[subsys_id];
+}
+
+static inline struct cgroup_subsys_state *task_subsys_state(
+	struct task_struct *task, int subsys_id)
+{
+	return rcu_dereference(task->cgroups->subsys[subsys_id]);
+}
+
+static inline struct cgroup* task_cgroup(struct task_struct *task,
+					       int subsys_id)
+{
+	return task_subsys_state(task, subsys_id)->cgroup;
+}
+
+int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
+
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
+
+/* A cgroup_iter should be treated as an opaque object */
+struct cgroup_iter {
+	struct list_head *cg_link;
+	struct list_head *task;
+};
+
+/* To iterate across the tasks in a cgroup:
+ *
+ * 1) call cgroup_iter_start to intialize an iterator
+ *
+ * 2) call cgroup_iter_next() to retrieve member tasks until it
+ *    returns NULL or until you want to end the iteration
+ *
+ * 3) call cgroup_iter_end() to destroy the iterator.
+ */
+void cgroup_iter_start(struct cgroup *cont, struct cgroup_iter *it);
+struct task_struct *cgroup_iter_next(struct cgroup *cont,
+					struct cgroup_iter *it);
+void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it);
+
+#else /* !CONFIG_CGROUPS */
+
+static inline int cgroup_init_early(void) { return 0; }
+static inline int cgroup_init(void) { return 0; }
+static inline void cgroup_init_smp(void) {}
+static inline void cgroup_fork(struct task_struct *p) {}
+static inline void cgroup_fork_callbacks(struct task_struct *p) {}
+static inline void cgroup_post_fork(struct task_struct *p) {}
+static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
+
+static inline void cgroup_lock(void) {}
+static inline void cgroup_unlock(void) {}
+static inline int cgroupstats_build(struct cgroupstats *stats,
+					struct dentry *dentry)
+{
+	return -EINVAL;
+}
+
+#endif /* !CONFIG_CGROUPS */
+
+#endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
new file mode 100644
index 0000000..0b9bfbd
--- /dev/null
+++ b/include/linux/cgroup_subsys.h
@@ -0,0 +1,38 @@
+/* Add subsystem definitions of the form SUBSYS(<name>) in this
+ * file. Surround each one by a line of comment markers so that
+ * patches don't collide
+ */
+
+/* */
+
+/* */
+
+#ifdef CONFIG_CPUSETS
+SUBSYS(cpuset)
+#endif
+
+/* */
+
+#ifdef CONFIG_CGROUP_CPUACCT
+SUBSYS(cpuacct)
+#endif
+
+/* */
+
+#ifdef CONFIG_CGROUP_DEBUG
+SUBSYS(debug)
+#endif
+
+/* */
+
+#ifdef CONFIG_CGROUP_NS
+SUBSYS(ns)
+#endif
+
+/* */
+
+#ifdef CONFIG_FAIR_CGROUP_SCHED
+SUBSYS(cpu_cgroup)
+#endif
+
+/* */
diff --git a/include/linux/cgroupstats.h b/include/linux/cgroupstats.h
new file mode 100644
index 0000000..4f53abf
--- /dev/null
+++ b/include/linux/cgroupstats.h
@@ -0,0 +1,70 @@
+/* cgroupstats.h - exporting per-cgroup statistics
+ *
+ * Copyright IBM Corporation, 2007
+ * Author Balbir Singh <balbir@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef _LINUX_CGROUPSTATS_H
+#define _LINUX_CGROUPSTATS_H
+
+#include <linux/taskstats.h>
+
+/*
+ * Data shared between user space and kernel space on a per cgroup
+ * basis. This data is shared using taskstats.
+ *
+ * Most of these states are derived by looking at the task->state value
+ * For the nr_io_wait state, a flag in the delay accounting structure
+ * indicates that the task is waiting on IO
+ *
+ * Each member is aligned to a 8 byte boundary.
+ */
+struct cgroupstats {
+	__u64	nr_sleeping;		/* Number of tasks sleeping */
+	__u64	nr_running;		/* Number of tasks running */
+	__u64	nr_stopped;		/* Number of tasks in stopped state */
+	__u64	nr_uninterruptible;	/* Number of tasks in uninterruptible */
+					/* state */
+	__u64	nr_io_wait;		/* Number of tasks waiting on IO */
+};
+
+/*
+ * Commands sent from userspace
+ * Not versioned. New commands should only be inserted at the enum's end
+ * prior to __CGROUPSTATS_CMD_MAX
+ */
+
+enum {
+	CGROUPSTATS_CMD_UNSPEC = __TASKSTATS_CMD_MAX,	/* Reserved */
+	CGROUPSTATS_CMD_GET,		/* user->kernel request/get-response */
+	CGROUPSTATS_CMD_NEW,		/* kernel->user event */
+	__CGROUPSTATS_CMD_MAX,
+};
+
+#define CGROUPSTATS_CMD_MAX (__CGROUPSTATS_CMD_MAX - 1)
+
+enum {
+	CGROUPSTATS_TYPE_UNSPEC = 0,	/* Reserved */
+	CGROUPSTATS_TYPE_CGROUP_STATS,	/* contains name + stats */
+	__CGROUPSTATS_TYPE_MAX,
+};
+
+#define CGROUPSTATS_TYPE_MAX (__CGROUPSTATS_TYPE_MAX - 1)
+
+enum {
+	CGROUPSTATS_CMD_ATTR_UNSPEC = 0,
+	CGROUPSTATS_CMD_ATTR_FD,
+	__CGROUPSTATS_CMD_ATTR_MAX,
+};
+
+#define CGROUPSTATS_CMD_ATTR_MAX (__CGROUPSTATS_CMD_ATTR_MAX - 1)
+
+#endif /* _LINUX_CGROUPSTATS_H */
diff --git a/include/linux/cpu_acct.h b/include/linux/cpu_acct.h
new file mode 100644
index 0000000..6b5fd8a
--- /dev/null
+++ b/include/linux/cpu_acct.h
@@ -0,0 +1,14 @@
+
+#ifndef _LINUX_CPU_ACCT_H
+#define _LINUX_CPU_ACCT_H
+
+#include <linux/cgroup.h>
+#include <asm/cputime.h>
+
+#ifdef CONFIG_CGROUP_CPUACCT
+extern void cpuacct_charge(struct task_struct *, cputime_t cputime);
+#else
+static void inline cpuacct_charge(struct task_struct *p, cputime_t cputime) {}
+#endif
+
+#endif
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
new file mode 100644
index 0000000..16a5154
--- /dev/null
+++ b/include/linux/cpuidle.h
@@ -0,0 +1,180 @@
+/*
+ * cpuidle.h - a generic framework for CPU idle power management
+ *
+ * (C) 2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *          Shaohua Li <shaohua.li@intel.com>
+ *          Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#ifndef _LINUX_CPUIDLE_H
+#define _LINUX_CPUIDLE_H
+
+#include <linux/percpu.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/kobject.h>
+#include <linux/completion.h>
+
+#define CPUIDLE_STATE_MAX	8
+#define CPUIDLE_NAME_LEN	16
+
+struct cpuidle_device;
+
+
+/****************************
+ * CPUIDLE DEVICE INTERFACE *
+ ****************************/
+
+struct cpuidle_state {
+	char		name[CPUIDLE_NAME_LEN];
+	void		*driver_data;
+
+	unsigned int	flags;
+	unsigned int	exit_latency; /* in US */
+	unsigned int	power_usage; /* in mW */
+	unsigned int	target_residency; /* in US */
+
+	unsigned int	usage;
+	unsigned int	time; /* in US */
+
+	int (*enter)	(struct cpuidle_device *dev,
+			 struct cpuidle_state *state);
+};
+
+/* Idle State Flags */
+#define CPUIDLE_FLAG_TIME_VALID	(0x01) /* is residency time measurable? */
+#define CPUIDLE_FLAG_CHECK_BM	(0x02) /* BM activity will exit state */
+#define CPUIDLE_FLAG_SHALLOW	(0x10) /* low latency, minimal savings */
+#define CPUIDLE_FLAG_BALANCED	(0x20) /* medium latency, moderate savings */
+#define CPUIDLE_FLAG_DEEP	(0x40) /* high latency, large savings */
+
+#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
+
+/**
+ * cpuidle_get_statedata - retrieves private driver state data
+ * @state: the state
+ */
+static inline void * cpuidle_get_statedata(struct cpuidle_state *state)
+{
+	return state->driver_data;
+}
+
+/**
+ * cpuidle_set_statedata - stores private driver state data
+ * @state: the state
+ * @data: the private data
+ */
+static inline void
+cpuidle_set_statedata(struct cpuidle_state *state, void *data)
+{
+	state->driver_data = data;
+}
+
+struct cpuidle_state_kobj {
+	struct cpuidle_state *state;
+	struct completion kobj_unregister;
+	struct kobject kobj;
+};
+
+struct cpuidle_device {
+	int			enabled:1;
+	unsigned int		cpu;
+
+	int			last_residency;
+	int			state_count;
+	struct cpuidle_state	states[CPUIDLE_STATE_MAX];
+	struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
+	struct cpuidle_state	*last_state;
+
+	struct list_head 	device_list;
+	struct kobject		kobj;
+	struct completion	kobj_unregister;
+	void			*governor_data;
+};
+
+DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+
+/**
+ * cpuidle_get_last_residency - retrieves the last state's residency time
+ * @dev: the target CPU
+ *
+ * NOTE: this value is invalid if CPUIDLE_FLAG_TIME_VALID isn't set
+ */
+static inline int cpuidle_get_last_residency(struct cpuidle_device *dev)
+{
+	return dev->last_residency;
+}
+
+
+/****************************
+ * CPUIDLE DRIVER INTERFACE *
+ ****************************/
+
+struct cpuidle_driver {
+	char			name[CPUIDLE_NAME_LEN];
+	struct module 		*owner;
+};
+
+#ifdef CONFIG_CPU_IDLE
+
+extern int cpuidle_register_driver(struct cpuidle_driver *drv);
+extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
+extern int cpuidle_register_device(struct cpuidle_device *dev);
+extern void cpuidle_unregister_device(struct cpuidle_device *dev);
+
+extern void cpuidle_pause_and_lock(void);
+extern void cpuidle_resume_and_unlock(void);
+extern int cpuidle_enable_device(struct cpuidle_device *dev);
+extern void cpuidle_disable_device(struct cpuidle_device *dev);
+
+#else
+
+static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
+{return 0;}
+static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { }
+static inline int cpuidle_register_device(struct cpuidle_device *dev)
+{return 0;}
+static inline void cpuidle_unregister_device(struct cpuidle_device *dev) { }
+
+static inline void cpuidle_pause_and_lock(void) { }
+static inline void cpuidle_resume_and_unlock(void) { }
+static inline int cpuidle_enable_device(struct cpuidle_device *dev)
+{return 0;}
+static inline void cpuidle_disable_device(struct cpuidle_device *dev) { }
+
+#endif
+
+/******************************
+ * CPUIDLE GOVERNOR INTERFACE *
+ ******************************/
+
+struct cpuidle_governor {
+	char			name[CPUIDLE_NAME_LEN];
+	struct list_head 	governor_list;
+	unsigned int		rating;
+
+	int  (*enable)		(struct cpuidle_device *dev);
+	void (*disable)		(struct cpuidle_device *dev);
+
+	int  (*select)		(struct cpuidle_device *dev);
+	void (*reflect)		(struct cpuidle_device *dev);
+
+	struct module 		*owner;
+};
+
+#ifdef CONFIG_CPU_IDLE
+
+extern int cpuidle_register_governor(struct cpuidle_governor *gov);
+extern void cpuidle_unregister_governor(struct cpuidle_governor *gov);
+
+#else
+
+static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
+{return 0;}
+static inline void cpuidle_unregister_governor(struct cpuidle_governor *gov) { }
+
+#endif
+
+#endif /* _LINUX_CPUIDLE_H */
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index ea44d2e..ecae585 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
+#include <linux/cgroup.h>
 
 #ifdef CONFIG_CPUSETS
 
@@ -19,9 +20,8 @@
 extern int cpuset_init_early(void);
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
-extern void cpuset_fork(struct task_struct *p);
-extern void cpuset_exit(struct task_struct *p);
 extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
+extern cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
@@ -76,18 +76,22 @@
 
 extern void cpuset_track_online_nodes(void);
 
+extern int current_cpuset_is_being_rebound(void);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline int cpuset_init_early(void) { return 0; }
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
-static inline void cpuset_fork(struct task_struct *p) {}
-static inline void cpuset_exit(struct task_struct *p) {}
 
 static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
 {
 	return cpu_possible_map;
 }
+static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p)
+{
+	return cpu_possible_map;
+}
 
 static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 {
@@ -148,6 +152,11 @@
 
 static inline void cpuset_track_online_nodes(void) {}
 
+static inline int current_cpuset_is_being_rebound(void)
+{
+	return 0;
+}
+
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 55d1ca5..ab94bc0 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -26,6 +26,7 @@
  * Used to set current->delays->flags
  */
 #define DELAYACCT_PF_SWAPIN	0x00000001	/* I am doing a swapin */
+#define DELAYACCT_PF_BLKIO	0x00000002	/* I am waiting on IO */
 
 #ifdef CONFIG_TASK_DELAY_ACCT
 
@@ -39,6 +40,14 @@
 extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
 extern __u64 __delayacct_blkio_ticks(struct task_struct *);
 
+static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
+{
+	if (p->delays)
+		return (p->delays->flags & DELAYACCT_PF_BLKIO);
+	else
+		return 0;
+}
+
 static inline void delayacct_set_flag(int flag)
 {
 	if (current->delays)
@@ -71,6 +80,7 @@
 
 static inline void delayacct_blkio_start(void)
 {
+	delayacct_set_flag(DELAYACCT_PF_BLKIO);
 	if (current->delays)
 		__delayacct_blkio_start();
 }
@@ -79,6 +89,7 @@
 {
 	if (current->delays)
 		__delayacct_blkio_end();
+	delayacct_clear_flag(DELAYACCT_PF_BLKIO);
 }
 
 static inline int delayacct_add_tsk(struct taskstats *d,
@@ -116,6 +127,8 @@
 { return 0; }
 static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
 { return 0; }
+static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
+{ return 0; }
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
 #endif
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 499f537..37c66d1 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -183,11 +183,14 @@
  */
 uint32_t dm_get_event_nr(struct mapped_device *md);
 int dm_wait_event(struct mapped_device *md, int event_nr);
+uint32_t dm_next_uevent_seq(struct mapped_device *md);
+void dm_uevent_add(struct mapped_device *md, struct list_head *elist);
 
 /*
  * Info functions.
  */
 const char *dm_device_name(struct mapped_device *md);
+int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid);
 struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct mapped_device *md);
 int dm_noflush_suspending(struct dm_target *ti);
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index b934861..523281c 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -131,6 +131,7 @@
 	char name[DM_NAME_LEN];	/* device name */
 	char uuid[DM_UUID_LEN];	/* unique identifier for
 				 * the block device */
+	char data[7];		/* padding or data */
 };
 
 /*
@@ -285,9 +286,9 @@
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	11
+#define DM_VERSION_MINOR	12
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2006-10-12)"
+#define DM_VERSION_EXTRA	"-ioctl (2007-10-02)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6a4d170..1657e99 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -123,6 +123,7 @@
 #define MS_SLAVE	(1<<19)	/* change to slave */
 #define MS_SHARED	(1<<20)	/* change to shared */
 #define MS_RELATIME	(1<<21)	/* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT	(1<<22) /* this is a kern_mount call */
 #define MS_ACTIVE	(1<<30)
 #define MS_NOUSER	(1<<31)
 
@@ -1459,7 +1460,8 @@
 
 extern int register_filesystem(struct file_system_type *);
 extern int unregister_filesystem(struct file_system_type *);
-extern struct vfsmount *kern_mount(struct file_system_type *);
+extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
+#define kern_mount(type) kern_mount_data(type, NULL)
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
 extern void umount_tree(struct vfsmount *, int, struct list_head *);
@@ -1922,6 +1924,8 @@
 
 extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long);
 
+extern void get_filesystem(struct file_system_type *fs);
+extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
 extern struct super_block *user_get_super(dev_t);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index edb8024..6e35b92 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -469,8 +469,8 @@
 	/* handler for raw output data, used by hidraw */
 	int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t);
 #ifdef CONFIG_USB_HIDINPUT_POWERBOOK
-	unsigned long pb_pressed_fn[NBITS(KEY_MAX)];
-	unsigned long pb_pressed_numlock[NBITS(KEY_MAX)];
+	unsigned long pb_pressed_fn[BITS_TO_LONGS(KEY_CNT)];
+	unsigned long pb_pressed_numlock[BITS_TO_LONGS(KEY_CNT)];
 #endif
 };
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 19db0a4..2e4b8dd 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -192,22 +192,20 @@
 struct hwif_s;
 typedef int (ide_ack_intr_t)(struct hwif_s *);
 
-#ifndef NO_DMA
-#define NO_DMA  255
-#endif
-
 /*
  * hwif_chipset_t is used to keep track of the specific hardware
  * chipset used by each IDE interface, if known.
  */
-typedef enum {	ide_unknown,	ide_generic,	ide_pci,
+enum {		ide_unknown,	ide_generic,	ide_pci,
 		ide_cmd640,	ide_dtc2278,	ide_ali14xx,
 		ide_qd65xx,	ide_umc8672,	ide_ht6560b,
 		ide_rz1000,	ide_trm290,
 		ide_cmd646,	ide_cy82c693,	ide_4drives,
 		ide_pmac,	ide_etrax100,	ide_acorn,
 		ide_au1xxx, ide_forced
-} hwif_chipset_t;
+};
+
+typedef u8 hwif_chipset_t;
 
 /*
  * Structure to hold all information about the location of this port
@@ -215,22 +213,16 @@
 typedef struct hw_regs_s {
 	unsigned long	io_ports[IDE_NR_PORTS];	/* task file registers */
 	int		irq;			/* our irq number */
-	int		dma;			/* our dma entry */
 	ide_ack_intr_t	*ack_intr;		/* acknowledge interrupt */
 	hwif_chipset_t  chipset;
 	struct device	*dev;
 } hw_regs_t;
 
-/*
- * Register new hardware with ide
- */
-int ide_register_hw(hw_regs_t *, int, struct hwif_s **);
-int ide_register_hw_with_fixup(hw_regs_t *, int, struct hwif_s **,
-			       void (*)(struct hwif_s *));
+struct hwif_s * ide_find_port(unsigned long);
 
-/*
- * Set up hw_regs_t structure before calling ide_register_hw (optional)
- */
+int ide_register_hw(hw_regs_t *, void (*)(struct hwif_s *), int,
+		    struct hwif_s **);
+
 void ide_setup_ports(	hw_regs_t *hw,
 			unsigned long base,
 			int *offsets,
@@ -268,11 +260,7 @@
 # define ide_init_default_irq(base)	(0)
 #endif
 
-/*
- * ide_init_hwif_ports() is OBSOLETE and will be removed in 2.7 series.
- * New ports shouldn't define IDE_ARCH_OBSOLETE_INIT in <asm/ide.h>.
- */
-#ifdef IDE_ARCH_OBSOLETE_INIT
+#ifdef CONFIG_IDE_ARCH_OBSOLETE_INIT
 static inline void ide_init_hwif_ports(hw_regs_t *hw,
 				       unsigned long io_addr,
 				       unsigned long ctl_addr,
@@ -302,7 +290,7 @@
 	if (io_addr || ctl_addr)
 		printk(KERN_WARNING "%s: must not be called\n", __FUNCTION__);
 }
-#endif /* IDE_ARCH_OBSOLETE_INIT */
+#endif /* CONFIG_IDE_ARCH_OBSOLETE_INIT */
 
 /* Currently only m68k, apus and m8xx need it */
 #ifndef IDE_ARCH_ACK_INTR
@@ -363,7 +351,6 @@
  * ATA DATA Register Special.
  * ATA NSECTOR Count Register().
  * ATAPI Byte Count Register.
- * Channel index ordering pairs.
  */
 typedef union {
 	unsigned all			:16;
@@ -378,7 +365,7 @@
 #error "Please fix <asm/byteorder.h>"
 #endif
 	} b;
-} ata_nsector_t, ata_data_t, atapi_bcount_t, ata_index_t;
+} ata_nsector_t, ata_data_t, atapi_bcount_t;
 
 /*
  * ATA-IDE Select Register, aka Device-Head
@@ -657,7 +644,7 @@
     ((1<<ide_pci)|(1<<ide_cmd646)|(1<<ide_ali14xx))
 #define IDE_CHIPSET_IS_PCI(c)	((IDE_CHIPSET_PCI_MASK >> (c)) & 1)
 
-struct ide_pci_device_s;
+struct ide_port_info;
 
 typedef struct hwif_s {
 	struct hwif_s *next;		/* for linked-list in ide_hwgroup_t */
@@ -672,7 +659,6 @@
 	unsigned long	sata_scr[SATA_NR_PORTS];
 	unsigned long	sata_misc[SATA_NR_PORTS];
 
-	hw_regs_t	hw;		/* Hardware info */
 	ide_drive_t	drives[MAX_DRIVES];	/* drive info */
 
 	u8 major;	/* our major number */
@@ -694,7 +680,9 @@
 	hwif_chipset_t chipset;	/* sub-module for tuning.. */
 
 	struct pci_dev  *pci_dev;	/* for pci chipsets */
-	struct ide_pci_device_s	*cds;	/* chipset device struct */
+	const struct ide_port_info *cds;	/* chipset device struct */
+
+	ide_ack_intr_t *ack_intr;
 
 	void (*rw_disk)(ide_drive_t *, struct request *);
 
@@ -725,6 +713,8 @@
 	u8 (*mdma_filter)(ide_drive_t *);
 	u8 (*udma_filter)(ide_drive_t *);
 
+	void (*fixup)(struct hwif_s *);
+
 	void (*ata_input_data)(ide_drive_t *, void *, u32);
 	void (*ata_output_data)(ide_drive_t *, void *, u32);
 
@@ -841,8 +831,6 @@
 
 		/* for pci chipsets */
 	struct pci_dev *pci_dev;
-		/* chipset device struct */
-	struct ide_pci_device_s *cds;
 
 		/* current request */
 	struct request *rq;
@@ -1030,36 +1018,16 @@
 int ide_end_dequeued_request(ide_drive_t *drive, struct request *rq,
 			     int uptodate, int nr_sectors);
 
-/*
- * This is used on exit from the driver to designate the next irq handler
- * and also to start the safety timer.
- */
 extern void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry);
 
-/*
- * This is used on exit from the driver to designate the next irq handler
- * and start the safety time safely and atomically from the IRQ handler
- * with respect to the command issue (which it also does)
- */
 extern void ide_execute_command(ide_drive_t *, task_ioreg_t cmd, ide_handler_t *, unsigned int, ide_expiry_t *);
 
 ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8);
 
-/*
- * ide_error() takes action based on the error returned by the controller.
- * The caller should return immediately after invoking this.
- *
- * (drive, msg, status)
- */
 ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat);
 
 ide_startstop_t __ide_abort(ide_drive_t *, struct request *);
 
-/*
- * Abort a running command on the controller triggering the abort
- * from a host side, non error situation
- * (drive, msg)
- */
 extern ide_startstop_t ide_abort(ide_drive_t *, const char *);
 
 extern void ide_fix_driveid(struct hd_driveid *);
@@ -1075,15 +1043,8 @@
 
 int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long);
 
-/*
- * Start a reset operation for an IDE interface.
- * The caller should return immediately after invoking this.
- */
 extern ide_startstop_t ide_do_reset (ide_drive_t *);
 
-/*
- * This function is intended to be used prior to invoking ide_do_drive_cmd().
- */
 extern void ide_init_drive_cmd (struct request *rq);
 
 /*
@@ -1098,13 +1059,6 @@
 
 extern int ide_do_drive_cmd(ide_drive_t *, struct request *, ide_action_t);
 
-/*
- * Clean up after success/failure of an explicit drive cmd.
- * stat/err are used only when (HWGROUP(drive)->rq->cmd == IDE_DRIVE_CMD).
- * stat/err are used only when (HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASK_MASK).
- *
- * (ide_drive_t *drive, u8 stat, u8 err)
- */
 extern void ide_end_drive_cmd(ide_drive_t *, u8, u8);
 
 /*
@@ -1177,10 +1131,6 @@
 
 extern int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout);
 
-/*
- * ide_stall_queue() can be used by a drive to give excess bandwidth back
- * to the hwgroup by sleeping for timeout jiffies.
- */
 extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
 
 extern int ide_spin_wait_hwgroup(ide_drive_t *);
@@ -1200,8 +1150,8 @@
 #define ide_pci_register_driver(d) pci_register_driver(d)
 #endif
 
-void ide_pci_setup_ports(struct pci_dev *, struct ide_pci_device_s *, int, ata_index_t *);
-extern void ide_setup_pci_noise (struct pci_dev *dev, struct ide_pci_device_s *d);
+void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *, int, u8 *);
+void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *);
 
 extern void default_hwif_iops(ide_hwif_t *);
 extern void default_hwif_mmiops(ide_hwif_t *);
@@ -1261,6 +1211,14 @@
 	IDE_HFLAG_SERIALIZE		= (1 << 20),
 	/* use legacy IRQs */
 	IDE_HFLAG_LEGACY_IRQS		= (1 << 21),
+	/* force use of legacy IRQs */
+	IDE_HFLAG_FORCE_LEGACY_IRQS	= (1 << 22),
+	/* limit LBA48 requests to 256 sectors */
+	IDE_HFLAG_RQSIZE_256		= (1 << 23),
+	/* use 32-bit I/O ops */
+	IDE_HFLAG_IO_32BIT		= (1 << 24),
+	/* unmask IRQs */
+	IDE_HFLAG_UNMASK_IRQS		= (1 << 25),
 };
 
 #ifdef CONFIG_BLK_DEV_OFFBOARD
@@ -1269,7 +1227,7 @@
 # define IDE_HFLAG_OFF_BOARD	0
 #endif
 
-typedef struct ide_pci_device_s {
+struct ide_port_info {
 	char			*name;
 	unsigned int		(*init_chipset)(struct pci_dev *, const char *);
 	void			(*init_iops)(ide_hwif_t *);
@@ -1277,17 +1235,17 @@
 	void			(*init_dma)(ide_hwif_t *, unsigned long);
 	void			(*fixup)(ide_hwif_t *);
 	ide_pci_enablebit_t	enablebits[2];
+	hwif_chipset_t		chipset;
 	unsigned int		extra;
-	struct ide_pci_device_s	*next;
 	u32			host_flags;
 	u8			pio_mask;
 	u8			swdma_mask;
 	u8			mwdma_mask;
 	u8			udma_mask;
-} ide_pci_device_t;
+};
 
-extern int ide_setup_pci_device(struct pci_dev *, ide_pci_device_t *);
-extern int ide_setup_pci_devices(struct pci_dev *, struct pci_dev *, ide_pci_device_t *);
+int ide_setup_pci_device(struct pci_dev *, const struct ide_port_info *);
+int ide_setup_pci_devices(struct pci_dev *, struct pci_dev *, const struct ide_port_info *);
 
 void ide_map_sg(ide_drive_t *, struct request *);
 void ide_init_sg_cmd(ide_drive_t *, struct request *);
@@ -1370,8 +1328,7 @@
 
 void ide_undecoded_slave(ide_hwif_t *);
 
-int probe_hwif_init_with_fixup(ide_hwif_t *, void (*)(ide_hwif_t *));
-extern int probe_hwif_init(ide_hwif_t *);
+int ide_device_add(u8 idx[4]);
 
 static inline void *ide_get_hwifdata (ide_hwif_t * hwif)
 {
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d4b2f1c7..cae35b6 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -67,9 +67,6 @@
 	.posix_timers	 = LIST_HEAD_INIT(sig.posix_timers),		\
 	.cpu_timers	= INIT_CPU_TIMERS(sig.cpu_timers),		\
 	.rlim		= INIT_RLIMITS,					\
-	.pgrp		= 0,						\
-	.tty_old_pgrp   = NULL,						\
-	{ .__session      = 0},						\
 }
 
 extern struct nsproxy init_nsproxy;
@@ -94,15 +91,18 @@
 
 #define INIT_STRUCT_PID {						\
 	.count 		= ATOMIC_INIT(1),				\
-	.nr		= 0, 						\
-	/* Don't put this struct pid in pid_hash */			\
-	.pid_chain	= { .next = NULL, .pprev = NULL },		\
 	.tasks		= {						\
 		{ .first = &init_task.pids[PIDTYPE_PID].node },		\
 		{ .first = &init_task.pids[PIDTYPE_PGID].node },	\
 		{ .first = &init_task.pids[PIDTYPE_SID].node },		\
 	},								\
 	.rcu		= RCU_HEAD_INIT,				\
+	.level		= 0,						\
+	.numbers	= { {						\
+		.nr		= 0,					\
+		.ns		= &init_pid_ns,				\
+		.pid_chain	= { .next = NULL, .pprev = NULL },	\
+	}, }								\
 }
 
 #define INIT_PID_LINK(type) 					\
diff --git a/include/linux/input.h b/include/linux/input.h
index f30da6f..6226892 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -98,6 +98,7 @@
 #define EV_PWR			0x16
 #define EV_FF_STATUS		0x17
 #define EV_MAX			0x1f
+#define EV_CNT			(EV_MAX+1)
 
 /*
  * Synchronization events.
@@ -567,6 +568,7 @@
 /* We avoid low common keys in module aliases so they don't get huge. */
 #define KEY_MIN_INTERESTING	KEY_MUTE
 #define KEY_MAX			0x1ff
+#define KEY_CNT			(KEY_MAX+1)
 
 /*
  * Relative axes
@@ -583,6 +585,7 @@
 #define REL_WHEEL		0x08
 #define REL_MISC		0x09
 #define REL_MAX			0x0f
+#define REL_CNT			(REL_MAX+1)
 
 /*
  * Absolute axes
@@ -615,6 +618,7 @@
 #define ABS_VOLUME		0x20
 #define ABS_MISC		0x28
 #define ABS_MAX			0x3f
+#define ABS_CNT			(ABS_MAX+1)
 
 /*
  * Switch events
@@ -625,6 +629,7 @@
 #define SW_HEADPHONE_INSERT	0x02  /* set = inserted */
 #define SW_RADIO		0x03  /* set = radio enabled */
 #define SW_MAX			0x0f
+#define SW_CNT			(SW_MAX+1)
 
 /*
  * Misc events
@@ -636,6 +641,7 @@
 #define MSC_RAW			0x03
 #define MSC_SCAN		0x04
 #define MSC_MAX			0x07
+#define MSC_CNT			(MSC_MAX+1)
 
 /*
  * LEDs
@@ -653,6 +659,7 @@
 #define LED_MAIL		0x09
 #define LED_CHARGING		0x0a
 #define LED_MAX			0x0f
+#define LED_CNT			(LED_MAX+1)
 
 /*
  * Autorepeat values
@@ -670,6 +677,7 @@
 #define SND_BELL		0x01
 #define SND_TONE		0x02
 #define SND_MAX			0x07
+#define SND_CNT			(SND_MAX+1)
 
 /*
  * IDs.
@@ -920,6 +928,7 @@
 #define FF_AUTOCENTER	0x61
 
 #define FF_MAX		0x7f
+#define FF_CNT		(FF_MAX+1)
 
 #ifdef __KERNEL__
 
@@ -932,10 +941,6 @@
 #include <linux/timer.h>
 #include <linux/mod_devicetable.h>
 
-#define NBITS(x) (((x)/BITS_PER_LONG)+1)
-#define BIT(x)	(1UL<<((x)%BITS_PER_LONG))
-#define LONG(x) ((x)/BITS_PER_LONG)
-
 /**
  * struct input_dev - represents an input device
  * @name: name of the device
@@ -1005,28 +1010,30 @@
  * @going_away: marks devices that are in a middle of unregistering and
  *	causes input_open_device*() fail with -ENODEV.
  * @dev: driver model's view of this device
+ * @cdev: union for struct device pointer
  * @h_list: list of input handles associated with the device. When
  *	accessing the list dev->mutex must be held
  * @node: used to place the device onto input_dev_list
  */
 struct input_dev {
-
+	/* private: */
 	void *private;	/* do not use */
+	/* public: */
 
 	const char *name;
 	const char *phys;
 	const char *uniq;
 	struct input_id id;
 
-	unsigned long evbit[NBITS(EV_MAX)];
-	unsigned long keybit[NBITS(KEY_MAX)];
-	unsigned long relbit[NBITS(REL_MAX)];
-	unsigned long absbit[NBITS(ABS_MAX)];
-	unsigned long mscbit[NBITS(MSC_MAX)];
-	unsigned long ledbit[NBITS(LED_MAX)];
-	unsigned long sndbit[NBITS(SND_MAX)];
-	unsigned long ffbit[NBITS(FF_MAX)];
-	unsigned long swbit[NBITS(SW_MAX)];
+	unsigned long evbit[BITS_TO_LONGS(EV_CNT)];
+	unsigned long keybit[BITS_TO_LONGS(KEY_CNT)];
+	unsigned long relbit[BITS_TO_LONGS(REL_CNT)];
+	unsigned long absbit[BITS_TO_LONGS(ABS_CNT)];
+	unsigned long mscbit[BITS_TO_LONGS(MSC_CNT)];
+	unsigned long ledbit[BITS_TO_LONGS(LED_CNT)];
+	unsigned long sndbit[BITS_TO_LONGS(SND_CNT)];
+	unsigned long ffbit[BITS_TO_LONGS(FF_CNT)];
+	unsigned long swbit[BITS_TO_LONGS(SW_CNT)];
 
 	unsigned int keycodemax;
 	unsigned int keycodesize;
@@ -1044,10 +1051,10 @@
 	int abs[ABS_MAX + 1];
 	int rep[REP_MAX + 1];
 
-	unsigned long key[NBITS(KEY_MAX)];
-	unsigned long led[NBITS(LED_MAX)];
-	unsigned long snd[NBITS(SND_MAX)];
-	unsigned long sw[NBITS(SW_MAX)];
+	unsigned long key[BITS_TO_LONGS(KEY_CNT)];
+	unsigned long led[BITS_TO_LONGS(LED_CNT)];
+	unsigned long snd[BITS_TO_LONGS(SND_CNT)];
+	unsigned long sw[BITS_TO_LONGS(SW_CNT)];
 
 	int absmax[ABS_MAX + 1];
 	int absmin[ABS_MAX + 1];
@@ -1291,7 +1298,7 @@
 	dev->absfuzz[axis] = fuzz;
 	dev->absflat[axis] = flat;
 
-	dev->absbit[LONG(axis)] |= BIT(axis);
+	dev->absbit[BIT_WORD(axis)] |= BIT_MASK(axis);
 }
 
 extern struct class input_class;
@@ -1332,7 +1339,7 @@
 
 	void *private;
 
-	unsigned long ffbit[NBITS(FF_MAX)];
+	unsigned long ffbit[BITS_TO_LONGS(FF_CNT)];
 
 	struct mutex mutex;
 
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index ee11183..408696e 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -89,6 +89,7 @@
 {
 	spinlock_t	lock;
 	int		deleted;
+	int		id;
 	key_t		key;
 	uid_t		uid;
 	gid_t		gid;
@@ -110,6 +111,8 @@
 	int		msg_ctlmax;
 	int		msg_ctlmnb;
 	int		msg_ctlmni;
+	atomic_t	msg_bytes;
+	atomic_t	msg_hdrs;
 
 	size_t		shm_ctlmax;
 	size_t		shm_ctlall;
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index a3abf51..16e7ed8 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -58,7 +58,7 @@
  * CONFIG_JBD_DEBUG is on.
  */
 #define JBD_EXPENSIVE_CHECKING
-extern int journal_enable_debug;
+extern u8 journal_enable_debug;
 
 #define jbd_debug(n, f, a...)						\
 	do {								\
@@ -248,17 +248,7 @@
 #include <linux/fs.h>
 #include <linux/sched.h>
 
-#define JBD_ASSERTIONS
-#ifdef JBD_ASSERTIONS
-#define J_ASSERT(assert)						\
-do {									\
-	if (!(assert)) {						\
-		printk (KERN_EMERG					\
-			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
-			__FUNCTION__, __FILE__, __LINE__, # assert);	\
-		BUG();							\
-	}								\
-} while (0)
+#define J_ASSERT(assert)	BUG_ON(!(assert))
 
 #if defined(CONFIG_BUFFER_DEBUG)
 void buffer_assertion_failure(struct buffer_head *bh);
@@ -274,10 +264,6 @@
 #define J_ASSERT_JH(jh, expr)	J_ASSERT(expr)
 #endif
 
-#else
-#define J_ASSERT(assert)	do { } while (0)
-#endif		/* JBD_ASSERTIONS */
-
 #if defined(JBD_PARANOID_IOFAIL)
 #define J_EXPECT(expr, why...)		J_ASSERT(expr)
 #define J_EXPECT_BH(bh, expr, why...)	J_ASSERT_BH(bh, expr)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ad4b82c..2d9c448 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -187,6 +187,8 @@
 extern size_t vmcoreinfo_size;
 extern size_t vmcoreinfo_max_size;
 
+int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
+		unsigned long long *crash_size, unsigned long long *crash_base);
 
 #else /* !CONFIG_KEXEC */
 struct pt_regs;
diff --git a/include/linux/keyboard.h b/include/linux/keyboard.h
index 33b5c2e..65c2d70 100644
--- a/include/linux/keyboard.h
+++ b/include/linux/keyboard.h
@@ -23,10 +23,21 @@
 #define MAX_NR_OF_USER_KEYMAPS 256 	/* should be at least 7 */
 
 #ifdef __KERNEL__
+struct notifier_block;
 extern const int NR_TYPES;
 extern const int max_vals[];
 extern unsigned short *key_maps[MAX_NR_KEYMAPS];
 extern unsigned short plain_map[NR_KEYS];
+
+struct keyboard_notifier_param {
+	struct vc_data *vc;	/* VC on which the keyboard press was done */
+	int down;		/* Pressure of the key? */
+	int shift;		/* Current shift mask */
+	unsigned int value;	/* keycode, unicode value or keysym */
+};
+
+extern int register_keyboard_notifier(struct notifier_block *nb);
+extern int unregister_keyboard_notifier(struct notifier_block *nb);
 #endif
 
 #define MAX_NR_FUNC	256	/* max nr of strings assigned to keys */
diff --git a/include/linux/list.h b/include/linux/list.h
index b0cf0135..75ce2cb 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -478,8 +478,7 @@
 		pos = n, n = pos->next)
 
 /**
- * list_for_each_prev_safe - iterate over a list backwards safe against removal
-			of list entry
+ * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
  * @pos:	the &struct list_head to use as a loop cursor.
  * @n:		another &struct list_head to use as temporary storage
  * @head:	the head for your list.
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index f6279f6..4c4d236 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -276,6 +276,14 @@
 				 (lock)->dep_map.key, sub)
 
 /*
+ * To initialize a lockdep_map statically use this macro.
+ * Note that _name must not be NULL.
+ */
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+	{ .name = (_name), .key = (void *)(_key), }
+
+
+/*
  * Acquire a lock.
  *
  * Values for "read":
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 722d475..1fa0c2c 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -37,6 +37,7 @@
 
 #define SMB_SUPER_MAGIC		0x517B
 #define USBDEVICE_SUPER_MAGIC	0x9fa2
+#define CGROUP_SUPER_MAGIC	0x27e0eb
 
 #define FUTEXFS_SUPER_MAGIC	0xBAD1DEA
 #define INOTIFYFS_SUPER_MAGIC	0x2BAD1DEA
diff --git a/include/linux/marker.h b/include/linux/marker.h
new file mode 100644
index 0000000..5f36cf9
--- /dev/null
+++ b/include/linux/marker.h
@@ -0,0 +1,129 @@
+#ifndef _LINUX_MARKER_H
+#define _LINUX_MARKER_H
+
+/*
+ * Code markup for dynamic and static tracing.
+ *
+ * See Documentation/marker.txt.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+
+struct module;
+struct marker;
+
+/**
+ * marker_probe_func - Type of a marker probe function
+ * @mdata: pointer of type struct marker
+ * @private_data: caller site private data
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Type of marker probe functions. They receive the mdata and need to parse the
+ * format string to recover the variable argument list.
+ */
+typedef void marker_probe_func(const struct marker *mdata,
+	void *private_data, const char *fmt, ...);
+
+struct marker {
+	const char *name;	/* Marker name */
+	const char *format;	/* Marker format string, describing the
+				 * variable argument list.
+				 */
+	char state;		/* Marker state. */
+	marker_probe_func *call;/* Probe handler function pointer */
+	void *private;		/* Private probe data */
+} __attribute__((aligned(8)));
+
+#ifdef CONFIG_MARKERS
+
+/*
+ * Note : the empty asm volatile with read constraint is used here instead of a
+ * "used" attribute to fix a gcc 4.1.x bug.
+ * Make sure the alignment of the structure in the __markers section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define __trace_mark(name, call_data, format, args...)			\
+	do {								\
+		static const char __mstrtab_name_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= #name;						\
+		static const char __mstrtab_format_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= format;						\
+		static struct marker __mark_##name			\
+		__attribute__((section("__markers"), aligned(8))) =	\
+		{ __mstrtab_name_##name, __mstrtab_format_##name,	\
+		0, __mark_empty_function, NULL };			\
+		__mark_check_format(format, ## args);			\
+		if (unlikely(__mark_##name.state)) {			\
+			preempt_disable();				\
+			(*__mark_##name.call)				\
+				(&__mark_##name, call_data,		\
+				format, ## args);			\
+			preempt_enable();				\
+		}							\
+	} while (0)
+
+extern void marker_update_probe_range(struct marker *begin,
+	struct marker *end, struct module *probe_module, int *refcount);
+#else /* !CONFIG_MARKERS */
+#define __trace_mark(name, call_data, format, args...) \
+		__mark_check_format(format, ## args)
+static inline void marker_update_probe_range(struct marker *begin,
+	struct marker *end, struct module *probe_module, int *refcount)
+{ }
+#endif /* CONFIG_MARKERS */
+
+/**
+ * trace_mark - Marker
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker.
+ */
+#define trace_mark(name, format, args...) \
+	__trace_mark(name, NULL, format, ## args)
+
+#define MARK_MAX_FORMAT_LEN	1024
+
+/**
+ * MARK_NOARGS - Format string for a marker with no argument.
+ */
+#define MARK_NOARGS " "
+
+/* To be used for string format validity checking with gcc */
+static inline void __printf(1, 2) __mark_check_format(const char *fmt, ...)
+{
+}
+
+extern marker_probe_func __mark_empty_function;
+
+/*
+ * Connect a probe to a marker.
+ * private data pointer must be a valid allocated memory address, or NULL.
+ */
+extern int marker_probe_register(const char *name, const char *format,
+				marker_probe_func *probe, void *private);
+
+/*
+ * Returns the private data given to marker_probe_register.
+ */
+extern void *marker_probe_unregister(const char *name);
+/*
+ * Unregister a marker by providing the registered private data.
+ */
+extern void *marker_probe_unregister_private_data(void *private);
+
+extern int marker_arm(const char *name);
+extern int marker_disarm(const char *name);
+extern void *marker_get_private_data(const char *name);
+
+#endif
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 38c04d6..59c4865 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -148,14 +148,6 @@
 					const nodemask_t *new);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
 extern void mpol_fix_fork_child_flag(struct task_struct *p);
-#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
-
-#ifdef CONFIG_CPUSETS
-#define current_cpuset_is_being_rebound() \
-				(cpuset_being_rebound == current->cpuset)
-#else
-#define current_cpuset_is_being_rebound() 0
-#endif
 
 extern struct mempolicy default_policy;
 extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
@@ -173,8 +165,6 @@
 int do_migrate_pages(struct mm_struct *mm,
 	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
 
-extern void *cpuset_being_rebound;	/* Trigger mpol_copy vma rebind */
-
 #else
 
 struct mempolicy {};
@@ -248,8 +238,6 @@
 {
 }
 
-#define set_cpuset_being_rebound(x) do {} while (0)
-
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
  		unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol)
 {
diff --git a/include/linux/module.h b/include/linux/module.h
index 642f325..2cbc0b8 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -15,6 +15,7 @@
 #include <linux/stringify.h>
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
+#include <linux/marker.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -354,6 +355,10 @@
 	/* The command line arguments (may be mangled).  People like
 	   keeping pointers to this stuff */
 	char *args;
+#ifdef CONFIG_MARKERS
+	struct marker *markers;
+	unsigned int num_markers;
+#endif
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
@@ -457,6 +462,8 @@
 
 extern void print_modules(void);
 
+extern void module_update_markers(struct module *probe_module, int *refcount);
+
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
 #define EXPORT_SYMBOL_GPL(sym)
@@ -556,6 +563,11 @@
 {
 }
 
+static inline void module_update_markers(struct module *probe_module,
+		int *refcount)
+{
+}
+
 #endif /* CONFIG_MODULES */
 
 struct device_driver;
diff --git a/include/linux/msg.h b/include/linux/msg.h
index f1b60740d..10a3d5a 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -77,7 +77,6 @@
 /* one msq_queue structure for each present queue on the system */
 struct msg_queue {
 	struct kern_ipc_perm q_perm;
-	int q_id;
 	time_t q_stime;			/* last msgsnd time */
 	time_t q_rtime;			/* last msgrcv time */
 	time_t q_ctime;			/* last change time */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c5164c2..e82a6eb 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -160,6 +160,12 @@
 	/* Open contexts for shared mmap writes */
 	struct list_head	open_files;
 
+	/* Number of in-flight sillydelete RPC calls */
+	atomic_t		silly_count;
+	/* List of deferred sillydelete requests */
+	struct hlist_head	silly_list;
+	wait_queue_head_t	waitqueue;
+
 #ifdef CONFIG_NFS_V4
 	struct nfs4_cached_acl	*nfs4_acl;
         /* NFSv4 state */
@@ -394,6 +400,8 @@
  */
 extern int  nfs_async_unlink(struct inode *dir, struct dentry *dentry);
 extern void nfs_complete_unlink(struct dentry *dentry, struct inode *);
+extern void nfs_block_sillyrename(struct dentry *dentry);
+extern void nfs_unblock_sillyrename(struct dentry *dentry);
 
 /*
  * linux/fs/nfs/write.c
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index fad7ff1..0c40cc0 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -231,5 +231,22 @@
 #define PM_SUSPEND_PREPARE	0x0003 /* Going to suspend the system */
 #define PM_POST_SUSPEND		0x0004 /* Suspend finished */
 
+/* Console keyboard events.
+ * Note: KBD_KEYCODE is always sent before KBD_UNBOUND_KEYCODE, KBD_UNICODE and
+ * KBD_KEYSYM. */
+#define KBD_KEYCODE		0x0001 /* Keyboard keycode, called before any other */
+#define KBD_UNBOUND_KEYCODE	0x0002 /* Keyboard keycode which is not bound to any other */
+#define KBD_UNICODE		0x0003 /* Keyboard unicode */
+#define KBD_KEYSYM		0x0004 /* Keyboard keysym */
+#define KBD_POST_KEYSYM		0x0005 /* Called after keyboard keysym interpretation */
+
+extern struct blocking_notifier_head reboot_notifier_list;
+
+/* Virtual Terminal events. */
+#define VT_ALLOCATE		0x0001 /* Console got allocated */
+#define VT_DEALLOCATE		0x0002 /* Console will be deallocated */
+#define VT_WRITE		0x0003 /* A char got output */
+#define VT_UPDATE		0x0004 /* A bigger update occurred */
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_NOTIFIER_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 033a648..0e66b57 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -32,8 +32,39 @@
 };
 extern struct nsproxy init_nsproxy;
 
+/*
+ * the namespaces access rules are:
+ *
+ *  1. only current task is allowed to change tsk->nsproxy pointer or
+ *     any pointer on the nsproxy itself
+ *
+ *  2. when accessing (i.e. reading) current task's namespaces - no
+ *     precautions should be taken - just dereference the pointers
+ *
+ *  3. the access to other task namespaces is performed like this
+ *     rcu_read_lock();
+ *     nsproxy = task_nsproxy(tsk);
+ *     if (nsproxy != NULL) {
+ *             / *
+ *               * work with the namespaces here
+ *               * e.g. get the reference on one of them
+ *               * /
+ *     } / *
+ *         * NULL task_nsproxy() means that this task is
+ *         * almost dead (zombie)
+ *         * /
+ *     rcu_read_unlock();
+ *
+ */
+
+static inline struct nsproxy *task_nsproxy(struct task_struct *tsk)
+{
+	return rcu_dereference(tsk->nsproxy);
+}
+
 int copy_namespaces(unsigned long flags, struct task_struct *tsk);
-void get_task_namespaces(struct task_struct *tsk);
+void exit_task_namespaces(struct task_struct *tsk);
+void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
 void free_nsproxy(struct nsproxy *ns);
 int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
 	struct fs_struct *);
@@ -45,14 +76,15 @@
 	}
 }
 
-static inline void exit_task_namespaces(struct task_struct *p)
+static inline void get_nsproxy(struct nsproxy *ns)
 {
-	struct nsproxy *ns = p->nsproxy;
-	if (ns) {
-		task_lock(p);
-		p->nsproxy = NULL;
-		task_unlock(p);
-		put_nsproxy(ns);
-	}
+	atomic_inc(&ns->count);
 }
+
+#ifdef CONFIG_CGROUP_NS
+int ns_cgroup_clone(struct task_struct *tsk);
+#else
+static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; }
+#endif
+
 #endif
diff --git a/include/linux/of.h b/include/linux/of.h
index 6df80e9..5c39b92 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -16,8 +16,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/types.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/prom.h>
 
 /* flag descriptions */
diff --git a/include/linux/phantom.h b/include/linux/phantom.h
index d3ebbfa..96f4048 100644
--- a/include/linux/phantom.h
+++ b/include/linux/phantom.h
@@ -30,7 +30,11 @@
 #define PHN_SET_REG		_IOW (PH_IOC_MAGIC, 1, struct phm_reg *)
 #define PHN_GET_REGS		_IOWR(PH_IOC_MAGIC, 2, struct phm_regs *)
 #define PHN_SET_REGS		_IOW (PH_IOC_MAGIC, 3, struct phm_regs *)
-#define PH_IOC_MAXNR		3
+/* this ioctl tells the driver, that the caller is not OpenHaptics and might
+ * use improved registers update (no more phantom switchoffs when using
+ * libphantom) */
+#define PHN_NOT_OH		_IO  (PH_IOC_MAGIC, 4)
+#define PH_IOC_MAXNR		4
 
 #define PHN_CONTROL		0x6     /* control byte in iaddr space */
 #define PHN_CTL_AMP		0x1     /*   switch after torques change */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 1e0e4e3..e29a900 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -40,15 +40,28 @@
  * processes.
  */
 
+
+/*
+ * struct upid is used to get the id of the struct pid, as it is
+ * seen in particular namespace. Later the struct pid is found with
+ * find_pid_ns() using the int nr and struct pid_namespace *ns.
+ */
+
+struct upid {
+	/* Try to keep pid_chain in the same cacheline as nr for find_pid */
+	int nr;
+	struct pid_namespace *ns;
+	struct hlist_node pid_chain;
+};
+
 struct pid
 {
 	atomic_t count;
-	/* Try to keep pid_chain in the same cacheline as nr for find_pid */
-	int nr;
-	struct hlist_node pid_chain;
 	/* lists of tasks that use this pid */
 	struct hlist_head tasks[PIDTYPE_MAX];
 	struct rcu_head rcu;
+	int level;
+	struct upid numbers[1];
 };
 
 extern struct pid init_struct_pid;
@@ -83,26 +96,60 @@
 extern void FASTCALL(transfer_pid(struct task_struct *old,
 				  struct task_struct *new, enum pid_type));
 
+struct pid_namespace;
+extern struct pid_namespace init_pid_ns;
+
 /*
  * look up a PID in the hash table. Must be called with the tasklist_lock
  * or rcu_read_lock() held.
+ *
+ * find_pid_ns() finds the pid in the namespace specified
+ * find_pid() find the pid by its global id, i.e. in the init namespace
+ * find_vpid() finr the pid by its virtual id, i.e. in the current namespace
+ *
+ * see also find_task_by_pid() set in include/linux/sched.h
  */
-extern struct pid *FASTCALL(find_pid(int nr));
+extern struct pid *FASTCALL(find_pid_ns(int nr, struct pid_namespace *ns));
+extern struct pid *find_vpid(int nr);
+extern struct pid *find_pid(int nr);
 
 /*
  * Lookup a PID in the hash table, and return with it's count elevated.
  */
 extern struct pid *find_get_pid(int nr);
-extern struct pid *find_ge_pid(int nr);
+extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
 
-extern struct pid *alloc_pid(void);
+extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void FASTCALL(free_pid(struct pid *pid));
+extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
+
+/*
+ * the helpers to get the pid's id seen from different namespaces
+ *
+ * pid_nr()    : global id, i.e. the id seen from the init namespace;
+ * pid_vnr()   : virtual id, i.e. the id seen from the namespace this pid
+ *               belongs to. this only makes sence when called in the
+ *               context of the task that belongs to the same namespace;
+ * pid_nr_ns() : id seen from the ns specified.
+ *
+ * see also task_xid_nr() etc in include/linux/sched.h
+ */
 
 static inline pid_t pid_nr(struct pid *pid)
 {
 	pid_t nr = 0;
 	if (pid)
-		nr = pid->nr;
+		nr = pid->numbers[0].nr;
+	return nr;
+}
+
+pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
+
+static inline pid_t pid_vnr(struct pid *pid)
+{
+	pid_t nr = 0;
+	if (pid)
+		nr = pid->numbers[pid->level].nr;
 	return nr;
 }
 
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index b9a17e0..0135c76 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -4,7 +4,6 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/threads.h>
-#include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/kref.h>
 
@@ -20,13 +19,21 @@
 	struct pidmap pidmap[PIDMAP_ENTRIES];
 	int last_pid;
 	struct task_struct *child_reaper;
+	struct kmem_cache *pid_cachep;
+	int level;
+	struct pid_namespace *parent;
+#ifdef CONFIG_PROC_FS
+	struct vfsmount *proc_mnt;
+#endif
 };
 
 extern struct pid_namespace init_pid_ns;
 
-static inline void get_pid_ns(struct pid_namespace *ns)
+static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 {
-	kref_get(&ns->kref);
+	if (ns != &init_pid_ns)
+		kref_get(&ns->kref);
+	return ns;
 }
 
 extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
@@ -34,12 +41,19 @@
 
 static inline void put_pid_ns(struct pid_namespace *ns)
 {
-	kref_put(&ns->kref, free_pid_ns);
+	if (ns != &init_pid_ns)
+		kref_put(&ns->kref, free_pid_ns);
 }
 
-static inline struct task_struct *child_reaper(struct task_struct *tsk)
+static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
 {
-	return init_pid_ns.child_reaper;
+	return tsk->nsproxy->pid_ns;
+}
+
+static inline struct task_struct *task_child_reaper(struct task_struct *tsk)
+{
+	BUG_ON(tsk != current);
+	return tsk->nsproxy->pid_ns->child_reaper;
 }
 
 #endif /* _LINUX_PID_NS_H */
diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h
index 1adfe66..af7c36a 100644
--- a/include/linux/prefetch.h
+++ b/include/linux/prefetch.h
@@ -34,17 +34,12 @@
 	
 */
 
-/*
- *	These cannot be do{}while(0) macros. See the mental gymnastics in
- *	the loop macro.
- */
- 
 #ifndef ARCH_HAS_PREFETCH
-static inline void prefetch(const void *x) {;}
+#define prefetch(x) __builtin_prefetch(x)
 #endif
 
 #ifndef ARCH_HAS_PREFETCHW
-static inline void prefetchw(const void *x) {;}
+#define prefetchw(x) __builtin_prefetch(x,1)
 #endif
 
 #ifndef ARCH_HAS_SPINLOCK_PREFETCH
diff --git a/include/linux/prio_heap.h b/include/linux/prio_heap.h
new file mode 100644
index 0000000..0809435
--- /dev/null
+++ b/include/linux/prio_heap.h
@@ -0,0 +1,58 @@
+#ifndef _LINUX_PRIO_HEAP_H
+#define _LINUX_PRIO_HEAP_H
+
+/*
+ * Simple insertion-only static-sized priority heap containing
+ * pointers, based on CLR, chapter 7
+ */
+
+#include <linux/gfp.h>
+
+/**
+ * struct ptr_heap - simple static-sized priority heap
+ * @ptrs - pointer to data area
+ * @max - max number of elements that can be stored in @ptrs
+ * @size - current number of valid elements in @ptrs (in the range 0..@size-1
+ * @gt: comparison operator, which should implement "greater than"
+ */
+struct ptr_heap {
+	void **ptrs;
+	int max;
+	int size;
+	int (*gt)(void *, void *);
+};
+
+/**
+ * heap_init - initialize an empty heap with a given memory size
+ * @heap: the heap structure to be initialized
+ * @size: amount of memory to use in bytes
+ * @gfp_mask: mask to pass to kmalloc()
+ * @gt: comparison operator, which should implement "greater than"
+ */
+extern int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask,
+		     int (*gt)(void *, void *));
+
+/**
+ * heap_free - release a heap's storage
+ * @heap: the heap structure whose data should be released
+ */
+void heap_free(struct ptr_heap *heap);
+
+/**
+ * heap_insert - insert a value into the heap and return any overflowed value
+ * @heap: the heap to be operated on
+ * @p: the pointer to be inserted
+ *
+ * Attempts to insert the given value into the priority heap. If the
+ * heap is full prior to the insertion, then the resulting heap will
+ * consist of the smallest @max elements of the original heap and the
+ * new element; the greatest element will be removed from the heap and
+ * returned. Note that the returned element will be the new element
+ * (i.e. no change to the heap) if the new element is greater than all
+ * elements currently in the heap.
+ */
+extern void *heap_insert(struct ptr_heap *heap, void *p);
+
+
+
+#endif /* _LINUX_PRIO_HEAP_H */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 20741f6..1ff4616 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -125,7 +125,8 @@
 extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
 
 extern struct vfsmount *proc_mnt;
-extern int proc_fill_super(struct super_block *,void *,int);
+struct pid_namespace;
+extern int proc_fill_super(struct super_block *);
 extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
 
 /*
@@ -142,6 +143,9 @@
 extern const struct file_operations proc_kmsg_operations;
 extern const struct file_operations ppc_htab_operations;
 
+extern int pid_ns_prepare_proc(struct pid_namespace *ns);
+extern void pid_ns_release_proc(struct pid_namespace *ns);
+
 /*
  * proc_tty.c
  */
@@ -207,7 +211,9 @@
 #define proc_net_create(net, name, mode, info)	({ (void)(mode), NULL; })
 static inline void proc_net_remove(struct net *net, const char *name) {}
 
-static inline void proc_flush_task(struct task_struct *task) { }
+static inline void proc_flush_task(struct task_struct *task)
+{
+}
 
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
 	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
@@ -232,6 +238,15 @@
 
 extern struct proc_dir_entry proc_root;
 
+static inline int pid_ns_prepare_proc(struct pid_namespace *ns)
+{
+	return 0;
+}
+
+static inline void pid_ns_release_proc(struct pid_namespace *ns)
+{
+}
+
 #endif /* CONFIG_PROC_FS */
 
 #if !defined(CONFIG_PROC_KCORE)
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 8dcf237..72bfccd 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -85,7 +85,7 @@
 if( !( cond ) ) 								\
   reiserfs_panic( NULL, "reiserfs[%i]: assertion " scond " failed at "	\
 		  __FILE__ ":%i:%s: " format "\n",		\
-		  in_interrupt() ? -1 : current -> pid, __LINE__ , __FUNCTION__ , ##args )
+		  in_interrupt() ? -1 : task_pid_nr(current), __LINE__ , __FUNCTION__ , ##args )
 
 #define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args)
 
@@ -283,6 +283,18 @@
 	return sb->s_fs_info;
 }
 
+/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
+ * which overflows on large file systems. */
+static inline u32 reiserfs_bmap_count(struct super_block *sb)
+{
+	return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
+}
+
+static inline int bmap_would_wrap(unsigned bmap_nr)
+{
+	return bmap_nr > ((1LL << 16) - 1);
+}
+
 /** this says about version of key of all items (but stat data) the
     object consists of */
 #define get_inode_item_key_version( inode )                                    \
@@ -1734,8 +1746,8 @@
 int journal_mark_freed(struct reiserfs_transaction_handle *,
 		       struct super_block *, b_blocknr_t blocknr);
 int journal_transaction_should_end(struct reiserfs_transaction_handle *, int);
-int reiserfs_in_journal(struct super_block *p_s_sb, int bmap_nr, int bit_nr,
-			int searchall, b_blocknr_t * next);
+int reiserfs_in_journal(struct super_block *p_s_sb, unsigned int bmap_nr,
+			int bit_nr, int searchall, b_blocknr_t *next);
 int journal_begin(struct reiserfs_transaction_handle *,
 		  struct super_block *p_s_sb, unsigned long);
 int journal_join_abort(struct reiserfs_transaction_handle *,
@@ -1743,7 +1755,7 @@
 void reiserfs_journal_abort(struct super_block *sb, int errno);
 void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...);
 int reiserfs_allocate_list_bitmaps(struct super_block *s,
-				   struct reiserfs_list_bitmap *, int);
+				   struct reiserfs_list_bitmap *, unsigned int);
 
 void add_save_link(struct reiserfs_transaction_handle *th,
 		   struct inode *inode, int truncate);
@@ -2041,7 +2053,7 @@
  * arguments, such as node, search path, transaction_handle, etc. */
 struct __reiserfs_blocknr_hint {
 	struct inode *inode;	/* inode passed to allocator, if we allocate unf. nodes */
-	long block;		/* file offset, in blocks */
+	sector_t block;		/* file offset, in blocks */
 	struct in_core_key key;
 	struct treepath *path;	/* search path, used by allocator to deternine search_start by
 				 * various ways */
@@ -2099,7 +2111,8 @@
 static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle
 					    *th, struct inode *inode,
 					    b_blocknr_t * new_blocknrs,
-					    struct treepath *path, long block)
+					    struct treepath *path,
+					    sector_t block)
 {
 	reiserfs_blocknr_hint_t hint = {
 		.th = th,
@@ -2116,7 +2129,8 @@
 static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle
 					     *th, struct inode *inode,
 					     b_blocknr_t * new_blocknrs,
-					     struct treepath *path, long block)
+					     struct treepath *path,
+					     sector_t block)
 {
 	reiserfs_blocknr_hint_t hint = {
 		.th = th,
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index ff9e923..10fa0c8 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -265,9 +265,7 @@
 typedef __u32(*hashf_t) (const signed char *, int);
 
 struct reiserfs_bitmap_info {
-	// FIXME: Won't work with block sizes > 8K
-	__u16 first_zero_hint;
-	__u16 free_count;
+	__u32 free_count;
 };
 
 struct proc_dir_entry;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 10a83d8..13df99f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -25,6 +25,7 @@
 #define CLONE_NEWUTS		0x04000000	/* New utsname group? */
 #define CLONE_NEWIPC		0x08000000	/* New ipcs */
 #define CLONE_NEWUSER		0x10000000	/* New user namespace */
+#define CLONE_NEWPID		0x20000000	/* New pid namespace */
 #define CLONE_NEWNET		0x40000000	/* New network namespace */
 
 /*
@@ -428,7 +429,17 @@
 	cputime_t it_prof_incr, it_virt_incr;
 
 	/* job control IDs */
-	pid_t pgrp;
+
+	/*
+	 * pgrp and session fields are deprecated.
+	 * use the task_session_Xnr and task_pgrp_Xnr routines below
+	 */
+
+	union {
+		pid_t pgrp __deprecated;
+		pid_t __pgrp;
+	};
+
 	struct pid *tty_old_pgrp;
 
 	union {
@@ -736,6 +747,8 @@
 #endif
 };
 
+extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new);
+
 #endif	/* CONFIG_SMP */
 
 /*
@@ -756,8 +769,6 @@
 }
 
 struct io_context;			/* See blkdev.h */
-struct cpuset;
-
 #define NGROUPS_SMALL		32
 #define NGROUPS_PER_BLOCK	((int)(PAGE_SIZE / sizeof(gid_t)))
 struct group_info {
@@ -1125,11 +1136,16 @@
 	short il_next;
 #endif
 #ifdef CONFIG_CPUSETS
-	struct cpuset *cpuset;
 	nodemask_t mems_allowed;
 	int cpuset_mems_generation;
 	int cpuset_mem_spread_rotor;
 #endif
+#ifdef CONFIG_CGROUPS
+	/* Control Group info protected by css_set_lock */
+	struct css_set *cgroups;
+	/* cg_list protected by css_set_lock and tsk->alloc_lock */
+	struct list_head cg_list;
+#endif
 #ifdef CONFIG_FUTEX
 	struct robust_list_head __user *robust_list;
 #ifdef CONFIG_COMPAT
@@ -1185,24 +1201,14 @@
 	return rt_prio(p->prio);
 }
 
-static inline pid_t process_group(struct task_struct *tsk)
+static inline void set_task_session(struct task_struct *tsk, pid_t session)
 {
-	return tsk->signal->pgrp;
+	tsk->signal->__session = session;
 }
 
-static inline pid_t signal_session(struct signal_struct *sig)
+static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp)
 {
-	return sig->__session;
-}
-
-static inline pid_t process_session(struct task_struct *tsk)
-{
-	return signal_session(tsk->signal);
-}
-
-static inline void set_signal_session(struct signal_struct *sig, pid_t session)
-{
-	sig->__session = session;
+	tsk->signal->__pgrp = pgrp;
 }
 
 static inline struct pid *task_pid(struct task_struct *task)
@@ -1225,6 +1231,88 @@
 	return task->group_leader->pids[PIDTYPE_SID].pid;
 }
 
+struct pid_namespace;
+
+/*
+ * the helpers to get the task's different pids as they are seen
+ * from various namespaces
+ *
+ * task_xid_nr()     : global id, i.e. the id seen from the init namespace;
+ * task_xid_vnr()    : virtual id, i.e. the id seen from the namespace the task
+ *                     belongs to. this only makes sence when called in the
+ *                     context of the task that belongs to the same namespace;
+ * task_xid_nr_ns()  : id seen from the ns specified;
+ *
+ * set_task_vxid()   : assigns a virtual id to a task;
+ *
+ * task_ppid_nr_ns() : the parent's id as seen from the namespace specified.
+ *                     the result depends on the namespace and whether the
+ *                     task in question is the namespace's init. e.g. for the
+ *                     namespace's init this will return 0 when called from
+ *                     the namespace of this init, or appropriate id otherwise.
+ *
+ *
+ * see also pid_nr() etc in include/linux/pid.h
+ */
+
+static inline pid_t task_pid_nr(struct task_struct *tsk)
+{
+	return tsk->pid;
+}
+
+pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_pid_vnr(struct task_struct *tsk)
+{
+	return pid_vnr(task_pid(tsk));
+}
+
+
+static inline pid_t task_tgid_nr(struct task_struct *tsk)
+{
+	return tsk->tgid;
+}
+
+pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_tgid_vnr(struct task_struct *tsk)
+{
+	return pid_vnr(task_tgid(tsk));
+}
+
+
+static inline pid_t task_pgrp_nr(struct task_struct *tsk)
+{
+	return tsk->signal->__pgrp;
+}
+
+pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
+{
+	return pid_vnr(task_pgrp(tsk));
+}
+
+
+static inline pid_t task_session_nr(struct task_struct *tsk)
+{
+	return tsk->signal->__session;
+}
+
+pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_session_vnr(struct task_struct *tsk)
+{
+	return pid_vnr(task_session(tsk));
+}
+
+
+static inline pid_t task_ppid_nr_ns(struct task_struct *tsk,
+		struct pid_namespace *ns)
+{
+	return pid_nr_ns(task_pid(rcu_dereference(tsk->real_parent)), ns);
+}
+
 /**
  * pid_alive - check that a task structure is not stale
  * @p: Task structure to be checked.
@@ -1239,16 +1327,22 @@
 }
 
 /**
- * is_init - check if a task structure is init
+ * is_global_init - check if a task structure is init
  * @tsk: Task structure to be checked.
  *
  * Check if a task structure is the first user space task the kernel created.
  */
-static inline int is_init(struct task_struct *tsk)
+static inline int is_global_init(struct task_struct *tsk)
 {
 	return tsk->pid == 1;
 }
 
+/*
+ * is_container_init:
+ * check whether in the task is init in its own pid namespace.
+ */
+extern int is_container_init(struct task_struct *tsk);
+
 extern struct pid *cad_pid;
 
 extern void free_task(struct task_struct *tsk);
@@ -1420,8 +1514,32 @@
 
 extern struct   mm_struct init_mm;
 
-#define find_task_by_pid(nr)	find_task_by_pid_type(PIDTYPE_PID, nr)
-extern struct task_struct *find_task_by_pid_type(int type, int pid);
+extern struct pid_namespace init_pid_ns;
+
+/*
+ * find a task by one of its numerical ids
+ *
+ * find_task_by_pid_type_ns():
+ *      it is the most generic call - it finds a task by all id,
+ *      type and namespace specified
+ * find_task_by_pid_ns():
+ *      finds a task by its pid in the specified namespace
+ * find_task_by_vpid():
+ *      finds a task by its virtual pid
+ * find_task_by_pid():
+ *      finds a task by its global pid
+ *
+ * see also find_pid() etc in include/linux/pid.h
+ */
+
+extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
+		struct pid_namespace *ns);
+
+extern struct task_struct *find_task_by_pid(pid_t nr);
+extern struct task_struct *find_task_by_vpid(pid_t nr);
+extern struct task_struct *find_task_by_pid_ns(pid_t nr,
+		struct pid_namespace *ns);
+
 extern void __set_special_pids(pid_t session, pid_t pgrp);
 
 /* per-UID process charging. */
@@ -1608,6 +1726,12 @@
 	return p->pid == p->tgid;
 }
 
+static inline
+int same_thread_group(struct task_struct *p1, struct task_struct *p2)
+{
+	return p1->tgid == p2->tgid;
+}
+
 static inline struct task_struct *next_thread(const struct task_struct *p)
 {
 	return list_entry(rcu_dereference(p->thread_group.next),
@@ -1625,7 +1749,8 @@
 /*
  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
- * pins the final release of task.io_context.  Also protects ->cpuset.
+ * pins the final release of task.io_context.  Also protects ->cpuset and
+ * ->cgroup.subsys[].
  *
  * Nests both inside and outside of read_lock(&tasklist_lock).
  * It must not be nested with write_lock_irq(&tasklist_lock),
diff --git a/include/linux/sem.h b/include/linux/sem.h
index 9aaffb0..c8eaad9 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -90,7 +90,6 @@
 /* One sem_array data structure for each set of semaphores in the system. */
 struct sem_array {
 	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
-	int			sem_id;
 	time_t			sem_otime;	/* last semop time */
 	time_t			sem_ctime;	/* last change time */
 	struct sem		*sem_base;	/* ptr to first semaphore in array */
diff --git a/include/linux/shm.h b/include/linux/shm.h
index bea65d9..eeaed92 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -79,7 +79,6 @@
 {	
 	struct kern_ipc_perm	shm_perm;
 	struct file *		shm_file;
-	int			id;
 	unsigned long		shm_nattch;
 	unsigned long		shm_segsz;
 	time_t			shm_atim;
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 9a7252e..f4a1395 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -40,6 +40,7 @@
  * @idle_sleeps:	Number of idle calls, where the sched tick was stopped
  * @idle_entrytime:	Time when the idle call was entered
  * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
+ * @sleep_length:	Duration of the current idle sleep
  */
 struct tick_sched {
 	struct hrtimer			sched_timer;
@@ -52,6 +53,7 @@
 	unsigned long			idle_sleeps;
 	ktime_t				idle_entrytime;
 	ktime_t				idle_sleeptime;
+	ktime_t				sleep_length;
 	unsigned long			last_jiffies;
 	unsigned long			next_jiffies;
 	ktime_t				idle_expires;
@@ -100,10 +102,17 @@
 extern void tick_nohz_stop_sched_tick(void);
 extern void tick_nohz_restart_sched_tick(void);
 extern void tick_nohz_update_jiffies(void);
+extern ktime_t tick_nohz_get_sleep_length(void);
 # else
 static inline void tick_nohz_stop_sched_tick(void) { }
 static inline void tick_nohz_restart_sched_tick(void) { }
 static inline void tick_nohz_update_jiffies(void) { }
+static inline ktime_t tick_nohz_get_sleep_length(void)
+{
+	ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
+
+	return len;
+}
 # endif /* !NO_HZ */
 
 #endif
diff --git a/include/linux/types.h b/include/linux/types.h
index 0351bf2f..4f0dad2 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -3,12 +3,9 @@
 
 #ifdef	__KERNEL__
 
-#define BITS_TO_LONGS(bits) \
-	(((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
 #define DECLARE_BITMAP(name,bits) \
 	unsigned long name[BITS_TO_LONGS(bits)]
 
-#define BITS_PER_BYTE 8
 #endif
 
 #include <linux/posix_types.h>
diff --git a/include/linux/uinput.h b/include/linux/uinput.h
index a6c1e8e..15ddd44 100644
--- a/include/linux/uinput.h
+++ b/include/linux/uinput.h
@@ -162,10 +162,6 @@
 #define UI_FF_UPLOAD		1
 #define UI_FF_ERASE		2
 
-#ifndef NBITS
-#define NBITS(x) ((((x)-1)/(sizeof(long)*8))+1)
-#endif	/* NBITS */
-
 #define UINPUT_MAX_NAME_SIZE	80
 struct uinput_user_dev {
 	char name[UINPUT_MAX_NAME_SIZE];
diff --git a/include/linux/vt.h b/include/linux/vt.h
index ba806e8..02c1c02 100644
--- a/include/linux/vt.h
+++ b/include/linux/vt.h
@@ -1,6 +1,18 @@
 #ifndef _LINUX_VT_H
 #define _LINUX_VT_H
 
+#ifdef __KERNEL__
+struct notifier_block;
+
+struct vt_notifier_param {
+	struct vc_data *vc;	/* VC on which the update happened */
+	unsigned int c;		/* Printed char */
+};
+
+extern int register_vt_notifier(struct notifier_block *nb);
+extern int unregister_vt_notifier(struct notifier_block *nb);
+#endif
+
 /*
  * These constants are also useful for user-level apps (e.g., VC
  * resizing).
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index ce6badc..7daafdc 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -8,6 +8,7 @@
 #include <linux/timer.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
+#include <linux/lockdep.h>
 #include <asm/atomic.h>
 
 struct workqueue_struct;
@@ -28,6 +29,9 @@
 #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
 	struct list_head entry;
 	work_func_t func;
+#ifdef CONFIG_LOCKDEP
+	struct lockdep_map lockdep_map;
+#endif
 };
 
 #define WORK_DATA_INIT()	ATOMIC_LONG_INIT(0)
@@ -41,10 +45,23 @@
 	struct work_struct work;
 };
 
+#ifdef CONFIG_LOCKDEP
+/*
+ * NB: because we have to copy the lockdep_map, setting _key
+ * here is required, otherwise it could get initialised to the
+ * copy of the lockdep_map!
+ */
+#define __WORK_INIT_LOCKDEP_MAP(n, k) \
+	.lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k),
+#else
+#define __WORK_INIT_LOCKDEP_MAP(n, k)
+#endif
+
 #define __WORK_INITIALIZER(n, f) {				\
 	.data = WORK_DATA_INIT(),				\
 	.entry	= { &(n).entry, &(n).entry },			\
 	.func = (f),						\
+	__WORK_INIT_LOCKDEP_MAP(#n, &(n))			\
 	}
 
 #define __DELAYED_WORK_INITIALIZER(n, f) {			\
@@ -76,12 +93,24 @@
  * assignment of the work data initializer allows the compiler
  * to generate better code.
  */
+#ifdef CONFIG_LOCKDEP
+#define INIT_WORK(_work, _func)						\
+	do {								\
+		static struct lock_class_key __key;			\
+									\
+		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
+		lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\
+		INIT_LIST_HEAD(&(_work)->entry);			\
+		PREPARE_WORK((_work), (_func));				\
+	} while (0)
+#else
 #define INIT_WORK(_work, _func)						\
 	do {								\
 		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
 		INIT_LIST_HEAD(&(_work)->entry);			\
 		PREPARE_WORK((_work), (_func));				\
 	} while (0)
+#endif
 
 #define INIT_DELAYED_WORK(_work, _func)				\
 	do {							\
@@ -118,9 +147,23 @@
 	clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
 
 
-extern struct workqueue_struct *__create_workqueue(const char *name,
-						    int singlethread,
-						    int freezeable);
+extern struct workqueue_struct *
+__create_workqueue_key(const char *name, int singlethread,
+		       int freezeable, struct lock_class_key *key);
+
+#ifdef CONFIG_LOCKDEP
+#define __create_workqueue(name, singlethread, freezeable)	\
+({								\
+	static struct lock_class_key __key;			\
+								\
+	__create_workqueue_key((name), (singlethread),		\
+			       (freezeable), &__key);		\
+})
+#else
+#define __create_workqueue(name, singlethread, freezeable)	\
+	__create_workqueue_key((name), (singlethread), (freezeable), NULL)
+#endif
+
 #define create_workqueue(name) __create_workqueue((name), 0, 0)
 #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1)
 #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0)
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 686425a..625346c 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -44,7 +44,7 @@
 do {  \
 	if ((p9_debug_level & level) == level) \
 		printk(KERN_NOTICE "-- %s (%d): " \
-		format , __FUNCTION__, current->pid , ## arg); \
+		format , __FUNCTION__, task_pid_nr(current) , ## arg); \
 } while (0)
 
 #define PRINT_FCALL_ERROR(s, fcall) P9_DPRINTK(P9_DEBUG_ERROR,   \
@@ -59,7 +59,7 @@
 #define P9_EPRINTK(level, format, arg...) \
 do { \
 	printk(level "9p: %s (%d): " \
-		format , __FUNCTION__, current->pid , ## arg); \
+		format , __FUNCTION__, task_pid_nr(current), ## arg); \
 } while (0)
 
 
diff --git a/include/net/scm.h b/include/net/scm.h
index 423cb1d..06df126 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -4,6 +4,8 @@
 #include <linux/limits.h>
 #include <linux/net.h>
 #include <linux/security.h>
+#include <linux/pid.h>
+#include <linux/nsproxy.h>
 
 /* Well, we should have at least one descriptor open
  * to accept passed FDs 8)
@@ -54,7 +56,7 @@
 	struct task_struct *p = current;
 	scm->creds.uid = p->uid;
 	scm->creds.gid = p->gid;
-	scm->creds.pid = p->tgid;
+	scm->creds.pid = task_tgid_vnr(p);
 	scm->fp = NULL;
 	scm->seq = 0;
 	unix_get_peersec_dgram(sock, scm);
diff --git a/include/video/sstfb.h b/include/video/sstfb.h
index baa163f..b52f073 100644
--- a/include/video/sstfb.h
+++ b/include/video/sstfb.h
@@ -68,7 +68,6 @@
 #  define print_var(X,Y...)
 #endif
 
-#define BIT(x)		(1ul<<(x))
 #define POW2(x)		(1ul<<(x))
 
 /*
diff --git a/include/video/tdfx.h b/include/video/tdfx.h
index 05b63c2..7431d96 100644
--- a/include/video/tdfx.h
+++ b/include/video/tdfx.h
@@ -79,8 +79,6 @@
 
 /* register bitfields (not all, only as needed) */
 
-#define BIT(x)	(1UL << (x))
-
 /* COMMAND_2D reg. values */
 #define TDFX_ROP_COPY		0xcc	/* src */
 #define TDFX_ROP_INVERT		0x55	/* NOT dst */
diff --git a/init/Kconfig b/init/Kconfig
index a29a688..541382d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -270,9 +270,43 @@
 		     13 =>  8 KB
 		     12 =>  4 KB
 
+config CGROUPS
+	bool "Control Group support"
+	help
+	  This option will let you use process cgroup subsystems
+	  such as Cpusets
+
+	  Say N if unsure.
+
+config CGROUP_DEBUG
+	bool "Example debug cgroup subsystem"
+	depends on CGROUPS
+	help
+	  This option enables a simple cgroup subsystem that
+	  exports useful debugging information about the cgroups
+	  framework
+
+	  Say N if unsure
+
+config CGROUP_NS
+        bool "Namespace cgroup subsystem"
+        depends on CGROUPS
+        help
+          Provides a simple namespace cgroup subsystem to
+          provide hierarchical naming of sets of namespaces,
+          for instance virtual servers and checkpoint/restart
+          jobs.
+
+config CGROUP_CPUACCT
+	bool "Simple CPU accounting cgroup subsystem"
+	depends on CGROUPS
+	help
+	  Provides a simple Resource Controller for monitoring the
+	  total CPU consumed by the tasks in a cgroup
+
 config CPUSETS
 	bool "Cpuset support"
-	depends on SMP
+	depends on SMP && CGROUPS
 	help
 	  This option will let you create and manage CPUSETs which
 	  allow dynamically partitioning a system into sets of CPUs and
@@ -300,6 +334,16 @@
 	  This option will choose userid as the basis for grouping
 	  tasks, thus providing equal CPU bandwidth to each user.
 
+config FAIR_CGROUP_SCHED
+	bool "Control groups"
+ 	depends on CGROUPS
+ 	help
+	  This option allows you to create arbitrary task groups
+	  using the "cgroup" pseudo filesystem and control
+	  the cpu bandwidth allocated to each such task group.
+	  Refer to Documentation/cgroups.txt for more information
+	  on "cgroup" pseudo filesystem.
+
 endchoice
 
 config SYSFS_DEPRECATED
@@ -322,6 +366,11 @@
 	  If you are using a distro that was released in 2006 or later,
 	  it should be safe to say N here.
 
+config PROC_PID_CPUSET
+	bool "Include legacy /proc/<pid>/cpuset file"
+	depends on CPUSETS
+	default y
+
 config RELAY
 	bool "Kernel->user space relay support (formerly relayfs)"
 	help
diff --git a/init/main.c b/init/main.c
index 9def935..0dd0e7a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -39,6 +39,7 @@
 #include <linux/writeback.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/cgroup.h>
 #include <linux/efi.h>
 #include <linux/tick.h>
 #include <linux/interrupt.h>
@@ -523,6 +524,7 @@
 	 */
 	unwind_init();
 	lockdep_init();
+	cgroup_init_early();
 
 	local_irq_disable();
 	early_boot_irqs_off();
@@ -640,6 +642,7 @@
 #ifdef CONFIG_PROC_FS
 	proc_root_init();
 #endif
+	cgroup_init();
 	cpuset_init();
 	taskstats_init_early();
 	delayacct_init();
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 20f1fed..c0b26dc 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -29,6 +29,8 @@
 #include <linux/audit.h>
 #include <linux/signal.h>
 #include <linux/mutex.h>
+#include <linux/nsproxy.h>
+#include <linux/pid.h>
 
 #include <net/sock.h>
 #include "util.h"
@@ -330,7 +332,8 @@
 			(info->notify_owner &&
 			 info->notify.sigev_notify == SIGEV_SIGNAL) ?
 				info->notify.sigev_signo : 0,
-			pid_nr(info->notify_owner));
+			pid_nr_ns(info->notify_owner,
+				current->nsproxy->pid_ns));
 	spin_unlock(&info->lock);
 	buffer[sizeof(buffer)-1] = '\0';
 	slen = strlen(buffer)+1;
@@ -507,7 +510,7 @@
 			sig_i.si_errno = 0;
 			sig_i.si_code = SI_MESGQ;
 			sig_i.si_value = info->notify.sigev_value;
-			sig_i.si_pid = current->tgid;
+			sig_i.si_pid = task_pid_vnr(current);
 			sig_i.si_uid = current->uid;
 
 			kill_pid_info(info->notify.sigev_signo,
diff --git a/ipc/msg.c b/ipc/msg.c
index a03fcb52..fdf3db5 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -34,7 +34,7 @@
 #include <linux/syscalls.h>
 #include <linux/audit.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 
 #include <asm/current.h>
@@ -66,23 +66,15 @@
 #define SEARCH_NOTEQUAL		3
 #define SEARCH_LESSEQUAL	4
 
-static atomic_t msg_bytes =	ATOMIC_INIT(0);
-static atomic_t msg_hdrs =	ATOMIC_INIT(0);
-
 static struct ipc_ids init_msg_ids;
 
 #define msg_ids(ns)	(*((ns)->ids[IPC_MSG_IDS]))
 
-#define msg_lock(ns, id)	((struct msg_queue*)ipc_lock(&msg_ids(ns), id))
 #define msg_unlock(msq)		ipc_unlock(&(msq)->q_perm)
-#define msg_rmid(ns, id)	((struct msg_queue*)ipc_rmid(&msg_ids(ns), id))
-#define msg_checkid(ns, msq, msgid)	\
-	ipc_checkid(&msg_ids(ns), &msq->q_perm, msgid)
-#define msg_buildid(ns, id, seq) \
-	ipc_buildid(&msg_ids(ns), id, seq)
+#define msg_buildid(id, seq)	ipc_buildid(id, seq)
 
-static void freeque (struct ipc_namespace *ns, struct msg_queue *msq, int id);
-static int newque (struct ipc_namespace *ns, key_t key, int msgflg);
+static void freeque(struct ipc_namespace *, struct msg_queue *);
+static int newque(struct ipc_namespace *, struct ipc_params *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
 #endif
@@ -93,7 +85,9 @@
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
 	ns->msg_ctlmni = MSGMNI;
-	ipc_init_ids(ids, ns->msg_ctlmni);
+	atomic_set(&ns->msg_bytes, 0);
+	atomic_set(&ns->msg_hdrs, 0);
+	ipc_init_ids(ids);
 }
 
 int msg_init_ns(struct ipc_namespace *ns)
@@ -110,20 +104,25 @@
 
 void msg_exit_ns(struct ipc_namespace *ns)
 {
-	int i;
 	struct msg_queue *msq;
+	int next_id;
+	int total, in_use;
 
-	mutex_lock(&msg_ids(ns).mutex);
-	for (i = 0; i <= msg_ids(ns).max_id; i++) {
-		msq = msg_lock(ns, i);
+	down_write(&msg_ids(ns).rw_mutex);
+
+	in_use = msg_ids(ns).in_use;
+
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
+		msq = idr_find(&msg_ids(ns).ipcs_idr, next_id);
 		if (msq == NULL)
 			continue;
-
-		freeque(ns, msq, i);
+		ipc_lock_by_ptr(&msq->q_perm);
+		freeque(ns, msq);
+		total++;
 	}
-	mutex_unlock(&msg_ids(ns).mutex);
 
-	ipc_fini_ids(ns->ids[IPC_MSG_IDS]);
+	up_write(&msg_ids(ns).rw_mutex);
+
 	kfree(ns->ids[IPC_MSG_IDS]);
 	ns->ids[IPC_MSG_IDS] = NULL;
 }
@@ -136,10 +135,55 @@
 				IPC_MSG_IDS, sysvipc_msg_proc_show);
 }
 
-static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
+/*
+ * This routine is called in the paths where the rw_mutex is held to protect
+ * access to the idr tree.
+ */
+static inline struct msg_queue *msg_lock_check_down(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&msg_ids(ns), id);
+
+	return container_of(ipcp, struct msg_queue, q_perm);
+}
+
+/*
+ * msg_lock_(check_) routines are called in the paths where the rw_mutex
+ * is not held.
+ */
+static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id);
+
+	return container_of(ipcp, struct msg_queue, q_perm);
+}
+
+static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id);
+
+	return container_of(ipcp, struct msg_queue, q_perm);
+}
+
+static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
+{
+	ipc_rmid(&msg_ids(ns), &s->q_perm);
+}
+
+/**
+ * newque - Create a new msg queue
+ * @ns: namespace
+ * @params: ptr to the structure that contains the key and msgflg
+ *
+ * Called with msg_ids.rw_mutex held (writer)
+ */
+static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 {
 	struct msg_queue *msq;
 	int id, retval;
+	key_t key = params->key;
+	int msgflg = params->flg;
 
 	msq = ipc_rcu_alloc(sizeof(*msq));
 	if (!msq)
@@ -155,14 +199,17 @@
 		return retval;
 	}
 
+	/*
+	 * ipc_addid() locks msq
+	 */
 	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
-	if (id == -1) {
+	if (id < 0) {
 		security_msg_queue_free(msq);
 		ipc_rcu_putref(msq);
-		return -ENOSPC;
+		return id;
 	}
 
-	msq->q_id = msg_buildid(ns, id, msq->q_perm.seq);
+	msq->q_perm.id = msg_buildid(id, msq->q_perm.seq);
 	msq->q_stime = msq->q_rtime = 0;
 	msq->q_ctime = get_seconds();
 	msq->q_cbytes = msq->q_qnum = 0;
@@ -171,9 +218,10 @@
 	INIT_LIST_HEAD(&msq->q_messages);
 	INIT_LIST_HEAD(&msq->q_receivers);
 	INIT_LIST_HEAD(&msq->q_senders);
+
 	msg_unlock(msq);
 
-	return msq->q_id;
+	return msq->q_perm.id;
 }
 
 static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
@@ -224,19 +272,19 @@
 
 /*
  * freeque() wakes up waiters on the sender and receiver waiting queue,
- * removes the message queue from message queue ID
- * array, and cleans up all the messages associated with this queue.
+ * removes the message queue from message queue ID IDR, and cleans up all the
+ * messages associated with this queue.
  *
- * msg_ids.mutex and the spinlock for this message queue is hold
- * before freeque() is called. msg_ids.mutex remains locked on exit.
+ * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
+ * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
  */
-static void freeque(struct ipc_namespace *ns, struct msg_queue *msq, int id)
+static void freeque(struct ipc_namespace *ns, struct msg_queue *msq)
 {
 	struct list_head *tmp;
 
 	expunge_all(msq, -EIDRM);
 	ss_wakeup(&msq->q_senders, 1);
-	msq = msg_rmid(ns, id);
+	msg_rmid(ns, msq);
 	msg_unlock(msq);
 
 	tmp = msq->q_messages.next;
@@ -244,49 +292,40 @@
 		struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
 
 		tmp = tmp->next;
-		atomic_dec(&msg_hdrs);
+		atomic_dec(&ns->msg_hdrs);
 		free_msg(msg);
 	}
-	atomic_sub(msq->q_cbytes, &msg_bytes);
+	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
 	security_msg_queue_free(msq);
 	ipc_rcu_putref(msq);
 }
 
+/*
+ * Called with msg_ids.rw_mutex and ipcp locked.
+ */
+static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
+{
+	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
+
+	return security_msg_queue_associate(msq, msgflg);
+}
+
 asmlinkage long sys_msgget(key_t key, int msgflg)
 {
-	struct msg_queue *msq;
-	int id, ret = -EPERM;
 	struct ipc_namespace *ns;
+	struct ipc_ops msg_ops;
+	struct ipc_params msg_params;
 
 	ns = current->nsproxy->ipc_ns;
-	
-	mutex_lock(&msg_ids(ns).mutex);
-	if (key == IPC_PRIVATE) 
-		ret = newque(ns, key, msgflg);
-	else if ((id = ipc_findkey(&msg_ids(ns), key)) == -1) { /* key not used */
-		if (!(msgflg & IPC_CREAT))
-			ret = -ENOENT;
-		else
-			ret = newque(ns, key, msgflg);
-	} else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
-		ret = -EEXIST;
-	} else {
-		msq = msg_lock(ns, id);
-		BUG_ON(msq == NULL);
-		if (ipcperms(&msq->q_perm, msgflg))
-			ret = -EACCES;
-		else {
-			int qid = msg_buildid(ns, id, msq->q_perm.seq);
 
-			ret = security_msg_queue_associate(msq, msgflg);
-			if (!ret)
-				ret = qid;
-		}
-		msg_unlock(msq);
-	}
-	mutex_unlock(&msg_ids(ns).mutex);
+	msg_ops.getnew = newque;
+	msg_ops.associate = msg_security;
+	msg_ops.more_checks = NULL;
 
-	return ret;
+	msg_params.key = key;
+	msg_params.flg = msgflg;
+
+	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
 }
 
 static inline unsigned long
@@ -420,23 +459,23 @@
 		msginfo.msgmnb = ns->msg_ctlmnb;
 		msginfo.msgssz = MSGSSZ;
 		msginfo.msgseg = MSGSEG;
-		mutex_lock(&msg_ids(ns).mutex);
+		down_read(&msg_ids(ns).rw_mutex);
 		if (cmd == MSG_INFO) {
 			msginfo.msgpool = msg_ids(ns).in_use;
-			msginfo.msgmap = atomic_read(&msg_hdrs);
-			msginfo.msgtql = atomic_read(&msg_bytes);
+			msginfo.msgmap = atomic_read(&ns->msg_hdrs);
+			msginfo.msgtql = atomic_read(&ns->msg_bytes);
 		} else {
 			msginfo.msgmap = MSGMAP;
 			msginfo.msgpool = MSGPOOL;
 			msginfo.msgtql = MSGTQL;
 		}
-		max_id = msg_ids(ns).max_id;
-		mutex_unlock(&msg_ids(ns).mutex);
+		max_id = ipc_get_maxid(&msg_ids(ns));
+		up_read(&msg_ids(ns).rw_mutex);
 		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
 			return -EFAULT;
 		return (max_id < 0) ? 0 : max_id;
 	}
-	case MSG_STAT:
+	case MSG_STAT:	/* msqid is an index rather than a msg queue id */
 	case IPC_STAT:
 	{
 		struct msqid64_ds tbuf;
@@ -444,21 +483,16 @@
 
 		if (!buf)
 			return -EFAULT;
-		if (cmd == MSG_STAT && msqid >= msg_ids(ns).entries->size)
-			return -EINVAL;
-
-		memset(&tbuf, 0, sizeof(tbuf));
-
-		msq = msg_lock(ns, msqid);
-		if (msq == NULL)
-			return -EINVAL;
 
 		if (cmd == MSG_STAT) {
-			success_return = msg_buildid(ns, msqid, msq->q_perm.seq);
+			msq = msg_lock(ns, msqid);
+			if (IS_ERR(msq))
+				return PTR_ERR(msq);
+			success_return = msq->q_perm.id;
 		} else {
-			err = -EIDRM;
-			if (msg_checkid(ns, msq, msqid))
-				goto out_unlock;
+			msq = msg_lock_check(ns, msqid);
+			if (IS_ERR(msq))
+				return PTR_ERR(msq);
 			success_return = 0;
 		}
 		err = -EACCES;
@@ -469,6 +503,8 @@
 		if (err)
 			goto out_unlock;
 
+		memset(&tbuf, 0, sizeof(tbuf));
+
 		kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
 		tbuf.msg_stime  = msq->q_stime;
 		tbuf.msg_rtime  = msq->q_rtime;
@@ -495,15 +531,13 @@
 		return  -EINVAL;
 	}
 
-	mutex_lock(&msg_ids(ns).mutex);
-	msq = msg_lock(ns, msqid);
-	err = -EINVAL;
-	if (msq == NULL)
+	down_write(&msg_ids(ns).rw_mutex);
+	msq = msg_lock_check_down(ns, msqid);
+	if (IS_ERR(msq)) {
+		err = PTR_ERR(msq);
 		goto out_up;
+	}
 
-	err = -EIDRM;
-	if (msg_checkid(ns, msq, msqid))
-		goto out_unlock_up;
 	ipcp = &msq->q_perm;
 
 	err = audit_ipc_obj(ipcp);
@@ -552,12 +586,12 @@
 		break;
 	}
 	case IPC_RMID:
-		freeque(ns, msq, msqid);
+		freeque(ns, msq);
 		break;
 	}
 	err = 0;
 out_up:
-	mutex_unlock(&msg_ids(ns).mutex);
+	up_write(&msg_ids(ns).rw_mutex);
 	return err;
 out_unlock_up:
 	msg_unlock(msq);
@@ -611,7 +645,7 @@
 				msr->r_msg = ERR_PTR(-E2BIG);
 			} else {
 				msr->r_msg = NULL;
-				msq->q_lrpid = msr->r_tsk->pid;
+				msq->q_lrpid = task_pid_vnr(msr->r_tsk);
 				msq->q_rtime = get_seconds();
 				wake_up_process(msr->r_tsk);
 				smp_mb();
@@ -646,14 +680,11 @@
 	msg->m_type = mtype;
 	msg->m_ts = msgsz;
 
-	msq = msg_lock(ns, msqid);
-	err = -EINVAL;
-	if (msq == NULL)
+	msq = msg_lock_check(ns, msqid);
+	if (IS_ERR(msq)) {
+		err = PTR_ERR(msq);
 		goto out_free;
-
-	err= -EIDRM;
-	if (msg_checkid(ns, msq, msqid))
-		goto out_unlock_free;
+	}
 
 	for (;;) {
 		struct msg_sender s;
@@ -695,7 +726,7 @@
 		}
 	}
 
-	msq->q_lspid = current->tgid;
+	msq->q_lspid = task_tgid_vnr(current);
 	msq->q_stime = get_seconds();
 
 	if (!pipelined_send(msq, msg)) {
@@ -703,8 +734,8 @@
 		list_add_tail(&msg->m_list, &msq->q_messages);
 		msq->q_cbytes += msgsz;
 		msq->q_qnum++;
-		atomic_add(msgsz, &msg_bytes);
-		atomic_inc(&msg_hdrs);
+		atomic_add(msgsz, &ns->msg_bytes);
+		atomic_inc(&ns->msg_hdrs);
 	}
 
 	err = 0;
@@ -760,13 +791,9 @@
 	mode = convert_mode(&msgtyp, msgflg);
 	ns = current->nsproxy->ipc_ns;
 
-	msq = msg_lock(ns, msqid);
-	if (msq == NULL)
-		return -EINVAL;
-
-	msg = ERR_PTR(-EIDRM);
-	if (msg_checkid(ns, msq, msqid))
-		goto out_unlock;
+	msq = msg_lock_check(ns, msqid);
+	if (IS_ERR(msq))
+		return PTR_ERR(msq);
 
 	for (;;) {
 		struct msg_receiver msr_d;
@@ -810,10 +837,10 @@
 			list_del(&msg->m_list);
 			msq->q_qnum--;
 			msq->q_rtime = get_seconds();
-			msq->q_lrpid = current->tgid;
+			msq->q_lrpid = task_tgid_vnr(current);
 			msq->q_cbytes -= msg->m_ts;
-			atomic_sub(msg->m_ts, &msg_bytes);
-			atomic_dec(&msg_hdrs);
+			atomic_sub(msg->m_ts, &ns->msg_bytes);
+			atomic_dec(&ns->msg_hdrs);
 			ss_wakeup(&msq->q_senders, 0);
 			msg_unlock(msq);
 			break;
@@ -926,7 +953,7 @@
 	return seq_printf(s,
 			"%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
 			msq->q_perm.key,
-			msq->q_id,
+			msq->q_perm.id,
 			msq->q_perm.mode,
 			msq->q_cbytes,
 			msq->q_qnum,
diff --git a/ipc/sem.c b/ipc/sem.c
index b676fef..35952c0 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -80,7 +80,7 @@
 #include <linux/audit.h>
 #include <linux/capability.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
@@ -88,18 +88,14 @@
 
 #define sem_ids(ns)	(*((ns)->ids[IPC_SEM_IDS]))
 
-#define sem_lock(ns, id)	((struct sem_array*)ipc_lock(&sem_ids(ns), id))
 #define sem_unlock(sma)		ipc_unlock(&(sma)->sem_perm)
-#define sem_rmid(ns, id)	((struct sem_array*)ipc_rmid(&sem_ids(ns), id))
-#define sem_checkid(ns, sma, semid)	\
-	ipc_checkid(&sem_ids(ns),&sma->sem_perm,semid)
-#define sem_buildid(ns, id, seq) \
-	ipc_buildid(&sem_ids(ns), id, seq)
+#define sem_checkid(sma, semid)	ipc_checkid(&sma->sem_perm, semid)
+#define sem_buildid(id, seq)	ipc_buildid(id, seq)
 
 static struct ipc_ids init_sem_ids;
 
-static int newary(struct ipc_namespace *, key_t, int, int);
-static void freeary(struct ipc_namespace *ns, struct sem_array *sma, int id);
+static int newary(struct ipc_namespace *, struct ipc_params *);
+static void freeary(struct ipc_namespace *, struct sem_array *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #endif
@@ -129,7 +125,7 @@
 	ns->sc_semopm = SEMOPM;
 	ns->sc_semmni = SEMMNI;
 	ns->used_sems = 0;
-	ipc_init_ids(ids, ns->sc_semmni);
+	ipc_init_ids(ids);
 }
 
 int sem_init_ns(struct ipc_namespace *ns)
@@ -146,20 +142,24 @@
 
 void sem_exit_ns(struct ipc_namespace *ns)
 {
-	int i;
 	struct sem_array *sma;
+	int next_id;
+	int total, in_use;
 
-	mutex_lock(&sem_ids(ns).mutex);
-	for (i = 0; i <= sem_ids(ns).max_id; i++) {
-		sma = sem_lock(ns, i);
+	down_write(&sem_ids(ns).rw_mutex);
+
+	in_use = sem_ids(ns).in_use;
+
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
+		sma = idr_find(&sem_ids(ns).ipcs_idr, next_id);
 		if (sma == NULL)
 			continue;
-
-		freeary(ns, sma, i);
+		ipc_lock_by_ptr(&sma->sem_perm);
+		freeary(ns, sma);
+		total++;
 	}
-	mutex_unlock(&sem_ids(ns).mutex);
+	up_write(&sem_ids(ns).rw_mutex);
 
-	ipc_fini_ids(ns->ids[IPC_SEM_IDS]);
 	kfree(ns->ids[IPC_SEM_IDS]);
 	ns->ids[IPC_SEM_IDS] = NULL;
 }
@@ -173,6 +173,42 @@
 }
 
 /*
+ * This routine is called in the paths where the rw_mutex is held to protect
+ * access to the idr tree.
+ */
+static inline struct sem_array *sem_lock_check_down(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&sem_ids(ns), id);
+
+	return container_of(ipcp, struct sem_array, sem_perm);
+}
+
+/*
+ * sem_lock_(check_) routines are called in the paths where the rw_mutex
+ * is not held.
+ */
+static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id);
+
+	return container_of(ipcp, struct sem_array, sem_perm);
+}
+
+static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
+
+	return container_of(ipcp, struct sem_array, sem_perm);
+}
+
+static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
+{
+	ipc_rmid(&sem_ids(ns), &s->sem_perm);
+}
+
+/*
  * Lockless wakeup algorithm:
  * Without the check/retry algorithm a lockless wakeup is possible:
  * - queue.status is initialized to -EINTR before blocking.
@@ -206,12 +242,23 @@
  */
 #define IN_WAKEUP	1
 
-static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
+/**
+ * newary - Create a new semaphore set
+ * @ns: namespace
+ * @params: ptr to the structure that contains key, semflg and nsems
+ *
+ * Called with sem_ids.rw_mutex held (as a writer)
+ */
+
+static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 {
 	int id;
 	int retval;
 	struct sem_array *sma;
 	int size;
+	key_t key = params->key;
+	int nsems = params->u.nsems;
+	int semflg = params->flg;
 
 	if (!nsems)
 		return -EINVAL;
@@ -236,14 +283,14 @@
 	}
 
 	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
-	if(id == -1) {
+	if (id < 0) {
 		security_sem_free(sma);
 		ipc_rcu_putref(sma);
-		return -ENOSPC;
+		return id;
 	}
 	ns->used_sems += nsems;
 
-	sma->sem_id = sem_buildid(ns, id, sma->sem_perm.seq);
+	sma->sem_perm.id = sem_buildid(id, sma->sem_perm.seq);
 	sma->sem_base = (struct sem *) &sma[1];
 	/* sma->sem_pending = NULL; */
 	sma->sem_pending_last = &sma->sem_pending;
@@ -252,48 +299,56 @@
 	sma->sem_ctime = get_seconds();
 	sem_unlock(sma);
 
-	return sma->sem_id;
+	return sma->sem_perm.id;
 }
 
-asmlinkage long sys_semget (key_t key, int nsems, int semflg)
+
+/*
+ * Called with sem_ids.rw_mutex and ipcp locked.
+ */
+static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
 {
-	int id, err = -EINVAL;
 	struct sem_array *sma;
+
+	sma = container_of(ipcp, struct sem_array, sem_perm);
+	return security_sem_associate(sma, semflg);
+}
+
+/*
+ * Called with sem_ids.rw_mutex and ipcp locked.
+ */
+static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
+				struct ipc_params *params)
+{
+	struct sem_array *sma;
+
+	sma = container_of(ipcp, struct sem_array, sem_perm);
+	if (params->u.nsems > sma->sem_nsems)
+		return -EINVAL;
+
+	return 0;
+}
+
+asmlinkage long sys_semget(key_t key, int nsems, int semflg)
+{
 	struct ipc_namespace *ns;
+	struct ipc_ops sem_ops;
+	struct ipc_params sem_params;
 
 	ns = current->nsproxy->ipc_ns;
 
 	if (nsems < 0 || nsems > ns->sc_semmsl)
 		return -EINVAL;
-	mutex_lock(&sem_ids(ns).mutex);
-	
-	if (key == IPC_PRIVATE) {
-		err = newary(ns, key, nsems, semflg);
-	} else if ((id = ipc_findkey(&sem_ids(ns), key)) == -1) {  /* key not used */
-		if (!(semflg & IPC_CREAT))
-			err = -ENOENT;
-		else
-			err = newary(ns, key, nsems, semflg);
-	} else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
-		err = -EEXIST;
-	} else {
-		sma = sem_lock(ns, id);
-		BUG_ON(sma==NULL);
-		if (nsems > sma->sem_nsems)
-			err = -EINVAL;
-		else if (ipcperms(&sma->sem_perm, semflg))
-			err = -EACCES;
-		else {
-			int semid = sem_buildid(ns, id, sma->sem_perm.seq);
-			err = security_sem_associate(sma, semflg);
-			if (!err)
-				err = semid;
-		}
-		sem_unlock(sma);
-	}
 
-	mutex_unlock(&sem_ids(ns).mutex);
-	return err;
+	sem_ops.getnew = newary;
+	sem_ops.associate = sem_security;
+	sem_ops.more_checks = sem_more_checks;
+
+	sem_params.key = key;
+	sem_params.flg = semflg;
+	sem_params.u.nsems = nsems;
+
+	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
 }
 
 /* Manage the doubly linked list sma->sem_pending as a FIFO:
@@ -487,15 +542,14 @@
 	return semzcnt;
 }
 
-/* Free a semaphore set. freeary() is called with sem_ids.mutex locked and
- * the spinlock for this semaphore set hold. sem_ids.mutex remains locked
- * on exit.
+/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
+ * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
+ * remains locked on exit.
  */
-static void freeary (struct ipc_namespace *ns, struct sem_array *sma, int id)
+static void freeary(struct ipc_namespace *ns, struct sem_array *sma)
 {
 	struct sem_undo *un;
 	struct sem_queue *q;
-	int size;
 
 	/* Invalidate the existing undo structures for this semaphore set.
 	 * (They will be freed without any further action in exit_sem()
@@ -518,12 +572,11 @@
 		q = n;
 	}
 
-	/* Remove the semaphore set from the ID array*/
-	sma = sem_rmid(ns, id);
+	/* Remove the semaphore set from the IDR */
+	sem_rmid(ns, sma);
 	sem_unlock(sma);
 
 	ns->used_sems -= sma->sem_nsems;
-	size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
 	security_sem_free(sma);
 	ipc_rcu_putref(sma);
 }
@@ -576,7 +629,7 @@
 		seminfo.semmnu = SEMMNU;
 		seminfo.semmap = SEMMAP;
 		seminfo.semume = SEMUME;
-		mutex_lock(&sem_ids(ns).mutex);
+		down_read(&sem_ids(ns).rw_mutex);
 		if (cmd == SEM_INFO) {
 			seminfo.semusz = sem_ids(ns).in_use;
 			seminfo.semaem = ns->used_sems;
@@ -584,8 +637,8 @@
 			seminfo.semusz = SEMUSZ;
 			seminfo.semaem = SEMAEM;
 		}
-		max_id = sem_ids(ns).max_id;
-		mutex_unlock(&sem_ids(ns).mutex);
+		max_id = ipc_get_maxid(&sem_ids(ns));
+		up_read(&sem_ids(ns).rw_mutex);
 		if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo))) 
 			return -EFAULT;
 		return (max_id < 0) ? 0: max_id;
@@ -595,14 +648,9 @@
 		struct semid64_ds tbuf;
 		int id;
 
-		if(semid >= sem_ids(ns).entries->size)
-			return -EINVAL;
-
-		memset(&tbuf,0,sizeof(tbuf));
-
 		sma = sem_lock(ns, semid);
-		if(sma == NULL)
-			return -EINVAL;
+		if (IS_ERR(sma))
+			return PTR_ERR(sma);
 
 		err = -EACCES;
 		if (ipcperms (&sma->sem_perm, S_IRUGO))
@@ -612,7 +660,9 @@
 		if (err)
 			goto out_unlock;
 
-		id = sem_buildid(ns, semid, sma->sem_perm.seq);
+		id = sma->sem_perm.id;
+
+		memset(&tbuf, 0, sizeof(tbuf));
 
 		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
 		tbuf.sem_otime  = sma->sem_otime;
@@ -642,16 +692,12 @@
 	ushort* sem_io = fast_sem_io;
 	int nsems;
 
-	sma = sem_lock(ns, semid);
-	if(sma==NULL)
-		return -EINVAL;
+	sma = sem_lock_check(ns, semid);
+	if (IS_ERR(sma))
+		return PTR_ERR(sma);
 
 	nsems = sma->sem_nsems;
 
-	err=-EIDRM;
-	if (sem_checkid(ns,sma,semid))
-		goto out_unlock;
-
 	err = -EACCES;
 	if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO))
 		goto out_unlock;
@@ -795,7 +841,7 @@
 		for (un = sma->undo; un; un = un->id_next)
 			un->semadj[semnum] = 0;
 		curr->semval = val;
-		curr->sempid = current->tgid;
+		curr->sempid = task_tgid_vnr(current);
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
@@ -863,14 +909,10 @@
 		if(copy_semid_from_user (&setbuf, arg.buf, version))
 			return -EFAULT;
 	}
-	sma = sem_lock(ns, semid);
-	if(sma==NULL)
-		return -EINVAL;
+	sma = sem_lock_check_down(ns, semid);
+	if (IS_ERR(sma))
+		return PTR_ERR(sma);
 
-	if (sem_checkid(ns,sma,semid)) {
-		err=-EIDRM;
-		goto out_unlock;
-	}	
 	ipcp = &sma->sem_perm;
 
 	err = audit_ipc_obj(ipcp);
@@ -894,7 +936,7 @@
 
 	switch(cmd){
 	case IPC_RMID:
-		freeary(ns, sma, semid);
+		freeary(ns, sma);
 		err = 0;
 		break;
 	case IPC_SET:
@@ -948,45 +990,15 @@
 		return err;
 	case IPC_RMID:
 	case IPC_SET:
-		mutex_lock(&sem_ids(ns).mutex);
+		down_write(&sem_ids(ns).rw_mutex);
 		err = semctl_down(ns,semid,semnum,cmd,version,arg);
-		mutex_unlock(&sem_ids(ns).mutex);
+		up_write(&sem_ids(ns).rw_mutex);
 		return err;
 	default:
 		return -EINVAL;
 	}
 }
 
-static inline void lock_semundo(void)
-{
-	struct sem_undo_list *undo_list;
-
-	undo_list = current->sysvsem.undo_list;
-	if (undo_list)
-		spin_lock(&undo_list->lock);
-}
-
-/* This code has an interaction with copy_semundo().
- * Consider; two tasks are sharing the undo_list. task1
- * acquires the undo_list lock in lock_semundo().  If task2 now
- * exits before task1 releases the lock (by calling
- * unlock_semundo()), then task1 will never call spin_unlock().
- * This leave the sem_undo_list in a locked state.  If task1 now creats task3
- * and once again shares the sem_undo_list, the sem_undo_list will still be
- * locked, and future SEM_UNDO operations will deadlock.  This case is
- * dealt with in copy_semundo() by having it reinitialize the spin lock when 
- * the refcnt goes from 1 to 2.
- */
-static inline void unlock_semundo(void)
-{
-	struct sem_undo_list *undo_list;
-
-	undo_list = current->sysvsem.undo_list;
-	if (undo_list)
-		spin_unlock(&undo_list->lock);
-}
-
-
 /* If the task doesn't already have a undo_list, then allocate one
  * here.  We guarantee there is only one thread using this undo list,
  * and current is THE ONE
@@ -1047,22 +1059,17 @@
 	if (error)
 		return ERR_PTR(error);
 
-	lock_semundo();
+	spin_lock(&ulp->lock);
 	un = lookup_undo(ulp, semid);
-	unlock_semundo();
+	spin_unlock(&ulp->lock);
 	if (likely(un!=NULL))
 		goto out;
 
 	/* no undo structure around - allocate one. */
-	sma = sem_lock(ns, semid);
-	un = ERR_PTR(-EINVAL);
-	if(sma==NULL)
-		goto out;
-	un = ERR_PTR(-EIDRM);
-	if (sem_checkid(ns,sma,semid)) {
-		sem_unlock(sma);
-		goto out;
-	}
+	sma = sem_lock_check(ns, semid);
+	if (IS_ERR(sma))
+		return ERR_PTR(PTR_ERR(sma));
+
 	nsems = sma->sem_nsems;
 	ipc_rcu_getref(sma);
 	sem_unlock(sma);
@@ -1077,10 +1084,10 @@
 	new->semadj = (short *) &new[1];
 	new->semid = semid;
 
-	lock_semundo();
+	spin_lock(&ulp->lock);
 	un = lookup_undo(ulp, semid);
 	if (un) {
-		unlock_semundo();
+		spin_unlock(&ulp->lock);
 		kfree(new);
 		ipc_lock_by_ptr(&sma->sem_perm);
 		ipc_rcu_putref(sma);
@@ -1091,7 +1098,7 @@
 	ipc_rcu_putref(sma);
 	if (sma->sem_perm.deleted) {
 		sem_unlock(sma);
-		unlock_semundo();
+		spin_unlock(&ulp->lock);
 		kfree(new);
 		un = ERR_PTR(-EIDRM);
 		goto out;
@@ -1102,7 +1109,7 @@
 	sma->undo = new;
 	sem_unlock(sma);
 	un = new;
-	unlock_semundo();
+	spin_unlock(&ulp->lock);
 out:
 	return un;
 }
@@ -1168,15 +1175,14 @@
 	} else
 		un = NULL;
 
-	sma = sem_lock(ns, semid);
-	error=-EINVAL;
-	if(sma==NULL)
+	sma = sem_lock_check(ns, semid);
+	if (IS_ERR(sma)) {
+		error = PTR_ERR(sma);
 		goto out_free;
-	error = -EIDRM;
-	if (sem_checkid(ns,sma,semid))
-		goto out_unlock_free;
+	}
+
 	/*
-	 * semid identifies are not unique - find_undo may have
+	 * semid identifiers are not unique - find_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
 	 * and now a new array with received the same id. Check and retry.
 	 */
@@ -1196,7 +1202,7 @@
 	if (error)
 		goto out_unlock_free;
 
-	error = try_atomic_semop (sma, sops, nsops, un, current->tgid);
+	error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
 	if (error <= 0) {
 		if (alter && error == 0)
 			update_queue (sma);
@@ -1211,7 +1217,7 @@
 	queue.sops = sops;
 	queue.nsops = nsops;
 	queue.undo = un;
-	queue.pid = current->tgid;
+	queue.pid = task_tgid_vnr(current);
 	queue.id = semid;
 	queue.alter = alter;
 	if (alter)
@@ -1242,7 +1248,7 @@
 	}
 
 	sma = sem_lock(ns, semid);
-	if(sma==NULL) {
+	if (IS_ERR(sma)) {
 		BUG_ON(queue.prev != NULL);
 		error = -EIDRM;
 		goto out_free;
@@ -1279,10 +1285,6 @@
 
 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
  * parent and child tasks.
- *
- * See the notes above unlock_semundo() regarding the spin_lock_init()
- * in this code.  Initialize the undo_list->lock here instead of get_undo_list()
- * because of the reasoning in the comment above unlock_semundo.
  */
 
 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
@@ -1342,13 +1344,13 @@
 		if(semid == -1)
 			continue;
 		sma = sem_lock(ns, semid);
-		if (sma == NULL)
+		if (IS_ERR(sma))
 			continue;
 
 		if (u->semid == -1)
 			goto next_entry;
 
-		BUG_ON(sem_checkid(ns,sma,u->semid));
+		BUG_ON(sem_checkid(sma, u->semid));
 
 		/* remove u from the sma->undo list */
 		for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
@@ -1382,7 +1384,7 @@
 					semaphore->semval = 0;
 				if (semaphore->semval > SEMVMX)
 					semaphore->semval = SEMVMX;
-				semaphore->sempid = current->tgid;
+				semaphore->sempid = task_tgid_vnr(current);
 			}
 		}
 		sma->sem_otime = get_seconds();
@@ -1402,7 +1404,7 @@
 	return seq_printf(s,
 			  "%10d %10d  %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
 			  sma->sem_perm.key,
-			  sma->sem_id,
+			  sma->sem_perm.id,
 			  sma->sem_perm.mode,
 			  sma->sem_nsems,
 			  sma->sem_perm.uid,
diff --git a/ipc/shm.c b/ipc/shm.c
index 5fc5cf5..3818fae 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -35,7 +35,7 @@
 #include <linux/capability.h>
 #include <linux/ptrace.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 #include <linux/mount.h>
 
@@ -59,17 +59,11 @@
 
 #define shm_ids(ns)	(*((ns)->ids[IPC_SHM_IDS]))
 
-#define shm_lock(ns, id)		\
-	((struct shmid_kernel*)ipc_lock(&shm_ids(ns),id))
 #define shm_unlock(shp)			\
 	ipc_unlock(&(shp)->shm_perm)
-#define shm_get(ns, id)			\
-	((struct shmid_kernel*)ipc_get(&shm_ids(ns),id))
-#define shm_buildid(ns, id, seq)	\
-	ipc_buildid(&shm_ids(ns), id, seq)
+#define shm_buildid(id, seq)	ipc_buildid(id, seq)
 
-static int newseg (struct ipc_namespace *ns, key_t key,
-		int shmflg, size_t size);
+static int newseg(struct ipc_namespace *, struct ipc_params *);
 static void shm_open(struct vm_area_struct *vma);
 static void shm_close(struct vm_area_struct *vma);
 static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
@@ -84,9 +78,13 @@
 	ns->shm_ctlall = SHMALL;
 	ns->shm_ctlmni = SHMMNI;
 	ns->shm_tot = 0;
-	ipc_init_ids(ids, 1);
+	ipc_init_ids(ids);
 }
 
+/*
+ * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
+ * Only shm_ids.rw_mutex remains locked on exit.
+ */
 static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
 	if (shp->shm_nattch){
@@ -112,20 +110,24 @@
 
 void shm_exit_ns(struct ipc_namespace *ns)
 {
-	int i;
 	struct shmid_kernel *shp;
+	int next_id;
+	int total, in_use;
 
-	mutex_lock(&shm_ids(ns).mutex);
-	for (i = 0; i <= shm_ids(ns).max_id; i++) {
-		shp = shm_lock(ns, i);
+	down_write(&shm_ids(ns).rw_mutex);
+
+	in_use = shm_ids(ns).in_use;
+
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
+		shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
 		if (shp == NULL)
 			continue;
-
+		ipc_lock_by_ptr(&shp->shm_perm);
 		do_shm_rmid(ns, shp);
+		total++;
 	}
-	mutex_unlock(&shm_ids(ns).mutex);
+	up_write(&shm_ids(ns).rw_mutex);
 
-	ipc_fini_ids(ns->ids[IPC_SHM_IDS]);
 	kfree(ns->ids[IPC_SHM_IDS]);
 	ns->ids[IPC_SHM_IDS] = NULL;
 }
@@ -138,17 +140,49 @@
 				IPC_SHM_IDS, sysvipc_shm_proc_show);
 }
 
-static inline int shm_checkid(struct ipc_namespace *ns,
-		struct shmid_kernel *s, int id)
+/*
+ * shm_lock_(check_)down routines are called in the paths where the rw_mutex
+ * is held to protect access to the idr tree.
+ */
+static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns,
+						int id)
 {
-	if (ipc_checkid(&shm_ids(ns), &s->shm_perm, id))
-		return -EIDRM;
-	return 0;
+	struct kern_ipc_perm *ipcp = ipc_lock_down(&shm_ids(ns), id);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
 }
 
-static inline struct shmid_kernel *shm_rmid(struct ipc_namespace *ns, int id)
+static inline struct shmid_kernel *shm_lock_check_down(
+						struct ipc_namespace *ns,
+						int id)
 {
-	return (struct shmid_kernel *)ipc_rmid(&shm_ids(ns), id);
+	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&shm_ids(ns), id);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+/*
+ * shm_lock_(check_) routines are called in the paths where the rw_mutex
+ * is not held.
+ */
+static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
+{
+	ipc_rmid(&shm_ids(ns), &s->shm_perm);
 }
 
 static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp)
@@ -166,9 +200,9 @@
 	struct shmid_kernel *shp;
 
 	shp = shm_lock(sfd->ns, sfd->id);
-	BUG_ON(!shp);
+	BUG_ON(IS_ERR(shp));
 	shp->shm_atim = get_seconds();
-	shp->shm_lprid = current->tgid;
+	shp->shm_lprid = task_tgid_vnr(current);
 	shp->shm_nattch++;
 	shm_unlock(shp);
 }
@@ -176,15 +210,16 @@
 /*
  * shm_destroy - free the struct shmid_kernel
  *
+ * @ns: namespace
  * @shp: struct to free
  *
- * It has to be called with shp and shm_ids.mutex locked,
+ * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
  * but returns with shp unlocked and freed.
  */
 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
 	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	shm_rmid(ns, shp->id);
+	shm_rmid(ns, shp);
 	shm_unlock(shp);
 	if (!is_file_hugepages(shp->shm_file))
 		shmem_lock(shp->shm_file, 0, shp->mlock_user);
@@ -209,11 +244,11 @@
 	struct shmid_kernel *shp;
 	struct ipc_namespace *ns = sfd->ns;
 
-	mutex_lock(&shm_ids(ns).mutex);
+	down_write(&shm_ids(ns).rw_mutex);
 	/* remove from the list of attaches of the shm segment */
-	shp = shm_lock(ns, sfd->id);
-	BUG_ON(!shp);
-	shp->shm_lprid = current->tgid;
+	shp = shm_lock_down(ns, sfd->id);
+	BUG_ON(IS_ERR(shp));
+	shp->shm_lprid = task_tgid_vnr(current);
 	shp->shm_dtim = get_seconds();
 	shp->shm_nattch--;
 	if(shp->shm_nattch == 0 &&
@@ -221,7 +256,7 @@
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-	mutex_unlock(&shm_ids(ns).mutex);
+	up_write(&shm_ids(ns).rw_mutex);
 }
 
 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -337,8 +372,19 @@
 #endif
 };
 
-static int newseg (struct ipc_namespace *ns, key_t key, int shmflg, size_t size)
+/**
+ * newseg - Create a new shared memory segment
+ * @ns: namespace
+ * @params: ptr to the structure that contains key, size and shmflg
+ *
+ * Called with shm_ids.rw_mutex held as a writer.
+ */
+
+static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 {
+	key_t key = params->key;
+	int shmflg = params->flg;
+	size_t size = params->u.size;
 	int error;
 	struct shmid_kernel *shp;
 	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
@@ -387,28 +433,30 @@
 	if (IS_ERR(file))
 		goto no_file;
 
-	error = -ENOSPC;
 	id = shm_addid(ns, shp);
-	if(id == -1) 
+	if (id < 0) {
+		error = id;
 		goto no_id;
+	}
 
-	shp->shm_cprid = current->tgid;
+	shp->shm_cprid = task_tgid_vnr(current);
 	shp->shm_lprid = 0;
 	shp->shm_atim = shp->shm_dtim = 0;
 	shp->shm_ctim = get_seconds();
 	shp->shm_segsz = size;
 	shp->shm_nattch = 0;
-	shp->id = shm_buildid(ns, id, shp->shm_perm.seq);
+	shp->shm_perm.id = shm_buildid(id, shp->shm_perm.seq);
 	shp->shm_file = file;
 	/*
 	 * shmid gets reported as "inode#" in /proc/pid/maps.
 	 * proc-ps tools use this. Changing this will break them.
 	 */
-	file->f_dentry->d_inode->i_ino = shp->id;
+	file->f_dentry->d_inode->i_ino = shp->shm_perm.id;
 
 	ns->shm_tot += numpages;
+	error = shp->shm_perm.id;
 	shm_unlock(shp);
-	return shp->id;
+	return error;
 
 no_id:
 	fput(file);
@@ -418,42 +466,49 @@
 	return error;
 }
 
-asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
+/*
+ * Called with shm_ids.rw_mutex and ipcp locked.
+ */
+static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 {
 	struct shmid_kernel *shp;
-	int err, id = 0;
+
+	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+	return security_shm_associate(shp, shmflg);
+}
+
+/*
+ * Called with shm_ids.rw_mutex and ipcp locked.
+ */
+static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
+				struct ipc_params *params)
+{
+	struct shmid_kernel *shp;
+
+	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+	if (shp->shm_segsz < params->u.size)
+		return -EINVAL;
+
+	return 0;
+}
+
+asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
+{
 	struct ipc_namespace *ns;
+	struct ipc_ops shm_ops;
+	struct ipc_params shm_params;
 
 	ns = current->nsproxy->ipc_ns;
 
-	mutex_lock(&shm_ids(ns).mutex);
-	if (key == IPC_PRIVATE) {
-		err = newseg(ns, key, shmflg, size);
-	} else if ((id = ipc_findkey(&shm_ids(ns), key)) == -1) {
-		if (!(shmflg & IPC_CREAT))
-			err = -ENOENT;
-		else
-			err = newseg(ns, key, shmflg, size);
-	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
-		err = -EEXIST;
-	} else {
-		shp = shm_lock(ns, id);
-		BUG_ON(shp==NULL);
-		if (shp->shm_segsz < size)
-			err = -EINVAL;
-		else if (ipcperms(&shp->shm_perm, shmflg))
-			err = -EACCES;
-		else {
-			int shmid = shm_buildid(ns, id, shp->shm_perm.seq);
-			err = security_shm_associate(shp, shmflg);
-			if (!err)
-				err = shmid;
-		}
-		shm_unlock(shp);
-	}
-	mutex_unlock(&shm_ids(ns).mutex);
+	shm_ops.getnew = newseg;
+	shm_ops.associate = shm_security;
+	shm_ops.more_checks = shm_more_checks;
 
-	return err;
+	shm_params.key = key;
+	shm_params.flg = shmflg;
+	shm_params.u.size = size;
+
+	return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
 }
 
 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
@@ -547,20 +602,26 @@
 	}
 }
 
+/*
+ * Called with shm_ids.rw_mutex held as a reader
+ */
 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 		unsigned long *swp)
 {
-	int i;
+	int next_id;
+	int total, in_use;
 
 	*rss = 0;
 	*swp = 0;
 
-	for (i = 0; i <= shm_ids(ns).max_id; i++) {
+	in_use = shm_ids(ns).in_use;
+
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
 		struct shmid_kernel *shp;
 		struct inode *inode;
 
-		shp = shm_get(ns, i);
-		if(!shp)
+		shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
+		if (shp == NULL)
 			continue;
 
 		inode = shp->shm_file->f_path.dentry->d_inode;
@@ -575,6 +636,8 @@
 			*swp += info->swapped;
 			spin_unlock(&info->lock);
 		}
+
+		total++;
 	}
 }
 
@@ -610,8 +673,11 @@
 		shminfo.shmmin = SHMMIN;
 		if(copy_shminfo_to_user (buf, &shminfo, version))
 			return -EFAULT;
-		/* reading a integer is always atomic */
-		err= shm_ids(ns).max_id;
+
+		down_read(&shm_ids(ns).rw_mutex);
+		err = ipc_get_maxid(&shm_ids(ns));
+		up_read(&shm_ids(ns).rw_mutex);
+
 		if(err<0)
 			err = 0;
 		goto out;
@@ -625,14 +691,14 @@
 			return err;
 
 		memset(&shm_info,0,sizeof(shm_info));
-		mutex_lock(&shm_ids(ns).mutex);
+		down_read(&shm_ids(ns).rw_mutex);
 		shm_info.used_ids = shm_ids(ns).in_use;
 		shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
 		shm_info.shm_tot = ns->shm_tot;
 		shm_info.swap_attempts = 0;
 		shm_info.swap_successes = 0;
-		err = shm_ids(ns).max_id;
-		mutex_unlock(&shm_ids(ns).mutex);
+		err = ipc_get_maxid(&shm_ids(ns));
+		up_read(&shm_ids(ns).rw_mutex);
 		if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
 			err = -EFAULT;
 			goto out;
@@ -646,20 +712,25 @@
 	{
 		struct shmid64_ds tbuf;
 		int result;
-		memset(&tbuf, 0, sizeof(tbuf));
-		shp = shm_lock(ns, shmid);
-		if(shp==NULL) {
-			err = -EINVAL;
+
+		if (!buf) {
+			err = -EFAULT;
 			goto out;
-		} else if(cmd==SHM_STAT) {
-			err = -EINVAL;
-			if (shmid > shm_ids(ns).max_id)
-				goto out_unlock;
-			result = shm_buildid(ns, shmid, shp->shm_perm.seq);
+		}
+
+		if (cmd == SHM_STAT) {
+			shp = shm_lock(ns, shmid);
+			if (IS_ERR(shp)) {
+				err = PTR_ERR(shp);
+				goto out;
+			}
+			result = shp->shm_perm.id;
 		} else {
-			err = shm_checkid(ns, shp,shmid);
-			if(err)
-				goto out_unlock;
+			shp = shm_lock_check(ns, shmid);
+			if (IS_ERR(shp)) {
+				err = PTR_ERR(shp);
+				goto out;
+			}
 			result = 0;
 		}
 		err=-EACCES;
@@ -668,6 +739,7 @@
 		err = security_shm_shmctl(shp, cmd);
 		if (err)
 			goto out_unlock;
+		memset(&tbuf, 0, sizeof(tbuf));
 		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
 		tbuf.shm_segsz	= shp->shm_segsz;
 		tbuf.shm_atime	= shp->shm_atim;
@@ -686,14 +758,11 @@
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	{
-		shp = shm_lock(ns, shmid);
-		if(shp==NULL) {
-			err = -EINVAL;
+		shp = shm_lock_check(ns, shmid);
+		if (IS_ERR(shp)) {
+			err = PTR_ERR(shp);
 			goto out;
 		}
-		err = shm_checkid(ns, shp,shmid);
-		if(err)
-			goto out_unlock;
 
 		err = audit_ipc_obj(&(shp->shm_perm));
 		if (err)
@@ -742,14 +811,12 @@
 		 *	Instead we set a destroyed flag, and then blow
 		 *	the name away when the usage hits zero.
 		 */
-		mutex_lock(&shm_ids(ns).mutex);
-		shp = shm_lock(ns, shmid);
-		err = -EINVAL;
-		if (shp == NULL) 
+		down_write(&shm_ids(ns).rw_mutex);
+		shp = shm_lock_check_down(ns, shmid);
+		if (IS_ERR(shp)) {
+			err = PTR_ERR(shp);
 			goto out_up;
-		err = shm_checkid(ns, shp, shmid);
-		if(err)
-			goto out_unlock_up;
+		}
 
 		err = audit_ipc_obj(&(shp->shm_perm));
 		if (err)
@@ -767,24 +834,27 @@
 			goto out_unlock_up;
 
 		do_shm_rmid(ns, shp);
-		mutex_unlock(&shm_ids(ns).mutex);
+		up_write(&shm_ids(ns).rw_mutex);
 		goto out;
 	}
 
 	case IPC_SET:
 	{
+		if (!buf) {
+			err = -EFAULT;
+			goto out;
+		}
+
 		if (copy_shmid_from_user (&setbuf, buf, version)) {
 			err = -EFAULT;
 			goto out;
 		}
-		mutex_lock(&shm_ids(ns).mutex);
-		shp = shm_lock(ns, shmid);
-		err=-EINVAL;
-		if(shp==NULL)
+		down_write(&shm_ids(ns).rw_mutex);
+		shp = shm_lock_check_down(ns, shmid);
+		if (IS_ERR(shp)) {
+			err = PTR_ERR(shp);
 			goto out_up;
-		err = shm_checkid(ns, shp,shmid);
-		if(err)
-			goto out_unlock_up;
+		}
 		err = audit_ipc_obj(&(shp->shm_perm));
 		if (err)
 			goto out_unlock_up;
@@ -819,7 +889,7 @@
 out_unlock_up:
 	shm_unlock(shp);
 out_up:
-	mutex_unlock(&shm_ids(ns).mutex);
+	up_write(&shm_ids(ns).rw_mutex);
 	goto out;
 out_unlock:
 	shm_unlock(shp);
@@ -890,13 +960,11 @@
 	 * additional creator id...
 	 */
 	ns = current->nsproxy->ipc_ns;
-	shp = shm_lock(ns, shmid);
-	if(shp == NULL)
+	shp = shm_lock_check(ns, shmid);
+	if (IS_ERR(shp)) {
+		err = PTR_ERR(shp);
 		goto out;
-
-	err = shm_checkid(ns, shp,shmid);
-	if (err)
-		goto out_unlock;
+	}
 
 	err = -EACCES;
 	if (ipcperms(&shp->shm_perm, acc_mode))
@@ -925,7 +993,7 @@
 
 	file->private_data = sfd;
 	file->f_mapping = shp->shm_file->f_mapping;
-	sfd->id = shp->id;
+	sfd->id = shp->shm_perm.id;
 	sfd->ns = get_ipc_ns(ns);
 	sfd->file = shp->shm_file;
 	sfd->vm_ops = NULL;
@@ -955,16 +1023,16 @@
 	fput(file);
 
 out_nattch:
-	mutex_lock(&shm_ids(ns).mutex);
-	shp = shm_lock(ns, shmid);
-	BUG_ON(!shp);
+	down_write(&shm_ids(ns).rw_mutex);
+	shp = shm_lock_down(ns, shmid);
+	BUG_ON(IS_ERR(shp));
 	shp->shm_nattch--;
 	if(shp->shm_nattch == 0 &&
 	   shp->shm_perm.mode & SHM_DEST)
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-	mutex_unlock(&shm_ids(ns).mutex);
+	up_write(&shm_ids(ns).rw_mutex);
 
 out:
 	return err;
@@ -1094,7 +1162,7 @@
 		format = BIG_STRING;
 	return seq_printf(s, format,
 			  shp->shm_perm.key,
-			  shp->id,
+			  shp->shm_perm.id,
 			  shp->shm_perm.mode,
 			  shp->shm_segsz,
 			  shp->shm_cprid,
diff --git a/ipc/util.c b/ipc/util.c
index 44e5135..1aa0ebf 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -32,6 +32,7 @@
 #include <linux/proc_fs.h>
 #include <linux/audit.h>
 #include <linux/nsproxy.h>
+#include <linux/rwsem.h>
 
 #include <asm/unistd.h>
 
@@ -129,23 +130,16 @@
 /**
  *	ipc_init_ids		-	initialise IPC identifiers
  *	@ids: Identifier set
- *	@size: Number of identifiers
  *
- *	Given a size for the ipc identifier range (limited below IPCMNI)
- *	set up the sequence range to use then allocate and initialise the
- *	array itself. 
+ *	Set up the sequence range to use for the ipc identifier range (limited
+ *	below IPCMNI) then initialise the ids idr.
  */
  
-void ipc_init_ids(struct ipc_ids* ids, int size)
+void ipc_init_ids(struct ipc_ids *ids)
 {
-	int i;
+	init_rwsem(&ids->rw_mutex);
 
-	mutex_init(&ids->mutex);
-
-	if(size > IPCMNI)
-		size = IPCMNI;
 	ids->in_use = 0;
-	ids->max_id = -1;
 	ids->seq = 0;
 	{
 		int seq_limit = INT_MAX/SEQ_MULTIPLIER;
@@ -155,17 +149,7 @@
 		 	ids->seq_max = seq_limit;
 	}
 
-	ids->entries = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*size +
-				     sizeof(struct ipc_id_ary));
-
-	if(ids->entries == NULL) {
-		printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n");
-		size = 0;
-		ids->entries = &ids->nullentry;
-	}
-	ids->entries->size = size;
-	for(i=0;i<size;i++)
-		ids->entries->p[i] = NULL;
+	idr_init(&ids->ipcs_idr);
 }
 
 #ifdef CONFIG_PROC_FS
@@ -208,99 +192,96 @@
  *	@ids: Identifier set
  *	@key: The key to find
  *	
- *	Requires ipc_ids.mutex locked.
- *	Returns the identifier if found or -1 if not.
+ *	Requires ipc_ids.rw_mutex locked.
+ *	Returns the LOCKED pointer to the ipc structure if found or NULL
+ *	if not.
+ *	If key is found ipc points to the owning ipc structure
  */
  
-int ipc_findkey(struct ipc_ids* ids, key_t key)
+static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 {
-	int id;
-	struct kern_ipc_perm* p;
-	int max_id = ids->max_id;
+	struct kern_ipc_perm *ipc;
+	int next_id;
+	int total;
 
-	/*
-	 * rcu_dereference() is not needed here
-	 * since ipc_ids.mutex is held
-	 */
-	for (id = 0; id <= max_id; id++) {
-		p = ids->entries->p[id];
-		if(p==NULL)
+	for (total = 0, next_id = 0; total < ids->in_use; next_id++) {
+		ipc = idr_find(&ids->ipcs_idr, next_id);
+
+		if (ipc == NULL)
 			continue;
-		if (key == p->key)
-			return id;
+
+		if (ipc->key != key) {
+			total++;
+			continue;
+		}
+
+		ipc_lock_by_ptr(ipc);
+		return ipc;
 	}
-	return -1;
+
+	return NULL;
 }
 
-/*
- * Requires ipc_ids.mutex locked
+/**
+ *	ipc_get_maxid 	-	get the last assigned id
+ *	@ids: IPC identifier set
+ *
+ *	Called with ipc_ids.rw_mutex held.
  */
-static int grow_ary(struct ipc_ids* ids, int newsize)
+
+int ipc_get_maxid(struct ipc_ids *ids)
 {
-	struct ipc_id_ary* new;
-	struct ipc_id_ary* old;
-	int i;
-	int size = ids->entries->size;
+	struct kern_ipc_perm *ipc;
+	int max_id = -1;
+	int total, id;
 
-	if(newsize > IPCMNI)
-		newsize = IPCMNI;
-	if(newsize <= size)
-		return newsize;
+	if (ids->in_use == 0)
+		return -1;
 
-	new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
-			    sizeof(struct ipc_id_ary));
-	if(new == NULL)
-		return size;
-	new->size = newsize;
-	memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size);
-	for(i=size;i<newsize;i++) {
-		new->p[i] = NULL;
+	if (ids->in_use == IPCMNI)
+		return IPCMNI - 1;
+
+	/* Look for the last assigned id */
+	total = 0;
+	for (id = 0; id < IPCMNI && total < ids->in_use; id++) {
+		ipc = idr_find(&ids->ipcs_idr, id);
+		if (ipc != NULL) {
+			max_id = id;
+			total++;
+		}
 	}
-	old = ids->entries;
-
-	/*
-	 * Use rcu_assign_pointer() to make sure the memcpyed contents
-	 * of the new array are visible before the new array becomes visible.
-	 */
-	rcu_assign_pointer(ids->entries, new);
-
-	__ipc_fini_ids(ids, old);
-	return newsize;
+	return max_id;
 }
 
 /**
  *	ipc_addid 	-	add an IPC identifier
  *	@ids: IPC identifier set
  *	@new: new IPC permission set
- *	@size: new size limit for the id array
+ *	@size: limit for the number of used ids
  *
- *	Add an entry 'new' to the IPC arrays. The permissions object is
+ *	Add an entry 'new' to the IPC ids idr. The permissions object is
  *	initialised and the first free entry is set up and the id assigned
- *	is returned. The list is returned in a locked state on success.
- *	On failure the list is not locked and -1 is returned.
+ *	is returned. The 'new' entry is returned in a locked state on success.
+ *	On failure the entry is not locked and a negative err-code is returned.
  *
- *	Called with ipc_ids.mutex held.
+ *	Called with ipc_ids.rw_mutex held as a writer.
  */
  
 int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 {
-	int id;
+	int id, err;
 
-	size = grow_ary(ids,size);
+	if (size > IPCMNI)
+		size = IPCMNI;
 
-	/*
-	 * rcu_dereference()() is not needed here since
-	 * ipc_ids.mutex is held
-	 */
-	for (id = 0; id < size; id++) {
-		if(ids->entries->p[id] == NULL)
-			goto found;
-	}
-	return -1;
-found:
+	if (ids->in_use >= size)
+		return -ENOSPC;
+
+	err = idr_get_new(&ids->ipcs_idr, new, &id);
+	if (err)
+		return err;
+
 	ids->in_use++;
-	if (id > ids->max_id)
-		ids->max_id = id;
 
 	new->cuid = new->uid = current->euid;
 	new->gid = new->cgid = current->egid;
@@ -313,48 +294,153 @@
 	new->deleted = 0;
 	rcu_read_lock();
 	spin_lock(&new->lock);
-	ids->entries->p[id] = new;
 	return id;
 }
 
 /**
- *	ipc_rmid	-	remove an IPC identifier
- *	@ids: identifier set
- *	@id: Identifier to remove
+ *	ipcget_new	-	create a new ipc object
+ *	@ns: namespace
+ *	@ids: IPC identifer set
+ *	@ops: the actual creation routine to call
+ *	@params: its parameters
  *
- *	The identifier must be valid, and in use. The kernel will panic if
- *	fed an invalid identifier. The entry is removed and internal
- *	variables recomputed. The object associated with the identifier
- *	is returned.
- *	ipc_ids.mutex and the spinlock for this ID is hold before this function
- *	is called, and remain locked on the exit.
+ *	This routine is called by sys_msgget, sys_semget() and sys_shmget()
+ *	when the key is IPC_PRIVATE.
+ */
+int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
+		struct ipc_ops *ops, struct ipc_params *params)
+{
+	int err;
+retry:
+	err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
+
+	if (!err)
+		return -ENOMEM;
+
+	down_write(&ids->rw_mutex);
+	err = ops->getnew(ns, params);
+	up_write(&ids->rw_mutex);
+
+	if (err == -EAGAIN)
+		goto retry;
+
+	return err;
+}
+
+/**
+ *	ipc_check_perms	-	check security and permissions for an IPC
+ *	@ipcp: ipc permission set
+ *	@ops: the actual security routine to call
+ *	@params: its parameters
+ *
+ *	This routine is called by sys_msgget(), sys_semget() and sys_shmget()
+ *      when the key is not IPC_PRIVATE and that key already exists in the
+ *      ids IDR.
+ *
+ *	On success, the IPC id is returned.
+ *
+ *	It is called with ipc_ids.rw_mutex and ipcp->lock held.
+ */
+static int ipc_check_perms(struct kern_ipc_perm *ipcp, struct ipc_ops *ops,
+			struct ipc_params *params)
+{
+	int err;
+
+	if (ipcperms(ipcp, params->flg))
+		err = -EACCES;
+	else {
+		err = ops->associate(ipcp, params->flg);
+		if (!err)
+			err = ipcp->id;
+	}
+
+	return err;
+}
+
+/**
+ *	ipcget_public	-	get an ipc object or create a new one
+ *	@ns: namespace
+ *	@ids: IPC identifer set
+ *	@ops: the actual creation routine to call
+ *	@params: its parameters
+ *
+ *	This routine is called by sys_msgget, sys_semget() and sys_shmget()
+ *	when the key is not IPC_PRIVATE.
+ *	It adds a new entry if the key is not found and does some permission
+ *      / security checkings if the key is found.
+ *
+ *	On success, the ipc id is returned.
+ */
+int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
+		struct ipc_ops *ops, struct ipc_params *params)
+{
+	struct kern_ipc_perm *ipcp;
+	int flg = params->flg;
+	int err;
+retry:
+	err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
+
+	/*
+	 * Take the lock as a writer since we are potentially going to add
+	 * a new entry + read locks are not "upgradable"
+	 */
+	down_write(&ids->rw_mutex);
+	ipcp = ipc_findkey(ids, params->key);
+	if (ipcp == NULL) {
+		/* key not used */
+		if (!(flg & IPC_CREAT))
+			err = -ENOENT;
+		else if (!err)
+			err = -ENOMEM;
+		else
+			err = ops->getnew(ns, params);
+	} else {
+		/* ipc object has been locked by ipc_findkey() */
+
+		if (flg & IPC_CREAT && flg & IPC_EXCL)
+			err = -EEXIST;
+		else {
+			err = 0;
+			if (ops->more_checks)
+				err = ops->more_checks(ipcp, params);
+			if (!err)
+				/*
+				 * ipc_check_perms returns the IPC id on
+				 * success
+				 */
+				err = ipc_check_perms(ipcp, ops, params);
+		}
+		ipc_unlock(ipcp);
+	}
+	up_write(&ids->rw_mutex);
+
+	if (err == -EAGAIN)
+		goto retry;
+
+	return err;
+}
+
+
+/**
+ *	ipc_rmid	-	remove an IPC identifier
+ *	@ids: IPC identifier set
+ *	@ipcp: ipc perm structure containing the identifier to remove
+ *
+ *	ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held
+ *	before this function is called, and remain locked on the exit.
  */
  
-struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id)
+void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 {
-	struct kern_ipc_perm* p;
-	int lid = id % SEQ_MULTIPLIER;
-	BUG_ON(lid >= ids->entries->size);
+	int lid = ipcid_to_idx(ipcp->id);
 
-	/* 
-	 * do not need a rcu_dereference()() here to force ordering
-	 * on Alpha, since the ipc_ids.mutex is held.
-	 */	
-	p = ids->entries->p[lid];
-	ids->entries->p[lid] = NULL;
-	BUG_ON(p==NULL);
+	idr_remove(&ids->ipcs_idr, lid);
+
 	ids->in_use--;
 
-	if (lid == ids->max_id) {
-		do {
-			lid--;
-			if(lid == -1)
-				break;
-		} while (ids->entries->p[lid] == NULL);
-		ids->max_id = lid;
-	}
-	p->deleted = 1;
-	return p;
+	ipcp->deleted = 1;
+
+	return;
 }
 
 /**
@@ -491,10 +577,12 @@
  */
 static void ipc_schedule_free(struct rcu_head *head)
 {
-	struct ipc_rcu_grace *grace =
-		container_of(head, struct ipc_rcu_grace, rcu);
-	struct ipc_rcu_sched *sched =
-			container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]);
+	struct ipc_rcu_grace *grace;
+	struct ipc_rcu_sched *sched;
+
+	grace = container_of(head, struct ipc_rcu_grace, rcu);
+	sched = container_of(&(grace->data[0]), struct ipc_rcu_sched,
+				data[0]);
 
 	INIT_WORK(&sched->work, ipc_do_vfree);
 	schedule_work(&sched->work);
@@ -583,7 +671,7 @@
 }
 
 /**
- *	ipc64_perm_to_ipc_perm	-	convert old ipc permissions to new
+ *	ipc64_perm_to_ipc_perm	-	convert new ipc permissions to old
  *	@in: new style IPC permissions
  *	@out: old style IPC permissions
  *
@@ -602,44 +690,37 @@
 	out->seq	= in->seq;
 }
 
-/*
- * So far only shm_get_stat() calls ipc_get() via shm_get(), so ipc_get()
- * is called with shm_ids.mutex locked.  Since grow_ary() is also called with
- * shm_ids.mutex down(for Shared Memory), there is no need to add read
- * barriers here to gurantee the writes in grow_ary() are seen in order 
- * here (for Alpha).
+/**
+ * ipc_lock - Lock an ipc structure without rw_mutex held
+ * @ids: IPC identifier set
+ * @id: ipc id to look for
  *
- * However ipc_get() itself does not necessary require ipc_ids.mutex down. So
- * if in the future ipc_get() is used by other places without ipc_ids.mutex
- * down, then ipc_get() needs read memery barriers as ipc_lock() does.
+ * Look for an id in the ipc ids idr and lock the associated ipc object.
+ *
+ * The ipc object is locked on exit.
+ *
+ * This is the routine that should be called when the rw_mutex is not already
+ * held, i.e. idr tree not protected: it protects the idr tree in read mode
+ * during the idr_find().
  */
-struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id)
-{
-	struct kern_ipc_perm* out;
-	int lid = id % SEQ_MULTIPLIER;
-	if(lid >= ids->entries->size)
-		return NULL;
-	out = ids->entries->p[lid];
-	return out;
-}
 
-struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
+struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
 {
-	struct kern_ipc_perm* out;
-	int lid = id % SEQ_MULTIPLIER;
-	struct ipc_id_ary* entries;
+	struct kern_ipc_perm *out;
+	int lid = ipcid_to_idx(id);
+
+	down_read(&ids->rw_mutex);
 
 	rcu_read_lock();
-	entries = rcu_dereference(ids->entries);
-	if(lid >= entries->size) {
+	out = idr_find(&ids->ipcs_idr, lid);
+	if (out == NULL) {
 		rcu_read_unlock();
-		return NULL;
+		up_read(&ids->rw_mutex);
+		return ERR_PTR(-EINVAL);
 	}
-	out = entries->p[lid];
-	if(out == NULL) {
-		rcu_read_unlock();
-		return NULL;
-	}
+
+	up_read(&ids->rw_mutex);
+
 	spin_lock(&out->lock);
 	
 	/* ipc_rmid() may have already freed the ID while ipc_lock
@@ -648,33 +729,44 @@
 	if (out->deleted) {
 		spin_unlock(&out->lock);
 		rcu_read_unlock();
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	}
+
 	return out;
 }
 
-void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
+/**
+ * ipc_lock_down - Lock an ipc structure with rw_sem held
+ * @ids: IPC identifier set
+ * @id: ipc id to look for
+ *
+ * Look for an id in the ipc ids idr and lock the associated ipc object.
+ *
+ * The ipc object is locked on exit.
+ *
+ * This is the routine that should be called when the rw_mutex is already
+ * held, i.e. idr tree protected.
+ */
+
+struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *ids, int id)
 {
+	struct kern_ipc_perm *out;
+	int lid = ipcid_to_idx(id);
+
 	rcu_read_lock();
-	spin_lock(&perm->lock);
-}
+	out = idr_find(&ids->ipcs_idr, lid);
+	if (out == NULL) {
+		rcu_read_unlock();
+		return ERR_PTR(-EINVAL);
+	}
 
-void ipc_unlock(struct kern_ipc_perm* perm)
-{
-	spin_unlock(&perm->lock);
-	rcu_read_unlock();
-}
+	spin_lock(&out->lock);
 
-int ipc_buildid(struct ipc_ids* ids, int id, int seq)
-{
-	return SEQ_MULTIPLIER*seq + id;
-}
-
-int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid)
-{
-	if(uid/SEQ_MULTIPLIER != ipcp->seq)
-		return 1;
-	return 0;
+	/*
+	 * No need to verify that the structure is still valid since the
+	 * rw_mutex is held.
+	 */
+	return out;
 }
 
 #ifdef __ARCH_WANT_IPC_PARSE_VERSION
@@ -707,27 +799,30 @@
 	struct ipc_proc_iface *iface;
 };
 
-static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
+/*
+ * This routine locks the ipc structure found at least at position pos.
+ */
+struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
+					loff_t *new_pos)
 {
-	struct ipc_proc_iter *iter = s->private;
-	struct ipc_proc_iface *iface = iter->iface;
-	struct kern_ipc_perm *ipc = it;
-	loff_t p;
-	struct ipc_ids *ids;
+	struct kern_ipc_perm *ipc;
+	int total, id;
 
-	ids = iter->ns->ids[iface->ids];
+	total = 0;
+	for (id = 0; id < pos && total < ids->in_use; id++) {
+		ipc = idr_find(&ids->ipcs_idr, id);
+		if (ipc != NULL)
+			total++;
+	}
 
-	/* If we had an ipc id locked before, unlock it */
-	if (ipc && ipc != SEQ_START_TOKEN)
-		ipc_unlock(ipc);
+	if (total >= ids->in_use)
+		return NULL;
 
-	/*
-	 * p = *pos - 1 (because id 0 starts at position 1)
-	 *          + 1 (because we increment the position by one)
-	 */
-	for (p = *pos; p <= ids->max_id; p++) {
-		if ((ipc = ipc_lock(ids, p)) != NULL) {
-			*pos = p + 1;
+	for ( ; pos < IPCMNI; pos++) {
+		ipc = idr_find(&ids->ipcs_idr, pos);
+		if (ipc != NULL) {
+			*new_pos = pos + 1;
+			ipc_lock_by_ptr(ipc);
 			return ipc;
 		}
 	}
@@ -736,16 +831,27 @@
 	return NULL;
 }
 
+static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
+{
+	struct ipc_proc_iter *iter = s->private;
+	struct ipc_proc_iface *iface = iter->iface;
+	struct kern_ipc_perm *ipc = it;
+
+	/* If we had an ipc id locked before, unlock it */
+	if (ipc && ipc != SEQ_START_TOKEN)
+		ipc_unlock(ipc);
+
+	return sysvipc_find_ipc(iter->ns->ids[iface->ids], *pos, pos);
+}
+
 /*
- * File positions: pos 0 -> header, pos n -> ipc id + 1.
- * SeqFile iterator: iterator value locked shp or SEQ_TOKEN_START.
+ * File positions: pos 0 -> header, pos n -> ipc id = n - 1.
+ * SeqFile iterator: iterator value locked ipc pointer or SEQ_TOKEN_START.
  */
 static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
 {
 	struct ipc_proc_iter *iter = s->private;
 	struct ipc_proc_iface *iface = iter->iface;
-	struct kern_ipc_perm *ipc;
-	loff_t p;
 	struct ipc_ids *ids;
 
 	ids = iter->ns->ids[iface->ids];
@@ -754,7 +860,7 @@
 	 * Take the lock - this will be released by the corresponding
 	 * call to stop().
 	 */
-	mutex_lock(&ids->mutex);
+	down_read(&ids->rw_mutex);
 
 	/* pos < 0 is invalid */
 	if (*pos < 0)
@@ -765,13 +871,7 @@
 		return SEQ_START_TOKEN;
 
 	/* Find the (pos-1)th ipc */
-	for (p = *pos - 1; p <= ids->max_id; p++) {
-		if ((ipc = ipc_lock(ids, p)) != NULL) {
-			*pos = p + 1;
-			return ipc;
-		}
-	}
-	return NULL;
+	return sysvipc_find_ipc(ids, *pos - 1, pos);
 }
 
 static void sysvipc_proc_stop(struct seq_file *s, void *it)
@@ -781,13 +881,13 @@
 	struct ipc_proc_iface *iface = iter->iface;
 	struct ipc_ids *ids;
 
-	/* If we had a locked segment, release it */
+	/* If we had a locked structure, release it */
 	if (ipc && ipc != SEQ_START_TOKEN)
 		ipc_unlock(ipc);
 
 	ids = iter->ns->ids[iface->ids];
 	/* Release the lock we took in start() */
-	mutex_unlock(&ids->mutex);
+	up_read(&ids->rw_mutex);
 }
 
 static int sysvipc_proc_show(struct seq_file *s, void *it)
diff --git a/ipc/util.h b/ipc/util.h
index 333e891..9ffea40 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -10,6 +10,9 @@
 #ifndef _IPC_UTIL_H
 #define _IPC_UTIL_H
 
+#include <linux/idr.h>
+#include <linux/err.h>
+
 #define USHRT_MAX 0xffff
 #define SEQ_MULTIPLIER	(IPCMNI)
 
@@ -25,24 +28,46 @@
 void msg_exit_ns(struct ipc_namespace *ns);
 void shm_exit_ns(struct ipc_namespace *ns);
 
-struct ipc_id_ary {
-	int size;
-	struct kern_ipc_perm *p[0];
-};
-
 struct ipc_ids {
 	int in_use;
-	int max_id;
 	unsigned short seq;
 	unsigned short seq_max;
-	struct mutex mutex;
-	struct ipc_id_ary nullentry;
-	struct ipc_id_ary* entries;
+	struct rw_semaphore rw_mutex;
+	struct idr ipcs_idr;
+};
+
+/*
+ * Structure that holds the parameters needed by the ipc operations
+ * (see after)
+ */
+struct ipc_params {
+	key_t key;
+	int flg;
+	union {
+		size_t size;	/* for shared memories */
+		int nsems;	/* for semaphores */
+	} u;			/* holds the getnew() specific param */
+};
+
+/*
+ * Structure that holds some ipc operations. This structure is used to unify
+ * the calls to sys_msgget(), sys_semget(), sys_shmget()
+ *      . routine to call to create a new ipc object. Can be one of newque,
+ *        newary, newseg
+ *      . routine to call to check permissions for a new ipc object.
+ *        Can be one of security_msg_associate, security_sem_associate,
+ *        security_shm_associate
+ *      . routine to call for an extra check if needed
+ */
+struct ipc_ops {
+	int (*getnew) (struct ipc_namespace *, struct ipc_params *);
+	int (*associate) (struct kern_ipc_perm *, int);
+	int (*more_checks) (struct kern_ipc_perm *, struct ipc_params *);
 };
 
 struct seq_file;
 
-void ipc_init_ids(struct ipc_ids *ids, int size);
+void ipc_init_ids(struct ipc_ids *);
 #ifdef CONFIG_PROC_FS
 void __init ipc_init_proc_interface(const char *path, const char *header,
 		int ids, int (*show)(struct seq_file *, void *));
@@ -54,14 +79,19 @@
 #define IPC_MSG_IDS	1
 #define IPC_SHM_IDS	2
 
-/* must be called with ids->mutex acquired.*/
-int ipc_findkey(struct ipc_ids* ids, key_t key);
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size);
+#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
+
+/* must be called with ids->rw_mutex acquired for writing */
+int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
+
+/* must be called with ids->rw_mutex acquired for reading */
+int ipc_get_maxid(struct ipc_ids *);
 
 /* must be called with both locks acquired. */
-struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id);
+void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);
 
-int ipcperms (struct kern_ipc_perm *ipcp, short flg);
+/* must be called with ipcp locked */
+int ipcperms(struct kern_ipc_perm *ipcp, short flg);
 
 /* for rare, potentially huge allocations.
  * both function can sleep
@@ -79,24 +109,12 @@
 void ipc_rcu_getref(void *ptr);
 void ipc_rcu_putref(void *ptr);
 
-static inline void __ipc_fini_ids(struct ipc_ids *ids,
-		struct ipc_id_ary *entries)
-{
-	if (entries != &ids->nullentry)
-		ipc_rcu_putref(entries);
-}
-
-static inline void ipc_fini_ids(struct ipc_ids *ids)
-{
-	__ipc_fini_ids(ids, ids->entries);
-}
-
-struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id);
-struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id);
-void ipc_lock_by_ptr(struct kern_ipc_perm *ipcp);
-void ipc_unlock(struct kern_ipc_perm* perm);
-int ipc_buildid(struct ipc_ids* ids, int id, int seq);
-int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid);
+/*
+ * ipc_lock_down: called with rw_mutex held
+ * ipc_lock: called without that lock held
+ */
+struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *, int);
+struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 
 void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
 void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
@@ -111,5 +129,89 @@
 extern void free_msg(struct msg_msg *msg);
 extern struct msg_msg *load_msg(const void __user *src, int len);
 extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
+extern int ipcget_new(struct ipc_namespace *, struct ipc_ids *,
+			struct ipc_ops *, struct ipc_params *);
+extern int ipcget_public(struct ipc_namespace *, struct ipc_ids *,
+			struct ipc_ops *, struct ipc_params *);
+
+static inline int ipc_buildid(int id, int seq)
+{
+	return SEQ_MULTIPLIER * seq + id;
+}
+
+/*
+ * Must be called with ipcp locked
+ */
+static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
+{
+	if (uid / SEQ_MULTIPLIER != ipcp->seq)
+		return 1;
+	return 0;
+}
+
+static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
+{
+	rcu_read_lock();
+	spin_lock(&perm->lock);
+}
+
+static inline void ipc_unlock(struct kern_ipc_perm *perm)
+{
+	spin_unlock(&perm->lock);
+	rcu_read_unlock();
+}
+
+static inline struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids,
+						int id)
+{
+	struct kern_ipc_perm *out;
+
+	out = ipc_lock_down(ids, id);
+	if (IS_ERR(out))
+		return out;
+
+	if (ipc_checkid(out, id)) {
+		ipc_unlock(out);
+		return ERR_PTR(-EIDRM);
+	}
+
+	return out;
+}
+
+static inline struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids,
+						int id)
+{
+	struct kern_ipc_perm *out;
+
+	out = ipc_lock(ids, id);
+	if (IS_ERR(out))
+		return out;
+
+	if (ipc_checkid(out, id)) {
+		ipc_unlock(out);
+		return ERR_PTR(-EIDRM);
+	}
+
+	return out;
+}
+
+/**
+ * ipcget - Common sys_*get() code
+ * @ns : namsepace
+ * @ids : IPC identifier set
+ * @ops : operations to be called on ipc object creation, permission checks
+ *        and further checks
+ * @params : the parameters needed by the previous operations.
+ *
+ * Common routine called by sys_msgget(), sys_semget() and sys_shmget().
+ */
+static inline int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
+			struct ipc_ops *ops, struct ipc_params *params)
+{
+	if (params->key == IPC_PRIVATE)
+		return ipcget_new(ns, ids, ops, params);
+	else
+		return ipcget_public(ns, ids, ops, params);
+}
 
 #endif
diff --git a/kernel/Kconfig.instrumentation b/kernel/Kconfig.instrumentation
new file mode 100644
index 0000000..f5f2c76
--- /dev/null
+++ b/kernel/Kconfig.instrumentation
@@ -0,0 +1,49 @@
+menuconfig INSTRUMENTATION
+	bool "Instrumentation Support"
+	default y
+	---help---
+	  Say Y here to get to see options related to performance measurement,
+	  system-wide debugging, and testing. This option alone does not add any
+	  kernel code.
+
+	  If you say N, all options in this submenu will be skipped and
+	  disabled. If you're trying to debug the kernel itself, go see the
+	  Kernel Hacking menu.
+
+if INSTRUMENTATION
+
+config PROFILING
+	bool "Profiling support (EXPERIMENTAL)"
+	help
+	  Say Y here to enable the extended profiling support mechanisms used
+	  by profilers such as OProfile.
+
+config OPROFILE
+	tristate "OProfile system profiling (EXPERIMENTAL)"
+	depends on PROFILING
+	depends on ALPHA || ARM || BLACKFIN || X86_32 || IA64 || M32R || MIPS || PARISC || PPC || S390 || SUPERH || SPARC || X86_64
+	help
+	  OProfile is a profiling system capable of profiling the
+	  whole system, include the kernel, kernel modules, libraries,
+	  and applications.
+
+	  If unsure, say N.
+
+config KPROBES
+	bool "Kprobes"
+	depends on KALLSYMS && MODULES
+	depends on X86_32 || IA64 || PPC || S390 || SPARC64 || X86_64 || AVR32
+	help
+	  Kprobes allows you to trap at almost any kernel address and
+	  execute a callback function.  register_kprobe() establishes
+	  a probepoint and specifies the callback.  Kprobes is useful
+	  for kernel debugging, non-intrusive instrumentation and testing.
+	  If in doubt, say "N".
+
+config MARKERS
+	bool "Activate markers"
+	help
+	  Place an empty function call at each marker site. Can be
+	  dynamically changed for a probe function.
+
+endif # INSTRUMENTATION
diff --git a/kernel/Makefile b/kernel/Makefile
index d63fbb1..79f017e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,9 +8,10 @@
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
-	    utsname.o sysctl_check.o
+	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
+	    utsname.o notifier.o
 
+obj-$(CONFIG_SYSCTL) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
@@ -36,7 +37,11 @@
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_COMPAT) += compat.o
+obj-$(CONFIG_CGROUPS) += cgroup.o
+obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
+obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o
+obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
@@ -51,6 +56,7 @@
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
+obj-$(CONFIG_MARKERS) += marker.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/capability.c b/kernel/capability.c
index cbc5fd6..efbd9cd 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/pid_namespace.h>
 #include <asm/uaccess.h>
 
 /*
@@ -61,8 +62,8 @@
 	spin_lock(&task_capability_lock);
 	read_lock(&tasklist_lock);
 
-	if (pid && pid != current->pid) {
-		target = find_task_by_pid(pid);
+	if (pid && pid != task_pid_vnr(current)) {
+		target = find_task_by_vpid(pid);
 		if (!target) {
 			ret = -ESRCH;
 			goto out;
@@ -95,7 +96,7 @@
 	int found = 0;
 	struct pid *pgrp;
 
-	pgrp = find_pid(pgrp_nr);
+	pgrp = find_vpid(pgrp_nr);
 	do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
 		target = g;
 		while_each_thread(g, target) {
@@ -129,7 +130,7 @@
      int found = 0;
 
      do_each_thread(g, target) {
-             if (target == current || is_init(target))
+             if (target == current || is_container_init(target->group_leader))
                      continue;
              found = 1;
 	     if (security_capset_check(target, effective, inheritable,
@@ -184,7 +185,7 @@
 	if (get_user(pid, &header->pid))
 		return -EFAULT;
 
-	if (pid && pid != current->pid && !capable(CAP_SETPCAP))
+	if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP))
 		return -EPERM;
 
 	if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
@@ -195,8 +196,8 @@
 	spin_lock(&task_capability_lock);
 	read_lock(&tasklist_lock);
 
-	if (pid > 0 && pid != current->pid) {
-		target = find_task_by_pid(pid);
+	if (pid > 0 && pid != task_pid_vnr(current)) {
+		target = find_task_by_vpid(pid);
 		if (!target) {
 			ret = -ESRCH;
 			goto out;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
new file mode 100644
index 0000000..5987dcc
--- /dev/null
+++ b/kernel/cgroup.c
@@ -0,0 +1,2805 @@
+/*
+ *  kernel/cgroup.c
+ *
+ *  Generic process-grouping system.
+ *
+ *  Based originally on the cpuset system, extracted by Paul Menage
+ *  Copyright (C) 2006 Google, Inc
+ *
+ *  Copyright notices from the original cpuset code:
+ *  --------------------------------------------------
+ *  Copyright (C) 2003 BULL SA.
+ *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
+ *
+ *  Portions derived from Patrick Mochel's sysfs code.
+ *  sysfs is Copyright (c) 2001-3 Patrick Mochel
+ *
+ *  2003-10-10 Written by Simon Derr.
+ *  2003-10-22 Updates by Stephen Hemminger.
+ *  2004 May-July Rework by Paul Jackson.
+ *  ---------------------------------------------------
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of the Linux
+ *  distribution for more details.
+ */
+
+#include <linux/cgroup.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/backing-dev.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/magic.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/sort.h>
+#include <linux/kmod.h>
+#include <linux/delayacct.h>
+#include <linux/cgroupstats.h>
+
+#include <asm/atomic.h>
+
+static DEFINE_MUTEX(cgroup_mutex);
+
+/* Generate an array of cgroup subsystem pointers */
+#define SUBSYS(_x) &_x ## _subsys,
+
+static struct cgroup_subsys *subsys[] = {
+#include <linux/cgroup_subsys.h>
+};
+
+/*
+ * A cgroupfs_root represents the root of a cgroup hierarchy,
+ * and may be associated with a superblock to form an active
+ * hierarchy
+ */
+struct cgroupfs_root {
+	struct super_block *sb;
+
+	/*
+	 * The bitmask of subsystems intended to be attached to this
+	 * hierarchy
+	 */
+	unsigned long subsys_bits;
+
+	/* The bitmask of subsystems currently attached to this hierarchy */
+	unsigned long actual_subsys_bits;
+
+	/* A list running through the attached subsystems */
+	struct list_head subsys_list;
+
+	/* The root cgroup for this hierarchy */
+	struct cgroup top_cgroup;
+
+	/* Tracks how many cgroups are currently defined in hierarchy.*/
+	int number_of_cgroups;
+
+	/* A list running through the mounted hierarchies */
+	struct list_head root_list;
+
+	/* Hierarchy-specific flags */
+	unsigned long flags;
+
+	/* The path to use for release notifications. No locking
+	 * between setting and use - so if userspace updates this
+	 * while child cgroups exist, you could miss a
+	 * notification. We ensure that it's always a valid
+	 * NUL-terminated string */
+	char release_agent_path[PATH_MAX];
+};
+
+
+/*
+ * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
+ * subsystems that are otherwise unattached - it never has more than a
+ * single cgroup, and all tasks are part of that cgroup.
+ */
+static struct cgroupfs_root rootnode;
+
+/* The list of hierarchy roots */
+
+static LIST_HEAD(roots);
+static int root_count;
+
+/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
+#define dummytop (&rootnode.top_cgroup)
+
+/* This flag indicates whether tasks in the fork and exit paths should
+ * take callback_mutex and check for fork/exit handlers to call. This
+ * avoids us having to do extra work in the fork/exit path if none of the
+ * subsystems need to be called.
+ */
+static int need_forkexit_callback;
+
+/* bits in struct cgroup flags field */
+enum {
+	/* Control Group is dead */
+	CGRP_REMOVED,
+	/* Control Group has previously had a child cgroup or a task,
+	 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */
+	CGRP_RELEASABLE,
+	/* Control Group requires release notifications to userspace */
+	CGRP_NOTIFY_ON_RELEASE,
+};
+
+/* convenient tests for these bits */
+inline int cgroup_is_removed(const struct cgroup *cgrp)
+{
+	return test_bit(CGRP_REMOVED, &cgrp->flags);
+}
+
+/* bits in struct cgroupfs_root flags field */
+enum {
+	ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
+};
+
+inline int cgroup_is_releasable(const struct cgroup *cgrp)
+{
+	const int bits =
+		(1 << CGRP_RELEASABLE) |
+		(1 << CGRP_NOTIFY_ON_RELEASE);
+	return (cgrp->flags & bits) == bits;
+}
+
+inline int notify_on_release(const struct cgroup *cgrp)
+{
+	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+}
+
+/*
+ * for_each_subsys() allows you to iterate on each subsystem attached to
+ * an active hierarchy
+ */
+#define for_each_subsys(_root, _ss) \
+list_for_each_entry(_ss, &_root->subsys_list, sibling)
+
+/* for_each_root() allows you to iterate across the active hierarchies */
+#define for_each_root(_root) \
+list_for_each_entry(_root, &roots, root_list)
+
+/* the list of cgroups eligible for automatic release. Protected by
+ * release_list_lock */
+static LIST_HEAD(release_list);
+static DEFINE_SPINLOCK(release_list_lock);
+static void cgroup_release_agent(struct work_struct *work);
+static DECLARE_WORK(release_agent_work, cgroup_release_agent);
+static void check_for_release(struct cgroup *cgrp);
+
+/* Link structure for associating css_set objects with cgroups */
+struct cg_cgroup_link {
+	/*
+	 * List running through cg_cgroup_links associated with a
+	 * cgroup, anchored on cgroup->css_sets
+	 */
+	struct list_head cgrp_link_list;
+	/*
+	 * List running through cg_cgroup_links pointing at a
+	 * single css_set object, anchored on css_set->cg_links
+	 */
+	struct list_head cg_link_list;
+	struct css_set *cg;
+};
+
+/* The default css_set - used by init and its children prior to any
+ * hierarchies being mounted. It contains a pointer to the root state
+ * for each subsystem. Also used to anchor the list of css_sets. Not
+ * reference-counted, to improve performance when child cgroups
+ * haven't been created.
+ */
+
+static struct css_set init_css_set;
+static struct cg_cgroup_link init_css_set_link;
+
+/* css_set_lock protects the list of css_set objects, and the
+ * chain of tasks off each css_set.  Nests outside task->alloc_lock
+ * due to cgroup_iter_start() */
+static DEFINE_RWLOCK(css_set_lock);
+static int css_set_count;
+
+/* We don't maintain the lists running through each css_set to its
+ * task until after the first call to cgroup_iter_start(). This
+ * reduces the fork()/exit() overhead for people who have cgroups
+ * compiled into their kernel but not actually in use */
+static int use_task_css_set_links;
+
+/* When we create or destroy a css_set, the operation simply
+ * takes/releases a reference count on all the cgroups referenced
+ * by subsystems in this css_set. This can end up multiple-counting
+ * some cgroups, but that's OK - the ref-count is just a
+ * busy/not-busy indicator; ensuring that we only count each cgroup
+ * once would require taking a global lock to ensure that no
+ * subsystems moved between hierarchies while we were doing so.
+ *
+ * Possible TODO: decide at boot time based on the number of
+ * registered subsystems and the number of CPUs or NUMA nodes whether
+ * it's better for performance to ref-count every subsystem, or to
+ * take a global lock and only add one ref count to each hierarchy.
+ */
+
+/*
+ * unlink a css_set from the list and free it
+ */
+static void unlink_css_set(struct css_set *cg)
+{
+	write_lock(&css_set_lock);
+	list_del(&cg->list);
+	css_set_count--;
+	while (!list_empty(&cg->cg_links)) {
+		struct cg_cgroup_link *link;
+		link = list_entry(cg->cg_links.next,
+				  struct cg_cgroup_link, cg_link_list);
+		list_del(&link->cg_link_list);
+		list_del(&link->cgrp_link_list);
+		kfree(link);
+	}
+	write_unlock(&css_set_lock);
+}
+
+static void __release_css_set(struct kref *k, int taskexit)
+{
+	int i;
+	struct css_set *cg = container_of(k, struct css_set, ref);
+
+	unlink_css_set(cg);
+
+	rcu_read_lock();
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup *cgrp = cg->subsys[i]->cgroup;
+		if (atomic_dec_and_test(&cgrp->count) &&
+		    notify_on_release(cgrp)) {
+			if (taskexit)
+				set_bit(CGRP_RELEASABLE, &cgrp->flags);
+			check_for_release(cgrp);
+		}
+	}
+	rcu_read_unlock();
+	kfree(cg);
+}
+
+static void release_css_set(struct kref *k)
+{
+	__release_css_set(k, 0);
+}
+
+static void release_css_set_taskexit(struct kref *k)
+{
+	__release_css_set(k, 1);
+}
+
+/*
+ * refcounted get/put for css_set objects
+ */
+static inline void get_css_set(struct css_set *cg)
+{
+	kref_get(&cg->ref);
+}
+
+static inline void put_css_set(struct css_set *cg)
+{
+	kref_put(&cg->ref, release_css_set);
+}
+
+static inline void put_css_set_taskexit(struct css_set *cg)
+{
+	kref_put(&cg->ref, release_css_set_taskexit);
+}
+
+/*
+ * find_existing_css_set() is a helper for
+ * find_css_set(), and checks to see whether an existing
+ * css_set is suitable. This currently walks a linked-list for
+ * simplicity; a later patch will use a hash table for better
+ * performance
+ *
+ * oldcg: the cgroup group that we're using before the cgroup
+ * transition
+ *
+ * cgrp: the cgroup that we're moving into
+ *
+ * template: location in which to build the desired set of subsystem
+ * state objects for the new cgroup group
+ */
+
+static struct css_set *find_existing_css_set(
+	struct css_set *oldcg,
+	struct cgroup *cgrp,
+	struct cgroup_subsys_state *template[])
+{
+	int i;
+	struct cgroupfs_root *root = cgrp->root;
+	struct list_head *l = &init_css_set.list;
+
+	/* Built the set of subsystem state objects that we want to
+	 * see in the new css_set */
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		if (root->subsys_bits & (1ull << i)) {
+			/* Subsystem is in this hierarchy. So we want
+			 * the subsystem state from the new
+			 * cgroup */
+			template[i] = cgrp->subsys[i];
+		} else {
+			/* Subsystem is not in this hierarchy, so we
+			 * don't want to change the subsystem state */
+			template[i] = oldcg->subsys[i];
+		}
+	}
+
+	/* Look through existing cgroup groups to find one to reuse */
+	do {
+		struct css_set *cg =
+			list_entry(l, struct css_set, list);
+
+		if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
+			/* All subsystems matched */
+			return cg;
+		}
+		/* Try the next cgroup group */
+		l = l->next;
+	} while (l != &init_css_set.list);
+
+	/* No existing cgroup group matched */
+	return NULL;
+}
+
+/*
+ * allocate_cg_links() allocates "count" cg_cgroup_link structures
+ * and chains them on tmp through their cgrp_link_list fields. Returns 0 on
+ * success or a negative error
+ */
+
+static int allocate_cg_links(int count, struct list_head *tmp)
+{
+	struct cg_cgroup_link *link;
+	int i;
+	INIT_LIST_HEAD(tmp);
+	for (i = 0; i < count; i++) {
+		link = kmalloc(sizeof(*link), GFP_KERNEL);
+		if (!link) {
+			while (!list_empty(tmp)) {
+				link = list_entry(tmp->next,
+						  struct cg_cgroup_link,
+						  cgrp_link_list);
+				list_del(&link->cgrp_link_list);
+				kfree(link);
+			}
+			return -ENOMEM;
+		}
+		list_add(&link->cgrp_link_list, tmp);
+	}
+	return 0;
+}
+
+static void free_cg_links(struct list_head *tmp)
+{
+	while (!list_empty(tmp)) {
+		struct cg_cgroup_link *link;
+		link = list_entry(tmp->next,
+				  struct cg_cgroup_link,
+				  cgrp_link_list);
+		list_del(&link->cgrp_link_list);
+		kfree(link);
+	}
+}
+
+/*
+ * find_css_set() takes an existing cgroup group and a
+ * cgroup object, and returns a css_set object that's
+ * equivalent to the old group, but with the given cgroup
+ * substituted into the appropriate hierarchy. Must be called with
+ * cgroup_mutex held
+ */
+
+static struct css_set *find_css_set(
+	struct css_set *oldcg, struct cgroup *cgrp)
+{
+	struct css_set *res;
+	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+	int i;
+
+	struct list_head tmp_cg_links;
+	struct cg_cgroup_link *link;
+
+	/* First see if we already have a cgroup group that matches
+	 * the desired set */
+	write_lock(&css_set_lock);
+	res = find_existing_css_set(oldcg, cgrp, template);
+	if (res)
+		get_css_set(res);
+	write_unlock(&css_set_lock);
+
+	if (res)
+		return res;
+
+	res = kmalloc(sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return NULL;
+
+	/* Allocate all the cg_cgroup_link objects that we'll need */
+	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
+		kfree(res);
+		return NULL;
+	}
+
+	kref_init(&res->ref);
+	INIT_LIST_HEAD(&res->cg_links);
+	INIT_LIST_HEAD(&res->tasks);
+
+	/* Copy the set of subsystem state objects generated in
+	 * find_existing_css_set() */
+	memcpy(res->subsys, template, sizeof(res->subsys));
+
+	write_lock(&css_set_lock);
+	/* Add reference counts and links from the new css_set. */
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup *cgrp = res->subsys[i]->cgroup;
+		struct cgroup_subsys *ss = subsys[i];
+		atomic_inc(&cgrp->count);
+		/*
+		 * We want to add a link once per cgroup, so we
+		 * only do it for the first subsystem in each
+		 * hierarchy
+		 */
+		if (ss->root->subsys_list.next == &ss->sibling) {
+			BUG_ON(list_empty(&tmp_cg_links));
+			link = list_entry(tmp_cg_links.next,
+					  struct cg_cgroup_link,
+					  cgrp_link_list);
+			list_del(&link->cgrp_link_list);
+			list_add(&link->cgrp_link_list, &cgrp->css_sets);
+			link->cg = res;
+			list_add(&link->cg_link_list, &res->cg_links);
+		}
+	}
+	if (list_empty(&rootnode.subsys_list)) {
+		link = list_entry(tmp_cg_links.next,
+				  struct cg_cgroup_link,
+				  cgrp_link_list);
+		list_del(&link->cgrp_link_list);
+		list_add(&link->cgrp_link_list, &dummytop->css_sets);
+		link->cg = res;
+		list_add(&link->cg_link_list, &res->cg_links);
+	}
+
+	BUG_ON(!list_empty(&tmp_cg_links));
+
+	/* Link this cgroup group into the list */
+	list_add(&res->list, &init_css_set.list);
+	css_set_count++;
+	INIT_LIST_HEAD(&res->tasks);
+	write_unlock(&css_set_lock);
+
+	return res;
+}
+
+/*
+ * There is one global cgroup mutex. We also require taking
+ * task_lock() when dereferencing a task's cgroup subsys pointers.
+ * See "The task_lock() exception", at the end of this comment.
+ *
+ * A task must hold cgroup_mutex to modify cgroups.
+ *
+ * Any task can increment and decrement the count field without lock.
+ * So in general, code holding cgroup_mutex can't rely on the count
+ * field not changing.  However, if the count goes to zero, then only
+ * attach_task() can increment it again.  Because a count of zero
+ * means that no tasks are currently attached, therefore there is no
+ * way a task attached to that cgroup can fork (the other way to
+ * increment the count).  So code holding cgroup_mutex can safely
+ * assume that if the count is zero, it will stay zero. Similarly, if
+ * a task holds cgroup_mutex on a cgroup with zero count, it
+ * knows that the cgroup won't be removed, as cgroup_rmdir()
+ * needs that mutex.
+ *
+ * The cgroup_common_file_write handler for operations that modify
+ * the cgroup hierarchy holds cgroup_mutex across the entire operation,
+ * single threading all such cgroup modifications across the system.
+ *
+ * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
+ * (usually) take cgroup_mutex.  These are the two most performance
+ * critical pieces of code here.  The exception occurs on cgroup_exit(),
+ * when a task in a notify_on_release cgroup exits.  Then cgroup_mutex
+ * is taken, and if the cgroup count is zero, a usermode call made
+ * to /sbin/cgroup_release_agent with the name of the cgroup (path
+ * relative to the root of cgroup file system) as the argument.
+ *
+ * A cgroup can only be deleted if both its 'count' of using tasks
+ * is zero, and its list of 'children' cgroups is empty.  Since all
+ * tasks in the system use _some_ cgroup, and since there is always at
+ * least one task in the system (init, pid == 1), therefore, top_cgroup
+ * always has either children cgroups and/or using tasks.  So we don't
+ * need a special hack to ensure that top_cgroup cannot be deleted.
+ *
+ *	The task_lock() exception
+ *
+ * The need for this exception arises from the action of
+ * attach_task(), which overwrites one tasks cgroup pointer with
+ * another.  It does so using cgroup_mutexe, however there are
+ * several performance critical places that need to reference
+ * task->cgroup without the expense of grabbing a system global
+ * mutex.  Therefore except as noted below, when dereferencing or, as
+ * in attach_task(), modifying a task'ss cgroup pointer we use
+ * task_lock(), which acts on a spinlock (task->alloc_lock) already in
+ * the task_struct routinely used for such matters.
+ *
+ * P.S.  One more locking exception.  RCU is used to guard the
+ * update of a tasks cgroup pointer by attach_task()
+ */
+
+/**
+ * cgroup_lock - lock out any changes to cgroup structures
+ *
+ */
+
+void cgroup_lock(void)
+{
+	mutex_lock(&cgroup_mutex);
+}
+
+/**
+ * cgroup_unlock - release lock on cgroup changes
+ *
+ * Undo the lock taken in a previous cgroup_lock() call.
+ */
+
+void cgroup_unlock(void)
+{
+	mutex_unlock(&cgroup_mutex);
+}
+
+/*
+ * A couple of forward declarations required, due to cyclic reference loop:
+ * cgroup_mkdir -> cgroup_create -> cgroup_populate_dir ->
+ * cgroup_add_file -> cgroup_create_file -> cgroup_dir_inode_operations
+ * -> cgroup_mkdir.
+ */
+
+static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
+static int cgroup_populate_dir(struct cgroup *cgrp);
+static struct inode_operations cgroup_dir_inode_operations;
+static struct file_operations proc_cgroupstats_operations;
+
+static struct backing_dev_info cgroup_backing_dev_info = {
+	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+};
+
+static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (inode) {
+		inode->i_mode = mode;
+		inode->i_uid = current->fsuid;
+		inode->i_gid = current->fsgid;
+		inode->i_blocks = 0;
+		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
+	}
+	return inode;
+}
+
+static void cgroup_diput(struct dentry *dentry, struct inode *inode)
+{
+	/* is dentry a directory ? if so, kfree() associated cgroup */
+	if (S_ISDIR(inode->i_mode)) {
+		struct cgroup *cgrp = dentry->d_fsdata;
+		BUG_ON(!(cgroup_is_removed(cgrp)));
+		/* It's possible for external users to be holding css
+		 * reference counts on a cgroup; css_put() needs to
+		 * be able to access the cgroup after decrementing
+		 * the reference count in order to know if it needs to
+		 * queue the cgroup to be handled by the release
+		 * agent */
+		synchronize_rcu();
+		kfree(cgrp);
+	}
+	iput(inode);
+}
+
+static void remove_dir(struct dentry *d)
+{
+	struct dentry *parent = dget(d->d_parent);
+
+	d_delete(d);
+	simple_rmdir(parent->d_inode, d);
+	dput(parent);
+}
+
+static void cgroup_clear_directory(struct dentry *dentry)
+{
+	struct list_head *node;
+
+	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+	spin_lock(&dcache_lock);
+	node = dentry->d_subdirs.next;
+	while (node != &dentry->d_subdirs) {
+		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
+		list_del_init(node);
+		if (d->d_inode) {
+			/* This should never be called on a cgroup
+			 * directory with child cgroups */
+			BUG_ON(d->d_inode->i_mode & S_IFDIR);
+			d = dget_locked(d);
+			spin_unlock(&dcache_lock);
+			d_delete(d);
+			simple_unlink(dentry->d_inode, d);
+			dput(d);
+			spin_lock(&dcache_lock);
+		}
+		node = dentry->d_subdirs.next;
+	}
+	spin_unlock(&dcache_lock);
+}
+
+/*
+ * NOTE : the dentry must have been dget()'ed
+ */
+static void cgroup_d_remove_dir(struct dentry *dentry)
+{
+	cgroup_clear_directory(dentry);
+
+	spin_lock(&dcache_lock);
+	list_del_init(&dentry->d_u.d_child);
+	spin_unlock(&dcache_lock);
+	remove_dir(dentry);
+}
+
+static int rebind_subsystems(struct cgroupfs_root *root,
+			      unsigned long final_bits)
+{
+	unsigned long added_bits, removed_bits;
+	struct cgroup *cgrp = &root->top_cgroup;
+	int i;
+
+	removed_bits = root->actual_subsys_bits & ~final_bits;
+	added_bits = final_bits & ~root->actual_subsys_bits;
+	/* Check that any added subsystems are currently free */
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		unsigned long long bit = 1ull << i;
+		struct cgroup_subsys *ss = subsys[i];
+		if (!(bit & added_bits))
+			continue;
+		if (ss->root != &rootnode) {
+			/* Subsystem isn't free */
+			return -EBUSY;
+		}
+	}
+
+	/* Currently we don't handle adding/removing subsystems when
+	 * any child cgroups exist. This is theoretically supportable
+	 * but involves complex error handling, so it's being left until
+	 * later */
+	if (!list_empty(&cgrp->children))
+		return -EBUSY;
+
+	/* Process each subsystem */
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup_subsys *ss = subsys[i];
+		unsigned long bit = 1UL << i;
+		if (bit & added_bits) {
+			/* We're binding this subsystem to this hierarchy */
+			BUG_ON(cgrp->subsys[i]);
+			BUG_ON(!dummytop->subsys[i]);
+			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
+			cgrp->subsys[i] = dummytop->subsys[i];
+			cgrp->subsys[i]->cgroup = cgrp;
+			list_add(&ss->sibling, &root->subsys_list);
+			rcu_assign_pointer(ss->root, root);
+			if (ss->bind)
+				ss->bind(ss, cgrp);
+
+		} else if (bit & removed_bits) {
+			/* We're removing this subsystem */
+			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
+			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
+			if (ss->bind)
+				ss->bind(ss, dummytop);
+			dummytop->subsys[i]->cgroup = dummytop;
+			cgrp->subsys[i] = NULL;
+			rcu_assign_pointer(subsys[i]->root, &rootnode);
+			list_del(&ss->sibling);
+		} else if (bit & final_bits) {
+			/* Subsystem state should already exist */
+			BUG_ON(!cgrp->subsys[i]);
+		} else {
+			/* Subsystem state shouldn't exist */
+			BUG_ON(cgrp->subsys[i]);
+		}
+	}
+	root->subsys_bits = root->actual_subsys_bits = final_bits;
+	synchronize_rcu();
+
+	return 0;
+}
+
+static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
+{
+	struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
+	struct cgroup_subsys *ss;
+
+	mutex_lock(&cgroup_mutex);
+	for_each_subsys(root, ss)
+		seq_printf(seq, ",%s", ss->name);
+	if (test_bit(ROOT_NOPREFIX, &root->flags))
+		seq_puts(seq, ",noprefix");
+	if (strlen(root->release_agent_path))
+		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
+	mutex_unlock(&cgroup_mutex);
+	return 0;
+}
+
+struct cgroup_sb_opts {
+	unsigned long subsys_bits;
+	unsigned long flags;
+	char *release_agent;
+};
+
+/* Convert a hierarchy specifier into a bitmask of subsystems and
+ * flags. */
+static int parse_cgroupfs_options(char *data,
+				     struct cgroup_sb_opts *opts)
+{
+	char *token, *o = data ?: "all";
+
+	opts->subsys_bits = 0;
+	opts->flags = 0;
+	opts->release_agent = NULL;
+
+	while ((token = strsep(&o, ",")) != NULL) {
+		if (!*token)
+			return -EINVAL;
+		if (!strcmp(token, "all")) {
+			opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1;
+		} else if (!strcmp(token, "noprefix")) {
+			set_bit(ROOT_NOPREFIX, &opts->flags);
+		} else if (!strncmp(token, "release_agent=", 14)) {
+			/* Specifying two release agents is forbidden */
+			if (opts->release_agent)
+				return -EINVAL;
+			opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);
+			if (!opts->release_agent)
+				return -ENOMEM;
+			strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
+			opts->release_agent[PATH_MAX - 1] = 0;
+		} else {
+			struct cgroup_subsys *ss;
+			int i;
+			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+				ss = subsys[i];
+				if (!strcmp(token, ss->name)) {
+					set_bit(i, &opts->subsys_bits);
+					break;
+				}
+			}
+			if (i == CGROUP_SUBSYS_COUNT)
+				return -ENOENT;
+		}
+	}
+
+	/* We can't have an empty hierarchy */
+	if (!opts->subsys_bits)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int cgroup_remount(struct super_block *sb, int *flags, char *data)
+{
+	int ret = 0;
+	struct cgroupfs_root *root = sb->s_fs_info;
+	struct cgroup *cgrp = &root->top_cgroup;
+	struct cgroup_sb_opts opts;
+
+	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
+	mutex_lock(&cgroup_mutex);
+
+	/* See what subsystems are wanted */
+	ret = parse_cgroupfs_options(data, &opts);
+	if (ret)
+		goto out_unlock;
+
+	/* Don't allow flags to change at remount */
+	if (opts.flags != root->flags) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ret = rebind_subsystems(root, opts.subsys_bits);
+
+	/* (re)populate subsystem files */
+	if (!ret)
+		cgroup_populate_dir(cgrp);
+
+	if (opts.release_agent)
+		strcpy(root->release_agent_path, opts.release_agent);
+ out_unlock:
+	if (opts.release_agent)
+		kfree(opts.release_agent);
+	mutex_unlock(&cgroup_mutex);
+	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+	return ret;
+}
+
+static struct super_operations cgroup_ops = {
+	.statfs = simple_statfs,
+	.drop_inode = generic_delete_inode,
+	.show_options = cgroup_show_options,
+	.remount_fs = cgroup_remount,
+};
+
+static void init_cgroup_root(struct cgroupfs_root *root)
+{
+	struct cgroup *cgrp = &root->top_cgroup;
+	INIT_LIST_HEAD(&root->subsys_list);
+	INIT_LIST_HEAD(&root->root_list);
+	root->number_of_cgroups = 1;
+	cgrp->root = root;
+	cgrp->top_cgroup = cgrp;
+	INIT_LIST_HEAD(&cgrp->sibling);
+	INIT_LIST_HEAD(&cgrp->children);
+	INIT_LIST_HEAD(&cgrp->css_sets);
+	INIT_LIST_HEAD(&cgrp->release_list);
+}
+
+static int cgroup_test_super(struct super_block *sb, void *data)
+{
+	struct cgroupfs_root *new = data;
+	struct cgroupfs_root *root = sb->s_fs_info;
+
+	/* First check subsystems */
+	if (new->subsys_bits != root->subsys_bits)
+	    return 0;
+
+	/* Next check flags */
+	if (new->flags != root->flags)
+		return 0;
+
+	return 1;
+}
+
+static int cgroup_set_super(struct super_block *sb, void *data)
+{
+	int ret;
+	struct cgroupfs_root *root = data;
+
+	ret = set_anon_super(sb, NULL);
+	if (ret)
+		return ret;
+
+	sb->s_fs_info = root;
+	root->sb = sb;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = CGROUP_SUPER_MAGIC;
+	sb->s_op = &cgroup_ops;
+
+	return 0;
+}
+
+static int cgroup_get_rootdir(struct super_block *sb)
+{
+	struct inode *inode =
+		cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
+	struct dentry *dentry;
+
+	if (!inode)
+		return -ENOMEM;
+
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+	inode->i_op = &cgroup_dir_inode_operations;
+	/* directories start off with i_nlink == 2 (for "." entry) */
+	inc_nlink(inode);
+	dentry = d_alloc_root(inode);
+	if (!dentry) {
+		iput(inode);
+		return -ENOMEM;
+	}
+	sb->s_root = dentry;
+	return 0;
+}
+
+static int cgroup_get_sb(struct file_system_type *fs_type,
+			 int flags, const char *unused_dev_name,
+			 void *data, struct vfsmount *mnt)
+{
+	struct cgroup_sb_opts opts;
+	int ret = 0;
+	struct super_block *sb;
+	struct cgroupfs_root *root;
+	struct list_head tmp_cg_links, *l;
+	INIT_LIST_HEAD(&tmp_cg_links);
+
+	/* First find the desired set of subsystems */
+	ret = parse_cgroupfs_options(data, &opts);
+	if (ret) {
+		if (opts.release_agent)
+			kfree(opts.release_agent);
+		return ret;
+	}
+
+	root = kzalloc(sizeof(*root), GFP_KERNEL);
+	if (!root)
+		return -ENOMEM;
+
+	init_cgroup_root(root);
+	root->subsys_bits = opts.subsys_bits;
+	root->flags = opts.flags;
+	if (opts.release_agent) {
+		strcpy(root->release_agent_path, opts.release_agent);
+		kfree(opts.release_agent);
+	}
+
+	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);
+
+	if (IS_ERR(sb)) {
+		kfree(root);
+		return PTR_ERR(sb);
+	}
+
+	if (sb->s_fs_info != root) {
+		/* Reusing an existing superblock */
+		BUG_ON(sb->s_root == NULL);
+		kfree(root);
+		root = NULL;
+	} else {
+		/* New superblock */
+		struct cgroup *cgrp = &root->top_cgroup;
+		struct inode *inode;
+
+		BUG_ON(sb->s_root != NULL);
+
+		ret = cgroup_get_rootdir(sb);
+		if (ret)
+			goto drop_new_super;
+		inode = sb->s_root->d_inode;
+
+		mutex_lock(&inode->i_mutex);
+		mutex_lock(&cgroup_mutex);
+
+		/*
+		 * We're accessing css_set_count without locking
+		 * css_set_lock here, but that's OK - it can only be
+		 * increased by someone holding cgroup_lock, and
+		 * that's us. The worst that can happen is that we
+		 * have some link structures left over
+		 */
+		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
+		if (ret) {
+			mutex_unlock(&cgroup_mutex);
+			mutex_unlock(&inode->i_mutex);
+			goto drop_new_super;
+		}
+
+		ret = rebind_subsystems(root, root->subsys_bits);
+		if (ret == -EBUSY) {
+			mutex_unlock(&cgroup_mutex);
+			mutex_unlock(&inode->i_mutex);
+			goto drop_new_super;
+		}
+
+		/* EBUSY should be the only error here */
+		BUG_ON(ret);
+
+		list_add(&root->root_list, &roots);
+		root_count++;
+
+		sb->s_root->d_fsdata = &root->top_cgroup;
+		root->top_cgroup.dentry = sb->s_root;
+
+		/* Link the top cgroup in this hierarchy into all
+		 * the css_set objects */
+		write_lock(&css_set_lock);
+		l = &init_css_set.list;
+		do {
+			struct css_set *cg;
+			struct cg_cgroup_link *link;
+			cg = list_entry(l, struct css_set, list);
+			BUG_ON(list_empty(&tmp_cg_links));
+			link = list_entry(tmp_cg_links.next,
+					  struct cg_cgroup_link,
+					  cgrp_link_list);
+			list_del(&link->cgrp_link_list);
+			link->cg = cg;
+			list_add(&link->cgrp_link_list,
+				 &root->top_cgroup.css_sets);
+			list_add(&link->cg_link_list, &cg->cg_links);
+			l = l->next;
+		} while (l != &init_css_set.list);
+		write_unlock(&css_set_lock);
+
+		free_cg_links(&tmp_cg_links);
+
+		BUG_ON(!list_empty(&cgrp->sibling));
+		BUG_ON(!list_empty(&cgrp->children));
+		BUG_ON(root->number_of_cgroups != 1);
+
+		cgroup_populate_dir(cgrp);
+		mutex_unlock(&inode->i_mutex);
+		mutex_unlock(&cgroup_mutex);
+	}
+
+	return simple_set_mnt(mnt, sb);
+
+ drop_new_super:
+	up_write(&sb->s_umount);
+	deactivate_super(sb);
+	free_cg_links(&tmp_cg_links);
+	return ret;
+}
+
+static void cgroup_kill_sb(struct super_block *sb) {
+	struct cgroupfs_root *root = sb->s_fs_info;
+	struct cgroup *cgrp = &root->top_cgroup;
+	int ret;
+
+	BUG_ON(!root);
+
+	BUG_ON(root->number_of_cgroups != 1);
+	BUG_ON(!list_empty(&cgrp->children));
+	BUG_ON(!list_empty(&cgrp->sibling));
+
+	mutex_lock(&cgroup_mutex);
+
+	/* Rebind all subsystems back to the default hierarchy */
+	ret = rebind_subsystems(root, 0);
+	/* Shouldn't be able to fail ... */
+	BUG_ON(ret);
+
+	/*
+	 * Release all the links from css_sets to this hierarchy's
+	 * root cgroup
+	 */
+	write_lock(&css_set_lock);
+	while (!list_empty(&cgrp->css_sets)) {
+		struct cg_cgroup_link *link;
+		link = list_entry(cgrp->css_sets.next,
+				  struct cg_cgroup_link, cgrp_link_list);
+		list_del(&link->cg_link_list);
+		list_del(&link->cgrp_link_list);
+		kfree(link);
+	}
+	write_unlock(&css_set_lock);
+
+	if (!list_empty(&root->root_list)) {
+		list_del(&root->root_list);
+		root_count--;
+	}
+	mutex_unlock(&cgroup_mutex);
+
+	kfree(root);
+	kill_litter_super(sb);
+}
+
+static struct file_system_type cgroup_fs_type = {
+	.name = "cgroup",
+	.get_sb = cgroup_get_sb,
+	.kill_sb = cgroup_kill_sb,
+};
+
+static inline struct cgroup *__d_cgrp(struct dentry *dentry)
+{
+	return dentry->d_fsdata;
+}
+
+static inline struct cftype *__d_cft(struct dentry *dentry)
+{
+	return dentry->d_fsdata;
+}
+
+/*
+ * Called with cgroup_mutex held.  Writes path of cgroup into buf.
+ * Returns 0 on success, -errno on error.
+ */
+int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
+{
+	char *start;
+
+	if (cgrp == dummytop) {
+		/*
+		 * Inactive subsystems have no dentry for their root
+		 * cgroup
+		 */
+		strcpy(buf, "/");
+		return 0;
+	}
+
+	start = buf + buflen;
+
+	*--start = '\0';
+	for (;;) {
+		int len = cgrp->dentry->d_name.len;
+		if ((start -= len) < buf)
+			return -ENAMETOOLONG;
+		memcpy(start, cgrp->dentry->d_name.name, len);
+		cgrp = cgrp->parent;
+		if (!cgrp)
+			break;
+		if (!cgrp->parent)
+			continue;
+		if (--start < buf)
+			return -ENAMETOOLONG;
+		*start = '/';
+	}
+	memmove(buf, start, buf + buflen - start);
+	return 0;
+}
+
+/*
+ * Return the first subsystem attached to a cgroup's hierarchy, and
+ * its subsystem id.
+ */
+
+static void get_first_subsys(const struct cgroup *cgrp,
+			struct cgroup_subsys_state **css, int *subsys_id)
+{
+	const struct cgroupfs_root *root = cgrp->root;
+	const struct cgroup_subsys *test_ss;
+	BUG_ON(list_empty(&root->subsys_list));
+	test_ss = list_entry(root->subsys_list.next,
+			     struct cgroup_subsys, sibling);
+	if (css) {
+		*css = cgrp->subsys[test_ss->subsys_id];
+		BUG_ON(!*css);
+	}
+	if (subsys_id)
+		*subsys_id = test_ss->subsys_id;
+}
+
+/*
+ * Attach task 'tsk' to cgroup 'cgrp'
+ *
+ * Call holding cgroup_mutex.  May take task_lock of
+ * the task 'pid' during call.
+ */
+static int attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+{
+	int retval = 0;
+	struct cgroup_subsys *ss;
+	struct cgroup *oldcgrp;
+	struct css_set *cg = tsk->cgroups;
+	struct css_set *newcg;
+	struct cgroupfs_root *root = cgrp->root;
+	int subsys_id;
+
+	get_first_subsys(cgrp, NULL, &subsys_id);
+
+	/* Nothing to do if the task is already in that cgroup */
+	oldcgrp = task_cgroup(tsk, subsys_id);
+	if (cgrp == oldcgrp)
+		return 0;
+
+	for_each_subsys(root, ss) {
+		if (ss->can_attach) {
+			retval = ss->can_attach(ss, cgrp, tsk);
+			if (retval) {
+				return retval;
+			}
+		}
+	}
+
+	/*
+	 * Locate or allocate a new css_set for this task,
+	 * based on its final set of cgroups
+	 */
+	newcg = find_css_set(cg, cgrp);
+	if (!newcg) {
+		return -ENOMEM;
+	}
+
+	task_lock(tsk);
+	if (tsk->flags & PF_EXITING) {
+		task_unlock(tsk);
+		put_css_set(newcg);
+		return -ESRCH;
+	}
+	rcu_assign_pointer(tsk->cgroups, newcg);
+	task_unlock(tsk);
+
+	/* Update the css_set linked lists if we're using them */
+	write_lock(&css_set_lock);
+	if (!list_empty(&tsk->cg_list)) {
+		list_del(&tsk->cg_list);
+		list_add(&tsk->cg_list, &newcg->tasks);
+	}
+	write_unlock(&css_set_lock);
+
+	for_each_subsys(root, ss) {
+		if (ss->attach) {
+			ss->attach(ss, cgrp, oldcgrp, tsk);
+		}
+	}
+	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+	synchronize_rcu();
+	put_css_set(cg);
+	return 0;
+}
+
+/*
+ * Attach task with pid 'pid' to cgroup 'cgrp'. Call with
+ * cgroup_mutex, may take task_lock of task
+ */
+static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
+{
+	pid_t pid;
+	struct task_struct *tsk;
+	int ret;
+
+	if (sscanf(pidbuf, "%d", &pid) != 1)
+		return -EIO;
+
+	if (pid) {
+		rcu_read_lock();
+		tsk = find_task_by_pid(pid);
+		if (!tsk || tsk->flags & PF_EXITING) {
+			rcu_read_unlock();
+			return -ESRCH;
+		}
+		get_task_struct(tsk);
+		rcu_read_unlock();
+
+		if ((current->euid) && (current->euid != tsk->uid)
+		    && (current->euid != tsk->suid)) {
+			put_task_struct(tsk);
+			return -EACCES;
+		}
+	} else {
+		tsk = current;
+		get_task_struct(tsk);
+	}
+
+	ret = attach_task(cgrp, tsk);
+	put_task_struct(tsk);
+	return ret;
+}
+
+/* The various types of files and directories in a cgroup file system */
+
+enum cgroup_filetype {
+	FILE_ROOT,
+	FILE_DIR,
+	FILE_TASKLIST,
+	FILE_NOTIFY_ON_RELEASE,
+	FILE_RELEASABLE,
+	FILE_RELEASE_AGENT,
+};
+
+static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft,
+				 struct file *file,
+				 const char __user *userbuf,
+				 size_t nbytes, loff_t *unused_ppos)
+{
+	char buffer[64];
+	int retval = 0;
+	u64 val;
+	char *end;
+
+	if (!nbytes)
+		return -EINVAL;
+	if (nbytes >= sizeof(buffer))
+		return -E2BIG;
+	if (copy_from_user(buffer, userbuf, nbytes))
+		return -EFAULT;
+
+	buffer[nbytes] = 0;     /* nul-terminate */
+
+	/* strip newline if necessary */
+	if (nbytes && (buffer[nbytes-1] == '\n'))
+		buffer[nbytes-1] = 0;
+	val = simple_strtoull(buffer, &end, 0);
+	if (*end)
+		return -EINVAL;
+
+	/* Pass to subsystem */
+	retval = cft->write_uint(cgrp, cft, val);
+	if (!retval)
+		retval = nbytes;
+	return retval;
+}
+
+static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
+					   struct cftype *cft,
+					   struct file *file,
+					   const char __user *userbuf,
+					   size_t nbytes, loff_t *unused_ppos)
+{
+	enum cgroup_filetype type = cft->private;
+	char *buffer;
+	int retval = 0;
+
+	if (nbytes >= PATH_MAX)
+		return -E2BIG;
+
+	/* +1 for nul-terminator */
+	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+	if (buffer == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(buffer, userbuf, nbytes)) {
+		retval = -EFAULT;
+		goto out1;
+	}
+	buffer[nbytes] = 0;	/* nul-terminate */
+
+	mutex_lock(&cgroup_mutex);
+
+	if (cgroup_is_removed(cgrp)) {
+		retval = -ENODEV;
+		goto out2;
+	}
+
+	switch (type) {
+	case FILE_TASKLIST:
+		retval = attach_task_by_pid(cgrp, buffer);
+		break;
+	case FILE_NOTIFY_ON_RELEASE:
+		clear_bit(CGRP_RELEASABLE, &cgrp->flags);
+		if (simple_strtoul(buffer, NULL, 10) != 0)
+			set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+		else
+			clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+		break;
+	case FILE_RELEASE_AGENT:
+	{
+		struct cgroupfs_root *root = cgrp->root;
+		/* Strip trailing newline */
+		if (nbytes && (buffer[nbytes-1] == '\n')) {
+			buffer[nbytes-1] = 0;
+		}
+		if (nbytes < sizeof(root->release_agent_path)) {
+			/* We never write anything other than '\0'
+			 * into the last char of release_agent_path,
+			 * so it always remains a NUL-terminated
+			 * string */
+			strncpy(root->release_agent_path, buffer, nbytes);
+			root->release_agent_path[nbytes] = 0;
+		} else {
+			retval = -ENOSPC;
+		}
+		break;
+	}
+	default:
+		retval = -EINVAL;
+		goto out2;
+	}
+
+	if (retval == 0)
+		retval = nbytes;
+out2:
+	mutex_unlock(&cgroup_mutex);
+out1:
+	kfree(buffer);
+	return retval;
+}
+
+static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
+						size_t nbytes, loff_t *ppos)
+{
+	struct cftype *cft = __d_cft(file->f_dentry);
+	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+
+	if (!cft)
+		return -ENODEV;
+	if (cft->write)
+		return cft->write(cgrp, cft, file, buf, nbytes, ppos);
+	if (cft->write_uint)
+		return cgroup_write_uint(cgrp, cft, file, buf, nbytes, ppos);
+	return -EINVAL;
+}
+
+static ssize_t cgroup_read_uint(struct cgroup *cgrp, struct cftype *cft,
+				   struct file *file,
+				   char __user *buf, size_t nbytes,
+				   loff_t *ppos)
+{
+	char tmp[64];
+	u64 val = cft->read_uint(cgrp, cft);
+	int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
+
+	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
+}
+
+static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
+					  struct cftype *cft,
+					  struct file *file,
+					  char __user *buf,
+					  size_t nbytes, loff_t *ppos)
+{
+	enum cgroup_filetype type = cft->private;
+	char *page;
+	ssize_t retval = 0;
+	char *s;
+
+	if (!(page = (char *)__get_free_page(GFP_KERNEL)))
+		return -ENOMEM;
+
+	s = page;
+
+	switch (type) {
+	case FILE_RELEASE_AGENT:
+	{
+		struct cgroupfs_root *root;
+		size_t n;
+		mutex_lock(&cgroup_mutex);
+		root = cgrp->root;
+		n = strnlen(root->release_agent_path,
+			    sizeof(root->release_agent_path));
+		n = min(n, (size_t) PAGE_SIZE);
+		strncpy(s, root->release_agent_path, n);
+		mutex_unlock(&cgroup_mutex);
+		s += n;
+		break;
+	}
+	default:
+		retval = -EINVAL;
+		goto out;
+	}
+	*s++ = '\n';
+
+	retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
+out:
+	free_page((unsigned long)page);
+	return retval;
+}
+
+static ssize_t cgroup_file_read(struct file *file, char __user *buf,
+				   size_t nbytes, loff_t *ppos)
+{
+	struct cftype *cft = __d_cft(file->f_dentry);
+	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+
+	if (!cft)
+		return -ENODEV;
+
+	if (cft->read)
+		return cft->read(cgrp, cft, file, buf, nbytes, ppos);
+	if (cft->read_uint)
+		return cgroup_read_uint(cgrp, cft, file, buf, nbytes, ppos);
+	return -EINVAL;
+}
+
+static int cgroup_file_open(struct inode *inode, struct file *file)
+{
+	int err;
+	struct cftype *cft;
+
+	err = generic_file_open(inode, file);
+	if (err)
+		return err;
+
+	cft = __d_cft(file->f_dentry);
+	if (!cft)
+		return -ENODEV;
+	if (cft->open)
+		err = cft->open(inode, file);
+	else
+		err = 0;
+
+	return err;
+}
+
+static int cgroup_file_release(struct inode *inode, struct file *file)
+{
+	struct cftype *cft = __d_cft(file->f_dentry);
+	if (cft->release)
+		return cft->release(inode, file);
+	return 0;
+}
+
+/*
+ * cgroup_rename - Only allow simple rename of directories in place.
+ */
+static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
+			    struct inode *new_dir, struct dentry *new_dentry)
+{
+	if (!S_ISDIR(old_dentry->d_inode->i_mode))
+		return -ENOTDIR;
+	if (new_dentry->d_inode)
+		return -EEXIST;
+	if (old_dir != new_dir)
+		return -EIO;
+	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
+static struct file_operations cgroup_file_operations = {
+	.read = cgroup_file_read,
+	.write = cgroup_file_write,
+	.llseek = generic_file_llseek,
+	.open = cgroup_file_open,
+	.release = cgroup_file_release,
+};
+
+static struct inode_operations cgroup_dir_inode_operations = {
+	.lookup = simple_lookup,
+	.mkdir = cgroup_mkdir,
+	.rmdir = cgroup_rmdir,
+	.rename = cgroup_rename,
+};
+
+static int cgroup_create_file(struct dentry *dentry, int mode,
+				struct super_block *sb)
+{
+	static struct dentry_operations cgroup_dops = {
+		.d_iput = cgroup_diput,
+	};
+
+	struct inode *inode;
+
+	if (!dentry)
+		return -ENOENT;
+	if (dentry->d_inode)
+		return -EEXIST;
+
+	inode = cgroup_new_inode(mode, sb);
+	if (!inode)
+		return -ENOMEM;
+
+	if (S_ISDIR(mode)) {
+		inode->i_op = &cgroup_dir_inode_operations;
+		inode->i_fop = &simple_dir_operations;
+
+		/* start off with i_nlink == 2 (for "." entry) */
+		inc_nlink(inode);
+
+		/* start with the directory inode held, so that we can
+		 * populate it without racing with another mkdir */
+		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+	} else if (S_ISREG(mode)) {
+		inode->i_size = 0;
+		inode->i_fop = &cgroup_file_operations;
+	}
+	dentry->d_op = &cgroup_dops;
+	d_instantiate(dentry, inode);
+	dget(dentry);	/* Extra count - pin the dentry in core */
+	return 0;
+}
+
+/*
+ *	cgroup_create_dir - create a directory for an object.
+ *	cgrp:	the cgroup we create the directory for.
+ *		It must have a valid ->parent field
+ *		And we are going to fill its ->dentry field.
+ *	dentry: dentry of the new cgroup
+ *	mode:	mode to set on new directory.
+ */
+static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
+				int mode)
+{
+	struct dentry *parent;
+	int error = 0;
+
+	parent = cgrp->parent->dentry;
+	error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
+	if (!error) {
+		dentry->d_fsdata = cgrp;
+		inc_nlink(parent->d_inode);
+		cgrp->dentry = dentry;
+		dget(dentry);
+	}
+	dput(dentry);
+
+	return error;
+}
+
+int cgroup_add_file(struct cgroup *cgrp,
+		       struct cgroup_subsys *subsys,
+		       const struct cftype *cft)
+{
+	struct dentry *dir = cgrp->dentry;
+	struct dentry *dentry;
+	int error;
+
+	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
+	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
+		strcpy(name, subsys->name);
+		strcat(name, ".");
+	}
+	strcat(name, cft->name);
+	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
+	dentry = lookup_one_len(name, dir, strlen(name));
+	if (!IS_ERR(dentry)) {
+		error = cgroup_create_file(dentry, 0644 | S_IFREG,
+						cgrp->root->sb);
+		if (!error)
+			dentry->d_fsdata = (void *)cft;
+		dput(dentry);
+	} else
+		error = PTR_ERR(dentry);
+	return error;
+}
+
+int cgroup_add_files(struct cgroup *cgrp,
+			struct cgroup_subsys *subsys,
+			const struct cftype cft[],
+			int count)
+{
+	int i, err;
+	for (i = 0; i < count; i++) {
+		err = cgroup_add_file(cgrp, subsys, &cft[i]);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/* Count the number of tasks in a cgroup. */
+
+int cgroup_task_count(const struct cgroup *cgrp)
+{
+	int count = 0;
+	struct list_head *l;
+
+	read_lock(&css_set_lock);
+	l = cgrp->css_sets.next;
+	while (l != &cgrp->css_sets) {
+		struct cg_cgroup_link *link =
+			list_entry(l, struct cg_cgroup_link, cgrp_link_list);
+		count += atomic_read(&link->cg->ref.refcount);
+		l = l->next;
+	}
+	read_unlock(&css_set_lock);
+	return count;
+}
+
+/*
+ * Advance a list_head iterator.  The iterator should be positioned at
+ * the start of a css_set
+ */
+static void cgroup_advance_iter(struct cgroup *cgrp,
+					  struct cgroup_iter *it)
+{
+	struct list_head *l = it->cg_link;
+	struct cg_cgroup_link *link;
+	struct css_set *cg;
+
+	/* Advance to the next non-empty css_set */
+	do {
+		l = l->next;
+		if (l == &cgrp->css_sets) {
+			it->cg_link = NULL;
+			return;
+		}
+		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
+		cg = link->cg;
+	} while (list_empty(&cg->tasks));
+	it->cg_link = l;
+	it->task = cg->tasks.next;
+}
+
+void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
+{
+	/*
+	 * The first time anyone tries to iterate across a cgroup,
+	 * we need to enable the list linking each css_set to its
+	 * tasks, and fix up all existing tasks.
+	 */
+	if (!use_task_css_set_links) {
+		struct task_struct *p, *g;
+		write_lock(&css_set_lock);
+		use_task_css_set_links = 1;
+ 		do_each_thread(g, p) {
+			task_lock(p);
+			if (list_empty(&p->cg_list))
+				list_add(&p->cg_list, &p->cgroups->tasks);
+			task_unlock(p);
+ 		} while_each_thread(g, p);
+		write_unlock(&css_set_lock);
+	}
+	read_lock(&css_set_lock);
+	it->cg_link = &cgrp->css_sets;
+	cgroup_advance_iter(cgrp, it);
+}
+
+struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
+					struct cgroup_iter *it)
+{
+	struct task_struct *res;
+	struct list_head *l = it->task;
+
+	/* If the iterator cg is NULL, we have no tasks */
+	if (!it->cg_link)
+		return NULL;
+	res = list_entry(l, struct task_struct, cg_list);
+	/* Advance iterator to find next entry */
+	l = l->next;
+	if (l == &res->cgroups->tasks) {
+		/* We reached the end of this task list - move on to
+		 * the next cg_cgroup_link */
+		cgroup_advance_iter(cgrp, it);
+	} else {
+		it->task = l;
+	}
+	return res;
+}
+
+void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
+{
+	read_unlock(&css_set_lock);
+}
+
+/*
+ * Stuff for reading the 'tasks' file.
+ *
+ * Reading this file can return large amounts of data if a cgroup has
+ * *lots* of attached tasks. So it may need several calls to read(),
+ * but we cannot guarantee that the information we produce is correct
+ * unless we produce it entirely atomically.
+ *
+ * Upon tasks file open(), a struct ctr_struct is allocated, that
+ * will have a pointer to an array (also allocated here).  The struct
+ * ctr_struct * is stored in file->private_data.  Its resources will
+ * be freed by release() when the file is closed.  The array is used
+ * to sprintf the PIDs and then used by read().
+ */
+struct ctr_struct {
+	char *buf;
+	int bufsz;
+};
+
+/*
+ * Load into 'pidarray' up to 'npids' of the tasks using cgroup
+ * 'cgrp'.  Return actual number of pids loaded.  No need to
+ * task_lock(p) when reading out p->cgroup, since we're in an RCU
+ * read section, so the css_set can't go away, and is
+ * immutable after creation.
+ */
+static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
+{
+	int n = 0;
+	struct cgroup_iter it;
+	struct task_struct *tsk;
+	cgroup_iter_start(cgrp, &it);
+	while ((tsk = cgroup_iter_next(cgrp, &it))) {
+		if (unlikely(n == npids))
+			break;
+		pidarray[n++] = task_pid_nr(tsk);
+	}
+	cgroup_iter_end(cgrp, &it);
+	return n;
+}
+
+/**
+ * Build and fill cgroupstats so that taskstats can export it to user
+ * space.
+ *
+ * @stats: cgroupstats to fill information into
+ * @dentry: A dentry entry belonging to the cgroup for which stats have
+ * been requested.
+ */
+int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
+{
+	int ret = -EINVAL;
+	struct cgroup *cgrp;
+	struct cgroup_iter it;
+	struct task_struct *tsk;
+	/*
+	 * Validate dentry by checking the superblock operations
+	 */
+	if (dentry->d_sb->s_op != &cgroup_ops)
+		 goto err;
+
+	ret = 0;
+	cgrp = dentry->d_fsdata;
+	rcu_read_lock();
+
+	cgroup_iter_start(cgrp, &it);
+	while ((tsk = cgroup_iter_next(cgrp, &it))) {
+		switch (tsk->state) {
+		case TASK_RUNNING:
+			stats->nr_running++;
+			break;
+		case TASK_INTERRUPTIBLE:
+			stats->nr_sleeping++;
+			break;
+		case TASK_UNINTERRUPTIBLE:
+			stats->nr_uninterruptible++;
+			break;
+		case TASK_STOPPED:
+			stats->nr_stopped++;
+			break;
+		default:
+			if (delayacct_is_task_waiting_on_io(tsk))
+				stats->nr_io_wait++;
+			break;
+		}
+	}
+	cgroup_iter_end(cgrp, &it);
+
+	rcu_read_unlock();
+err:
+	return ret;
+}
+
+static int cmppid(const void *a, const void *b)
+{
+	return *(pid_t *)a - *(pid_t *)b;
+}
+
+/*
+ * Convert array 'a' of 'npids' pid_t's to a string of newline separated
+ * decimal pids in 'buf'.  Don't write more than 'sz' chars, but return
+ * count 'cnt' of how many chars would be written if buf were large enough.
+ */
+static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+{
+	int cnt = 0;
+	int i;
+
+	for (i = 0; i < npids; i++)
+		cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
+	return cnt;
+}
+
+/*
+ * Handle an open on 'tasks' file.  Prepare a buffer listing the
+ * process id's of tasks currently attached to the cgroup being opened.
+ *
+ * Does not require any specific cgroup mutexes, and does not take any.
+ */
+static int cgroup_tasks_open(struct inode *unused, struct file *file)
+{
+	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+	struct ctr_struct *ctr;
+	pid_t *pidarray;
+	int npids;
+	char c;
+
+	if (!(file->f_mode & FMODE_READ))
+		return 0;
+
+	ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
+	if (!ctr)
+		goto err0;
+
+	/*
+	 * If cgroup gets more users after we read count, we won't have
+	 * enough space - tough.  This race is indistinguishable to the
+	 * caller from the case that the additional cgroup users didn't
+	 * show up until sometime later on.
+	 */
+	npids = cgroup_task_count(cgrp);
+	if (npids) {
+		pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+		if (!pidarray)
+			goto err1;
+
+		npids = pid_array_load(pidarray, npids, cgrp);
+		sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
+
+		/* Call pid_array_to_buf() twice, first just to get bufsz */
+		ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
+		ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
+		if (!ctr->buf)
+			goto err2;
+		ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
+
+		kfree(pidarray);
+	} else {
+		ctr->buf = 0;
+		ctr->bufsz = 0;
+	}
+	file->private_data = ctr;
+	return 0;
+
+err2:
+	kfree(pidarray);
+err1:
+	kfree(ctr);
+err0:
+	return -ENOMEM;
+}
+
+static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
+				    struct cftype *cft,
+				    struct file *file, char __user *buf,
+				    size_t nbytes, loff_t *ppos)
+{
+	struct ctr_struct *ctr = file->private_data;
+
+	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
+}
+
+static int cgroup_tasks_release(struct inode *unused_inode,
+					struct file *file)
+{
+	struct ctr_struct *ctr;
+
+	if (file->f_mode & FMODE_READ) {
+		ctr = file->private_data;
+		kfree(ctr->buf);
+		kfree(ctr);
+	}
+	return 0;
+}
+
+static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
+					    struct cftype *cft)
+{
+	return notify_on_release(cgrp);
+}
+
+static u64 cgroup_read_releasable(struct cgroup *cgrp, struct cftype *cft)
+{
+	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
+}
+
+/*
+ * for the common functions, 'private' gives the type of file
+ */
+static struct cftype files[] = {
+	{
+		.name = "tasks",
+		.open = cgroup_tasks_open,
+		.read = cgroup_tasks_read,
+		.write = cgroup_common_file_write,
+		.release = cgroup_tasks_release,
+		.private = FILE_TASKLIST,
+	},
+
+	{
+		.name = "notify_on_release",
+		.read_uint = cgroup_read_notify_on_release,
+		.write = cgroup_common_file_write,
+		.private = FILE_NOTIFY_ON_RELEASE,
+	},
+
+	{
+		.name = "releasable",
+		.read_uint = cgroup_read_releasable,
+		.private = FILE_RELEASABLE,
+	}
+};
+
+static struct cftype cft_release_agent = {
+	.name = "release_agent",
+	.read = cgroup_common_file_read,
+	.write = cgroup_common_file_write,
+	.private = FILE_RELEASE_AGENT,
+};
+
+static int cgroup_populate_dir(struct cgroup *cgrp)
+{
+	int err;
+	struct cgroup_subsys *ss;
+
+	/* First clear out any existing files */
+	cgroup_clear_directory(cgrp->dentry);
+
+	err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
+	if (err < 0)
+		return err;
+
+	if (cgrp == cgrp->top_cgroup) {
+		if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
+			return err;
+	}
+
+	for_each_subsys(cgrp->root, ss) {
+		if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static void init_cgroup_css(struct cgroup_subsys_state *css,
+			       struct cgroup_subsys *ss,
+			       struct cgroup *cgrp)
+{
+	css->cgroup = cgrp;
+	atomic_set(&css->refcnt, 0);
+	css->flags = 0;
+	if (cgrp == dummytop)
+		set_bit(CSS_ROOT, &css->flags);
+	BUG_ON(cgrp->subsys[ss->subsys_id]);
+	cgrp->subsys[ss->subsys_id] = css;
+}
+
+/*
+ *	cgroup_create - create a cgroup
+ *	parent:	cgroup that will be parent of the new cgroup.
+ *	name:		name of the new cgroup. Will be strcpy'ed.
+ *	mode:		mode to set on new inode
+ *
+ *	Must be called with the mutex on the parent inode held
+ */
+
+static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
+			     int mode)
+{
+	struct cgroup *cgrp;
+	struct cgroupfs_root *root = parent->root;
+	int err = 0;
+	struct cgroup_subsys *ss;
+	struct super_block *sb = root->sb;
+
+	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
+	if (!cgrp)
+		return -ENOMEM;
+
+	/* Grab a reference on the superblock so the hierarchy doesn't
+	 * get deleted on unmount if there are child cgroups.  This
+	 * can be done outside cgroup_mutex, since the sb can't
+	 * disappear while someone has an open control file on the
+	 * fs */
+	atomic_inc(&sb->s_active);
+
+	mutex_lock(&cgroup_mutex);
+
+	cgrp->flags = 0;
+	INIT_LIST_HEAD(&cgrp->sibling);
+	INIT_LIST_HEAD(&cgrp->children);
+	INIT_LIST_HEAD(&cgrp->css_sets);
+	INIT_LIST_HEAD(&cgrp->release_list);
+
+	cgrp->parent = parent;
+	cgrp->root = parent->root;
+	cgrp->top_cgroup = parent->top_cgroup;
+
+	for_each_subsys(root, ss) {
+		struct cgroup_subsys_state *css = ss->create(ss, cgrp);
+		if (IS_ERR(css)) {
+			err = PTR_ERR(css);
+			goto err_destroy;
+		}
+		init_cgroup_css(css, ss, cgrp);
+	}
+
+	list_add(&cgrp->sibling, &cgrp->parent->children);
+	root->number_of_cgroups++;
+
+	err = cgroup_create_dir(cgrp, dentry, mode);
+	if (err < 0)
+		goto err_remove;
+
+	/* The cgroup directory was pre-locked for us */
+	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
+
+	err = cgroup_populate_dir(cgrp);
+	/* If err < 0, we have a half-filled directory - oh well ;) */
+
+	mutex_unlock(&cgroup_mutex);
+	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+
+	return 0;
+
+ err_remove:
+
+	list_del(&cgrp->sibling);
+	root->number_of_cgroups--;
+
+ err_destroy:
+
+	for_each_subsys(root, ss) {
+		if (cgrp->subsys[ss->subsys_id])
+			ss->destroy(ss, cgrp);
+	}
+
+	mutex_unlock(&cgroup_mutex);
+
+	/* Release the reference count that we took on the superblock */
+	deactivate_super(sb);
+
+	kfree(cgrp);
+	return err;
+}
+
+static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct cgroup *c_parent = dentry->d_parent->d_fsdata;
+
+	/* the vfs holds inode->i_mutex already */
+	return cgroup_create(c_parent, dentry, mode | S_IFDIR);
+}
+
+static inline int cgroup_has_css_refs(struct cgroup *cgrp)
+{
+	/* Check the reference count on each subsystem. Since we
+	 * already established that there are no tasks in the
+	 * cgroup, if the css refcount is also 0, then there should
+	 * be no outstanding references, so the subsystem is safe to
+	 * destroy. We scan across all subsystems rather than using
+	 * the per-hierarchy linked list of mounted subsystems since
+	 * we can be called via check_for_release() with no
+	 * synchronization other than RCU, and the subsystem linked
+	 * list isn't RCU-safe */
+	int i;
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup_subsys *ss = subsys[i];
+		struct cgroup_subsys_state *css;
+		/* Skip subsystems not in this hierarchy */
+		if (ss->root != cgrp->root)
+			continue;
+		css = cgrp->subsys[ss->subsys_id];
+		/* When called from check_for_release() it's possible
+		 * that by this point the cgroup has been removed
+		 * and the css deleted. But a false-positive doesn't
+		 * matter, since it can only happen if the cgroup
+		 * has been deleted and hence no longer needs the
+		 * release agent to be called anyway. */
+		if (css && atomic_read(&css->refcnt)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
+{
+	struct cgroup *cgrp = dentry->d_fsdata;
+	struct dentry *d;
+	struct cgroup *parent;
+	struct cgroup_subsys *ss;
+	struct super_block *sb;
+	struct cgroupfs_root *root;
+
+	/* the vfs holds both inode->i_mutex already */
+
+	mutex_lock(&cgroup_mutex);
+	if (atomic_read(&cgrp->count) != 0) {
+		mutex_unlock(&cgroup_mutex);
+		return -EBUSY;
+	}
+	if (!list_empty(&cgrp->children)) {
+		mutex_unlock(&cgroup_mutex);
+		return -EBUSY;
+	}
+
+	parent = cgrp->parent;
+	root = cgrp->root;
+	sb = root->sb;
+
+	if (cgroup_has_css_refs(cgrp)) {
+		mutex_unlock(&cgroup_mutex);
+		return -EBUSY;
+	}
+
+	for_each_subsys(root, ss) {
+		if (cgrp->subsys[ss->subsys_id])
+			ss->destroy(ss, cgrp);
+	}
+
+	spin_lock(&release_list_lock);
+	set_bit(CGRP_REMOVED, &cgrp->flags);
+	if (!list_empty(&cgrp->release_list))
+		list_del(&cgrp->release_list);
+	spin_unlock(&release_list_lock);
+	/* delete my sibling from parent->children */
+	list_del(&cgrp->sibling);
+	spin_lock(&cgrp->dentry->d_lock);
+	d = dget(cgrp->dentry);
+	cgrp->dentry = NULL;
+	spin_unlock(&d->d_lock);
+
+	cgroup_d_remove_dir(d);
+	dput(d);
+	root->number_of_cgroups--;
+
+	set_bit(CGRP_RELEASABLE, &parent->flags);
+	check_for_release(parent);
+
+	mutex_unlock(&cgroup_mutex);
+	/* Drop the active superblock reference that we took when we
+	 * created the cgroup */
+	deactivate_super(sb);
+	return 0;
+}
+
+static void cgroup_init_subsys(struct cgroup_subsys *ss)
+{
+	struct cgroup_subsys_state *css;
+	struct list_head *l;
+	printk(KERN_ERR "Initializing cgroup subsys %s\n", ss->name);
+
+	/* Create the top cgroup state for this subsystem */
+	ss->root = &rootnode;
+	css = ss->create(ss, dummytop);
+	/* We don't handle early failures gracefully */
+	BUG_ON(IS_ERR(css));
+	init_cgroup_css(css, ss, dummytop);
+
+	/* Update all cgroup groups to contain a subsys
+	 * pointer to this state - since the subsystem is
+	 * newly registered, all tasks and hence all cgroup
+	 * groups are in the subsystem's top cgroup. */
+	write_lock(&css_set_lock);
+	l = &init_css_set.list;
+	do {
+		struct css_set *cg =
+			list_entry(l, struct css_set, list);
+		cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
+		l = l->next;
+	} while (l != &init_css_set.list);
+	write_unlock(&css_set_lock);
+
+ 	/* If this subsystem requested that it be notified with fork
+ 	 * events, we should send it one now for every process in the
+ 	 * system */
+	if (ss->fork) {
+		struct task_struct *g, *p;
+
+		read_lock(&tasklist_lock);
+		do_each_thread(g, p) {
+			ss->fork(ss, p);
+		} while_each_thread(g, p);
+		read_unlock(&tasklist_lock);
+	}
+
+	need_forkexit_callback |= ss->fork || ss->exit;
+
+	ss->active = 1;
+}
+
+/**
+ * cgroup_init_early - initialize cgroups at system boot, and
+ * initialize any subsystems that request early init.
+ */
+int __init cgroup_init_early(void)
+{
+	int i;
+	kref_init(&init_css_set.ref);
+	kref_get(&init_css_set.ref);
+	INIT_LIST_HEAD(&init_css_set.list);
+	INIT_LIST_HEAD(&init_css_set.cg_links);
+	INIT_LIST_HEAD(&init_css_set.tasks);
+	css_set_count = 1;
+	init_cgroup_root(&rootnode);
+	list_add(&rootnode.root_list, &roots);
+	root_count = 1;
+	init_task.cgroups = &init_css_set;
+
+	init_css_set_link.cg = &init_css_set;
+	list_add(&init_css_set_link.cgrp_link_list,
+		 &rootnode.top_cgroup.css_sets);
+	list_add(&init_css_set_link.cg_link_list,
+		 &init_css_set.cg_links);
+
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup_subsys *ss = subsys[i];
+
+		BUG_ON(!ss->name);
+		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
+		BUG_ON(!ss->create);
+		BUG_ON(!ss->destroy);
+		if (ss->subsys_id != i) {
+			printk(KERN_ERR "Subsys %s id == %d\n",
+			       ss->name, ss->subsys_id);
+			BUG();
+		}
+
+		if (ss->early_init)
+			cgroup_init_subsys(ss);
+	}
+	return 0;
+}
+
+/**
+ * cgroup_init - register cgroup filesystem and /proc file, and
+ * initialize any subsystems that didn't request early init.
+ */
+int __init cgroup_init(void)
+{
+	int err;
+	int i;
+	struct proc_dir_entry *entry;
+
+	err = bdi_init(&cgroup_backing_dev_info);
+	if (err)
+		return err;
+
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup_subsys *ss = subsys[i];
+		if (!ss->early_init)
+			cgroup_init_subsys(ss);
+	}
+
+	err = register_filesystem(&cgroup_fs_type);
+	if (err < 0)
+		goto out;
+
+	entry = create_proc_entry("cgroups", 0, NULL);
+	if (entry)
+		entry->proc_fops = &proc_cgroupstats_operations;
+
+out:
+	if (err)
+		bdi_destroy(&cgroup_backing_dev_info);
+
+	return err;
+}
+
+/*
+ * proc_cgroup_show()
+ *  - Print task's cgroup paths into seq_file, one line for each hierarchy
+ *  - Used for /proc/<pid>/cgroup.
+ *  - No need to task_lock(tsk) on this tsk->cgroup reference, as it
+ *    doesn't really matter if tsk->cgroup changes after we read it,
+ *    and we take cgroup_mutex, keeping attach_task() from changing it
+ *    anyway.  No need to check that tsk->cgroup != NULL, thanks to
+ *    the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks
+ *    cgroup to top_cgroup.
+ */
+
+/* TODO: Use a proper seq_file iterator */
+static int proc_cgroup_show(struct seq_file *m, void *v)
+{
+	struct pid *pid;
+	struct task_struct *tsk;
+	char *buf;
+	int retval;
+	struct cgroupfs_root *root;
+
+	retval = -ENOMEM;
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		goto out;
+
+	retval = -ESRCH;
+	pid = m->private;
+	tsk = get_pid_task(pid, PIDTYPE_PID);
+	if (!tsk)
+		goto out_free;
+
+	retval = 0;
+
+	mutex_lock(&cgroup_mutex);
+
+	for_each_root(root) {
+		struct cgroup_subsys *ss;
+		struct cgroup *cgrp;
+		int subsys_id;
+		int count = 0;
+
+		/* Skip this hierarchy if it has no active subsystems */
+		if (!root->actual_subsys_bits)
+			continue;
+		for_each_subsys(root, ss)
+			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
+		seq_putc(m, ':');
+		get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
+		cgrp = task_cgroup(tsk, subsys_id);
+		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
+		if (retval < 0)
+			goto out_unlock;
+		seq_puts(m, buf);
+		seq_putc(m, '\n');
+	}
+
+out_unlock:
+	mutex_unlock(&cgroup_mutex);
+	put_task_struct(tsk);
+out_free:
+	kfree(buf);
+out:
+	return retval;
+}
+
+static int cgroup_open(struct inode *inode, struct file *file)
+{
+	struct pid *pid = PROC_I(inode)->pid;
+	return single_open(file, proc_cgroup_show, pid);
+}
+
+struct file_operations proc_cgroup_operations = {
+	.open		= cgroup_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/* Display information about each subsystem and each hierarchy */
+static int proc_cgroupstats_show(struct seq_file *m, void *v)
+{
+	int i;
+	struct cgroupfs_root *root;
+
+	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\n");
+	mutex_lock(&cgroup_mutex);
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup_subsys *ss = subsys[i];
+		seq_printf(m, "%s\t%lu\t%d\n",
+			   ss->name, ss->root->subsys_bits,
+			   ss->root->number_of_cgroups);
+	}
+	mutex_unlock(&cgroup_mutex);
+	return 0;
+}
+
+static int cgroupstats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_cgroupstats_show, 0);
+}
+
+static struct file_operations proc_cgroupstats_operations = {
+	.open = cgroupstats_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/**
+ * cgroup_fork - attach newly forked task to its parents cgroup.
+ * @tsk: pointer to task_struct of forking parent process.
+ *
+ * Description: A task inherits its parent's cgroup at fork().
+ *
+ * A pointer to the shared css_set was automatically copied in
+ * fork.c by dup_task_struct().  However, we ignore that copy, since
+ * it was not made under the protection of RCU or cgroup_mutex, so
+ * might no longer be a valid cgroup pointer.  attach_task() might
+ * have already changed current->cgroups, allowing the previously
+ * referenced cgroup group to be removed and freed.
+ *
+ * At the point that cgroup_fork() is called, 'current' is the parent
+ * task, and the passed argument 'child' points to the child task.
+ */
+void cgroup_fork(struct task_struct *child)
+{
+	task_lock(current);
+	child->cgroups = current->cgroups;
+	get_css_set(child->cgroups);
+	task_unlock(current);
+	INIT_LIST_HEAD(&child->cg_list);
+}
+
+/**
+ * cgroup_fork_callbacks - called on a new task very soon before
+ * adding it to the tasklist. No need to take any locks since no-one
+ * can be operating on this task
+ */
+void cgroup_fork_callbacks(struct task_struct *child)
+{
+	if (need_forkexit_callback) {
+		int i;
+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+			struct cgroup_subsys *ss = subsys[i];
+			if (ss->fork)
+				ss->fork(ss, child);
+		}
+	}
+}
+
+/**
+ * cgroup_post_fork - called on a new task after adding it to the
+ * task list. Adds the task to the list running through its css_set
+ * if necessary. Has to be after the task is visible on the task list
+ * in case we race with the first call to cgroup_iter_start() - to
+ * guarantee that the new task ends up on its list. */
+void cgroup_post_fork(struct task_struct *child)
+{
+	if (use_task_css_set_links) {
+		write_lock(&css_set_lock);
+		if (list_empty(&child->cg_list))
+			list_add(&child->cg_list, &child->cgroups->tasks);
+		write_unlock(&css_set_lock);
+	}
+}
+/**
+ * cgroup_exit - detach cgroup from exiting task
+ * @tsk: pointer to task_struct of exiting process
+ *
+ * Description: Detach cgroup from @tsk and release it.
+ *
+ * Note that cgroups marked notify_on_release force every task in
+ * them to take the global cgroup_mutex mutex when exiting.
+ * This could impact scaling on very large systems.  Be reluctant to
+ * use notify_on_release cgroups where very high task exit scaling
+ * is required on large systems.
+ *
+ * the_top_cgroup_hack:
+ *
+ *    Set the exiting tasks cgroup to the root cgroup (top_cgroup).
+ *
+ *    We call cgroup_exit() while the task is still competent to
+ *    handle notify_on_release(), then leave the task attached to the
+ *    root cgroup in each hierarchy for the remainder of its exit.
+ *
+ *    To do this properly, we would increment the reference count on
+ *    top_cgroup, and near the very end of the kernel/exit.c do_exit()
+ *    code we would add a second cgroup function call, to drop that
+ *    reference.  This would just create an unnecessary hot spot on
+ *    the top_cgroup reference count, to no avail.
+ *
+ *    Normally, holding a reference to a cgroup without bumping its
+ *    count is unsafe.   The cgroup could go away, or someone could
+ *    attach us to a different cgroup, decrementing the count on
+ *    the first cgroup that we never incremented.  But in this case,
+ *    top_cgroup isn't going away, and either task has PF_EXITING set,
+ *    which wards off any attach_task() attempts, or task is a failed
+ *    fork, never visible to attach_task.
+ *
+ */
+void cgroup_exit(struct task_struct *tsk, int run_callbacks)
+{
+	int i;
+	struct css_set *cg;
+
+	if (run_callbacks && need_forkexit_callback) {
+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+			struct cgroup_subsys *ss = subsys[i];
+			if (ss->exit)
+				ss->exit(ss, tsk);
+		}
+	}
+
+	/*
+	 * Unlink from the css_set task list if necessary.
+	 * Optimistically check cg_list before taking
+	 * css_set_lock
+	 */
+	if (!list_empty(&tsk->cg_list)) {
+		write_lock(&css_set_lock);
+		if (!list_empty(&tsk->cg_list))
+			list_del(&tsk->cg_list);
+		write_unlock(&css_set_lock);
+	}
+
+	/* Reassign the task to the init_css_set. */
+	task_lock(tsk);
+	cg = tsk->cgroups;
+	tsk->cgroups = &init_css_set;
+	task_unlock(tsk);
+	if (cg)
+		put_css_set_taskexit(cg);
+}
+
+/**
+ * cgroup_clone - duplicate the current cgroup in the hierarchy
+ * that the given subsystem is attached to, and move this task into
+ * the new child
+ */
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
+{
+	struct dentry *dentry;
+	int ret = 0;
+	char nodename[MAX_CGROUP_TYPE_NAMELEN];
+	struct cgroup *parent, *child;
+	struct inode *inode;
+	struct css_set *cg;
+	struct cgroupfs_root *root;
+	struct cgroup_subsys *ss;
+
+	/* We shouldn't be called by an unregistered subsystem */
+	BUG_ON(!subsys->active);
+
+	/* First figure out what hierarchy and cgroup we're dealing
+	 * with, and pin them so we can drop cgroup_mutex */
+	mutex_lock(&cgroup_mutex);
+ again:
+	root = subsys->root;
+	if (root == &rootnode) {
+		printk(KERN_INFO
+		       "Not cloning cgroup for unused subsystem %s\n",
+		       subsys->name);
+		mutex_unlock(&cgroup_mutex);
+		return 0;
+	}
+	cg = tsk->cgroups;
+	parent = task_cgroup(tsk, subsys->subsys_id);
+
+	snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "node_%d", tsk->pid);
+
+	/* Pin the hierarchy */
+	atomic_inc(&parent->root->sb->s_active);
+
+	/* Keep the cgroup alive */
+	get_css_set(cg);
+	mutex_unlock(&cgroup_mutex);
+
+	/* Now do the VFS work to create a cgroup */
+	inode = parent->dentry->d_inode;
+
+	/* Hold the parent directory mutex across this operation to
+	 * stop anyone else deleting the new cgroup */
+	mutex_lock(&inode->i_mutex);
+	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
+	if (IS_ERR(dentry)) {
+		printk(KERN_INFO
+		       "Couldn't allocate dentry for %s: %ld\n", nodename,
+		       PTR_ERR(dentry));
+		ret = PTR_ERR(dentry);
+		goto out_release;
+	}
+
+	/* Create the cgroup directory, which also creates the cgroup */
+	ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
+	child = __d_cgrp(dentry);
+	dput(dentry);
+	if (ret) {
+		printk(KERN_INFO
+		       "Failed to create cgroup %s: %d\n", nodename,
+		       ret);
+		goto out_release;
+	}
+
+	if (!child) {
+		printk(KERN_INFO
+		       "Couldn't find new cgroup %s\n", nodename);
+		ret = -ENOMEM;
+		goto out_release;
+	}
+
+	/* The cgroup now exists. Retake cgroup_mutex and check
+	 * that we're still in the same state that we thought we
+	 * were. */
+	mutex_lock(&cgroup_mutex);
+	if ((root != subsys->root) ||
+	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
+		/* Aargh, we raced ... */
+		mutex_unlock(&inode->i_mutex);
+		put_css_set(cg);
+
+		deactivate_super(parent->root->sb);
+		/* The cgroup is still accessible in the VFS, but
+		 * we're not going to try to rmdir() it at this
+		 * point. */
+		printk(KERN_INFO
+		       "Race in cgroup_clone() - leaking cgroup %s\n",
+		       nodename);
+		goto again;
+	}
+
+	/* do any required auto-setup */
+	for_each_subsys(root, ss) {
+		if (ss->post_clone)
+			ss->post_clone(ss, child);
+	}
+
+	/* All seems fine. Finish by moving the task into the new cgroup */
+	ret = attach_task(child, tsk);
+	mutex_unlock(&cgroup_mutex);
+
+ out_release:
+	mutex_unlock(&inode->i_mutex);
+
+	mutex_lock(&cgroup_mutex);
+	put_css_set(cg);
+	mutex_unlock(&cgroup_mutex);
+	deactivate_super(parent->root->sb);
+	return ret;
+}
+
+/*
+ * See if "cgrp" is a descendant of the current task's cgroup in
+ * the appropriate hierarchy
+ *
+ * If we are sending in dummytop, then presumably we are creating
+ * the top cgroup in the subsystem.
+ *
+ * Called only by the ns (nsproxy) cgroup.
+ */
+int cgroup_is_descendant(const struct cgroup *cgrp)
+{
+	int ret;
+	struct cgroup *target;
+	int subsys_id;
+
+	if (cgrp == dummytop)
+		return 1;
+
+	get_first_subsys(cgrp, NULL, &subsys_id);
+	target = task_cgroup(current, subsys_id);
+	while (cgrp != target && cgrp!= cgrp->top_cgroup)
+		cgrp = cgrp->parent;
+	ret = (cgrp == target);
+	return ret;
+}
+
+static void check_for_release(struct cgroup *cgrp)
+{
+	/* All of these checks rely on RCU to keep the cgroup
+	 * structure alive */
+	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
+	    && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
+		/* Control Group is currently removeable. If it's not
+		 * already queued for a userspace notification, queue
+		 * it now */
+		int need_schedule_work = 0;
+		spin_lock(&release_list_lock);
+		if (!cgroup_is_removed(cgrp) &&
+		    list_empty(&cgrp->release_list)) {
+			list_add(&cgrp->release_list, &release_list);
+			need_schedule_work = 1;
+		}
+		spin_unlock(&release_list_lock);
+		if (need_schedule_work)
+			schedule_work(&release_agent_work);
+	}
+}
+
+void __css_put(struct cgroup_subsys_state *css)
+{
+	struct cgroup *cgrp = css->cgroup;
+	rcu_read_lock();
+	if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
+		set_bit(CGRP_RELEASABLE, &cgrp->flags);
+		check_for_release(cgrp);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Notify userspace when a cgroup is released, by running the
+ * configured release agent with the name of the cgroup (path
+ * relative to the root of cgroup file system) as the argument.
+ *
+ * Most likely, this user command will try to rmdir this cgroup.
+ *
+ * This races with the possibility that some other task will be
+ * attached to this cgroup before it is removed, or that some other
+ * user task will 'mkdir' a child cgroup of this cgroup.  That's ok.
+ * The presumed 'rmdir' will fail quietly if this cgroup is no longer
+ * unused, and this cgroup will be reprieved from its death sentence,
+ * to continue to serve a useful existence.  Next time it's released,
+ * we will get notified again, if it still has 'notify_on_release' set.
+ *
+ * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
+ * means only wait until the task is successfully execve()'d.  The
+ * separate release agent task is forked by call_usermodehelper(),
+ * then control in this thread returns here, without waiting for the
+ * release agent task.  We don't bother to wait because the caller of
+ * this routine has no use for the exit status of the release agent
+ * task, so no sense holding our caller up for that.
+ *
+ */
+
+static void cgroup_release_agent(struct work_struct *work)
+{
+	BUG_ON(work != &release_agent_work);
+	mutex_lock(&cgroup_mutex);
+	spin_lock(&release_list_lock);
+	while (!list_empty(&release_list)) {
+		char *argv[3], *envp[3];
+		int i;
+		char *pathbuf;
+		struct cgroup *cgrp = list_entry(release_list.next,
+						    struct cgroup,
+						    release_list);
+		list_del_init(&cgrp->release_list);
+		spin_unlock(&release_list_lock);
+		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!pathbuf) {
+			spin_lock(&release_list_lock);
+			continue;
+		}
+
+		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) {
+			kfree(pathbuf);
+			spin_lock(&release_list_lock);
+			continue;
+		}
+
+		i = 0;
+		argv[i++] = cgrp->root->release_agent_path;
+		argv[i++] = (char *)pathbuf;
+		argv[i] = NULL;
+
+		i = 0;
+		/* minimal command environment */
+		envp[i++] = "HOME=/";
+		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+		envp[i] = NULL;
+
+		/* Drop the lock while we invoke the usermode helper,
+		 * since the exec could involve hitting disk and hence
+		 * be a slow process */
+		mutex_unlock(&cgroup_mutex);
+		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+		kfree(pathbuf);
+		mutex_lock(&cgroup_mutex);
+		spin_lock(&release_list_lock);
+	}
+	spin_unlock(&release_list_lock);
+	mutex_unlock(&cgroup_mutex);
+}
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
new file mode 100644
index 0000000..37301e8
--- /dev/null
+++ b/kernel/cgroup_debug.c
@@ -0,0 +1,97 @@
+/*
+ * kernel/ccontainer_debug.c - Example cgroup subsystem that
+ * exposes debug info
+ *
+ * Copyright (C) Google Inc, 2007
+ *
+ * Developed by Paul Menage (menage@google.com)
+ *
+ */
+
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
+
+#include <asm/atomic.h>
+
+static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
+						   struct cgroup *cont)
+{
+	struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
+
+	if (!css)
+		return ERR_PTR(-ENOMEM);
+
+	return css;
+}
+
+static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	kfree(cont->subsys[debug_subsys_id]);
+}
+
+static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
+{
+	return atomic_read(&cont->count);
+}
+
+static u64 taskcount_read(struct cgroup *cont, struct cftype *cft)
+{
+	u64 count;
+
+	cgroup_lock();
+	count = cgroup_task_count(cont);
+	cgroup_unlock();
+	return count;
+}
+
+static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
+{
+	return (u64)(long)current->cgroups;
+}
+
+static u64 current_css_set_refcount_read(struct cgroup *cont,
+					   struct cftype *cft)
+{
+	u64 count;
+
+	rcu_read_lock();
+	count = atomic_read(&current->cgroups->ref.refcount);
+	rcu_read_unlock();
+	return count;
+}
+
+static struct cftype files[] =  {
+	{
+		.name = "cgroup_refcount",
+		.read_uint = cgroup_refcount_read,
+	},
+	{
+		.name = "taskcount",
+		.read_uint = taskcount_read,
+	},
+
+	{
+		.name = "current_css_set",
+		.read_uint = current_css_set_read,
+	},
+
+	{
+		.name = "current_css_set_refcount",
+		.read_uint = current_css_set_refcount_read,
+	},
+};
+
+static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+}
+
+struct cgroup_subsys debug_subsys = {
+	.name = "debug",
+	.create = debug_create,
+	.destroy = debug_destroy,
+	.populate = debug_populate,
+	.subsys_id = debug_subsys_id,
+};
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a21f71a..6b3a0c1 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -98,7 +98,8 @@
 		     !cputime_eq(p->stime, cputime_zero)))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
 				(state = %ld, flags = %x) \n",
-				 p->comm, p->pid, cpu, p->state, p->flags);
+				 p->comm, task_pid_nr(p), cpu,
+				 p->state, p->flags);
 	}
 	write_unlock_irq(&tasklist_lock);
 }
@@ -264,6 +265,15 @@
 int __cpuinit cpu_up(unsigned int cpu)
 {
 	int err = 0;
+	if (!cpu_isset(cpu, cpu_possible_map)) {
+		printk(KERN_ERR "can't online cpu %d because it is not "
+			"configured as may-hotadd at boot time\n", cpu);
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) || defined(CONFIG_S390)
+		printk(KERN_ERR "please check additional_cpus= boot "
+				"parameter\n");
+#endif
+		return -EINVAL;
+	}
 
 	mutex_lock(&cpu_add_remove_lock);
 	if (cpu_hotplug_disabled)
diff --git a/kernel/cpu_acct.c b/kernel/cpu_acct.c
new file mode 100644
index 0000000..731e47e
--- /dev/null
+++ b/kernel/cpu_acct.c
@@ -0,0 +1,186 @@
+/*
+ * kernel/cpu_acct.c - CPU accounting cgroup subsystem
+ *
+ * Copyright (C) Google Inc, 2006
+ *
+ * Developed by Paul Menage (menage@google.com) and Balbir Singh
+ * (balbir@in.ibm.com)
+ *
+ */
+
+/*
+ * Example cgroup subsystem for reporting total CPU usage of tasks in a
+ * cgroup, along with percentage load over a time interval
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/rcupdate.h>
+
+#include <asm/div64.h>
+
+struct cpuacct {
+	struct cgroup_subsys_state css;
+	spinlock_t lock;
+	/* total time used by this class */
+	cputime64_t time;
+
+	/* time when next load calculation occurs */
+	u64 next_interval_check;
+
+	/* time used in current period */
+	cputime64_t current_interval_time;
+
+	/* time used in last period */
+	cputime64_t last_interval_time;
+};
+
+struct cgroup_subsys cpuacct_subsys;
+
+static inline struct cpuacct *cgroup_ca(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont, cpuacct_subsys_id),
+			    struct cpuacct, css);
+}
+
+static inline struct cpuacct *task_ca(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, cpuacct_subsys_id),
+			    struct cpuacct, css);
+}
+
+#define INTERVAL (HZ * 10)
+
+static inline u64 next_interval_boundary(u64 now)
+{
+	/* calculate the next interval boundary beyond the
+	 * current time */
+	do_div(now, INTERVAL);
+	return (now + 1) * INTERVAL;
+}
+
+static struct cgroup_subsys_state *cpuacct_create(
+	struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+
+	if (!ca)
+		return ERR_PTR(-ENOMEM);
+	spin_lock_init(&ca->lock);
+	ca->next_interval_check = next_interval_boundary(get_jiffies_64());
+	return &ca->css;
+}
+
+static void cpuacct_destroy(struct cgroup_subsys *ss,
+			    struct cgroup *cont)
+{
+	kfree(cgroup_ca(cont));
+}
+
+/* Lazily update the load calculation if necessary. Called with ca locked */
+static void cpuusage_update(struct cpuacct *ca)
+{
+	u64 now = get_jiffies_64();
+
+	/* If we're not due for an update, return */
+	if (ca->next_interval_check > now)
+		return;
+
+	if (ca->next_interval_check <= (now - INTERVAL)) {
+		/* If it's been more than an interval since the last
+		 * check, then catch up - the last interval must have
+		 * been zero load */
+		ca->last_interval_time = 0;
+		ca->next_interval_check = next_interval_boundary(now);
+	} else {
+		/* If a steal takes the last interval time negative,
+		 * then we just ignore it */
+		if ((s64)ca->current_interval_time > 0)
+			ca->last_interval_time = ca->current_interval_time;
+		else
+			ca->last_interval_time = 0;
+		ca->next_interval_check += INTERVAL;
+	}
+	ca->current_interval_time = 0;
+}
+
+static u64 cpuusage_read(struct cgroup *cont, struct cftype *cft)
+{
+	struct cpuacct *ca = cgroup_ca(cont);
+	u64 time;
+
+	spin_lock_irq(&ca->lock);
+	cpuusage_update(ca);
+	time = cputime64_to_jiffies64(ca->time);
+	spin_unlock_irq(&ca->lock);
+
+	/* Convert 64-bit jiffies to seconds */
+	time *= 1000;
+	do_div(time, HZ);
+	return time;
+}
+
+static u64 load_read(struct cgroup *cont, struct cftype *cft)
+{
+	struct cpuacct *ca = cgroup_ca(cont);
+	u64 time;
+
+	/* Find the time used in the previous interval */
+	spin_lock_irq(&ca->lock);
+	cpuusage_update(ca);
+	time = cputime64_to_jiffies64(ca->last_interval_time);
+	spin_unlock_irq(&ca->lock);
+
+	/* Convert time to a percentage, to give the load in the
+	 * previous period */
+	time *= 100;
+	do_div(time, INTERVAL);
+
+	return time;
+}
+
+static struct cftype files[] = {
+	{
+		.name = "usage",
+		.read_uint = cpuusage_read,
+	},
+	{
+		.name = "load",
+		.read_uint = load_read,
+	}
+};
+
+static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+}
+
+void cpuacct_charge(struct task_struct *task, cputime_t cputime)
+{
+
+	struct cpuacct *ca;
+	unsigned long flags;
+
+	if (!cpuacct_subsys.active)
+		return;
+	rcu_read_lock();
+	ca = task_ca(task);
+	if (ca) {
+		spin_lock_irqsave(&ca->lock, flags);
+		cpuusage_update(ca);
+		ca->time = cputime64_add(ca->time, cputime);
+		ca->current_interval_time =
+			cputime64_add(ca->current_interval_time, cputime);
+		spin_unlock_irqrestore(&ca->lock, flags);
+	}
+	rcu_read_unlock();
+}
+
+struct cgroup_subsys cpuacct_subsys = {
+	.name = "cpuacct",
+	.create = cpuacct_create,
+	.destroy = cpuacct_destroy,
+	.populate = cpuacct_populate,
+	.subsys_id = cpuacct_subsys_id,
+};
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 64950fa..50f5dc4 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -4,7 +4,8 @@
  *  Processor and Memory placement constraints for sets of tasks.
  *
  *  Copyright (C) 2003 BULL SA.
- *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
+ *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
+ *  Copyright (C) 2006 Google, Inc
  *
  *  Portions derived from Patrick Mochel's sysfs code.
  *  sysfs is Copyright (c) 2001-3 Patrick Mochel
@@ -12,6 +13,7 @@
  *  2003-10-10 Written by Simon Derr.
  *  2003-10-22 Updates by Stephen Hemminger.
  *  2004 May-July Rework by Paul Jackson.
+ *  2006 Rework by Paul Menage to use generic cgroups
  *
  *  This file is subject to the terms and conditions of the GNU General Public
  *  License.  See the file COPYING in the main directory of the Linux
@@ -36,6 +38,7 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/pagemap.h>
+#include <linux/prio_heap.h>
 #include <linux/proc_fs.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
@@ -52,8 +55,7 @@
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
 #include <linux/mutex.h>
-
-#define CPUSET_SUPER_MAGIC		0x27e0eb
+#include <linux/kfifo.h>
 
 /*
  * Tracks how many cpusets are currently defined in system.
@@ -62,6 +64,10 @@
  */
 int number_of_cpusets __read_mostly;
 
+/* Retrieve the cpuset from a cgroup */
+struct cgroup_subsys cpuset_subsys;
+struct cpuset;
+
 /* See "Frequency meter" comments, below. */
 
 struct fmeter {
@@ -72,24 +78,13 @@
 };
 
 struct cpuset {
+	struct cgroup_subsys_state css;
+
 	unsigned long flags;		/* "unsigned long" so bitops work */
 	cpumask_t cpus_allowed;		/* CPUs allowed to tasks in cpuset */
 	nodemask_t mems_allowed;	/* Memory Nodes allowed to tasks */
 
-	/*
-	 * Count is atomic so can incr (fork) or decr (exit) without a lock.
-	 */
-	atomic_t count;			/* count tasks using this cpuset */
-
-	/*
-	 * We link our 'sibling' struct into our parents 'children'.
-	 * Our children link their 'sibling' into our 'children'.
-	 */
-	struct list_head sibling;	/* my parents children */
-	struct list_head children;	/* my children */
-
 	struct cpuset *parent;		/* my parent */
-	struct dentry *dentry;		/* cpuset fs entry */
 
 	/*
 	 * Copy of global cpuset_mems_generation as of the most
@@ -98,15 +93,32 @@
 	int mems_generation;
 
 	struct fmeter fmeter;		/* memory_pressure filter */
+
+	/* partition number for rebuild_sched_domains() */
+	int pn;
 };
 
+/* Retrieve the cpuset for a cgroup */
+static inline struct cpuset *cgroup_cs(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont, cpuset_subsys_id),
+			    struct cpuset, css);
+}
+
+/* Retrieve the cpuset for a task */
+static inline struct cpuset *task_cs(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, cpuset_subsys_id),
+			    struct cpuset, css);
+}
+
+
 /* bits in struct cpuset flags field */
 typedef enum {
 	CS_CPU_EXCLUSIVE,
 	CS_MEM_EXCLUSIVE,
 	CS_MEMORY_MIGRATE,
-	CS_REMOVED,
-	CS_NOTIFY_ON_RELEASE,
+	CS_SCHED_LOAD_BALANCE,
 	CS_SPREAD_PAGE,
 	CS_SPREAD_SLAB,
 } cpuset_flagbits_t;
@@ -122,14 +134,9 @@
 	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
 }
 
-static inline int is_removed(const struct cpuset *cs)
+static inline int is_sched_load_balance(const struct cpuset *cs)
 {
-	return test_bit(CS_REMOVED, &cs->flags);
-}
-
-static inline int notify_on_release(const struct cpuset *cs)
-{
-	return test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
+	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 }
 
 static inline int is_memory_migrate(const struct cpuset *cs)
@@ -172,14 +179,8 @@
 	.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
 	.cpus_allowed = CPU_MASK_ALL,
 	.mems_allowed = NODE_MASK_ALL,
-	.count = ATOMIC_INIT(0),
-	.sibling = LIST_HEAD_INIT(top_cpuset.sibling),
-	.children = LIST_HEAD_INIT(top_cpuset.children),
 };
 
-static struct vfsmount *cpuset_mount;
-static struct super_block *cpuset_sb;
-
 /*
  * We have two global cpuset mutexes below.  They can nest.
  * It is ok to first take manage_mutex, then nest callback_mutex.  We also
@@ -263,297 +264,33 @@
  * the routine cpuset_update_task_memory_state().
  */
 
-static DEFINE_MUTEX(manage_mutex);
 static DEFINE_MUTEX(callback_mutex);
 
-/*
- * A couple of forward declarations required, due to cyclic reference loop:
- *  cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file
- *  -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir.
- */
-
-static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode);
-static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry);
-
-static struct backing_dev_info cpuset_backing_dev_info = {
-	.ra_pages = 0,		/* No readahead */
-	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
-};
-
-static struct inode *cpuset_new_inode(mode_t mode)
-{
-	struct inode *inode = new_inode(cpuset_sb);
-
-	if (inode) {
-		inode->i_mode = mode;
-		inode->i_uid = current->fsuid;
-		inode->i_gid = current->fsgid;
-		inode->i_blocks = 0;
-		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-		inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info;
-	}
-	return inode;
-}
-
-static void cpuset_diput(struct dentry *dentry, struct inode *inode)
-{
-	/* is dentry a directory ? if so, kfree() associated cpuset */
-	if (S_ISDIR(inode->i_mode)) {
-		struct cpuset *cs = dentry->d_fsdata;
-		BUG_ON(!(is_removed(cs)));
-		kfree(cs);
-	}
-	iput(inode);
-}
-
-static struct dentry_operations cpuset_dops = {
-	.d_iput = cpuset_diput,
-};
-
-static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name)
-{
-	struct dentry *d = lookup_one_len(name, parent, strlen(name));
-	if (!IS_ERR(d))
-		d->d_op = &cpuset_dops;
-	return d;
-}
-
-static void remove_dir(struct dentry *d)
-{
-	struct dentry *parent = dget(d->d_parent);
-
-	d_delete(d);
-	simple_rmdir(parent->d_inode, d);
-	dput(parent);
-}
-
-/*
- * NOTE : the dentry must have been dget()'ed
- */
-static void cpuset_d_remove_dir(struct dentry *dentry)
-{
-	struct list_head *node;
-
-	spin_lock(&dcache_lock);
-	node = dentry->d_subdirs.next;
-	while (node != &dentry->d_subdirs) {
-		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
-		list_del_init(node);
-		if (d->d_inode) {
-			d = dget_locked(d);
-			spin_unlock(&dcache_lock);
-			d_delete(d);
-			simple_unlink(dentry->d_inode, d);
-			dput(d);
-			spin_lock(&dcache_lock);
-		}
-		node = dentry->d_subdirs.next;
-	}
-	list_del_init(&dentry->d_u.d_child);
-	spin_unlock(&dcache_lock);
-	remove_dir(dentry);
-}
-
-static struct super_operations cpuset_ops = {
-	.statfs = simple_statfs,
-	.drop_inode = generic_delete_inode,
-};
-
-static int cpuset_fill_super(struct super_block *sb, void *unused_data,
-							int unused_silent)
-{
-	struct inode *inode;
-	struct dentry *root;
-
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
-	sb->s_magic = CPUSET_SUPER_MAGIC;
-	sb->s_op = &cpuset_ops;
-	cpuset_sb = sb;
-
-	inode = cpuset_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR);
-	if (inode) {
-		inode->i_op = &simple_dir_inode_operations;
-		inode->i_fop = &simple_dir_operations;
-		/* directories start off with i_nlink == 2 (for "." entry) */
-		inc_nlink(inode);
-	} else {
-		return -ENOMEM;
-	}
-
-	root = d_alloc_root(inode);
-	if (!root) {
-		iput(inode);
-		return -ENOMEM;
-	}
-	sb->s_root = root;
-	return 0;
-}
-
+/* This is ugly, but preserves the userspace API for existing cpuset
+ * users. If someone tries to mount the "cpuset" filesystem, we
+ * silently switch it to mount "cgroup" instead */
 static int cpuset_get_sb(struct file_system_type *fs_type,
 			 int flags, const char *unused_dev_name,
 			 void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt);
+	struct file_system_type *cgroup_fs = get_fs_type("cgroup");
+	int ret = -ENODEV;
+	if (cgroup_fs) {
+		char mountopts[] =
+			"cpuset,noprefix,"
+			"release_agent=/sbin/cpuset_release_agent";
+		ret = cgroup_fs->get_sb(cgroup_fs, flags,
+					   unused_dev_name, mountopts, mnt);
+		put_filesystem(cgroup_fs);
+	}
+	return ret;
 }
 
 static struct file_system_type cpuset_fs_type = {
 	.name = "cpuset",
 	.get_sb = cpuset_get_sb,
-	.kill_sb = kill_litter_super,
 };
 
-/* struct cftype:
- *
- * The files in the cpuset filesystem mostly have a very simple read/write
- * handling, some common function will take care of it. Nevertheless some cases
- * (read tasks) are special and therefore I define this structure for every
- * kind of file.
- *
- *
- * When reading/writing to a file:
- *	- the cpuset to use in file->f_path.dentry->d_parent->d_fsdata
- *	- the 'cftype' of the file is file->f_path.dentry->d_fsdata
- */
-
-struct cftype {
-	char *name;
-	int private;
-	int (*open) (struct inode *inode, struct file *file);
-	ssize_t (*read) (struct file *file, char __user *buf, size_t nbytes,
-							loff_t *ppos);
-	int (*write) (struct file *file, const char __user *buf, size_t nbytes,
-							loff_t *ppos);
-	int (*release) (struct inode *inode, struct file *file);
-};
-
-static inline struct cpuset *__d_cs(struct dentry *dentry)
-{
-	return dentry->d_fsdata;
-}
-
-static inline struct cftype *__d_cft(struct dentry *dentry)
-{
-	return dentry->d_fsdata;
-}
-
-/*
- * Call with manage_mutex held.  Writes path of cpuset into buf.
- * Returns 0 on success, -errno on error.
- */
-
-static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
-{
-	char *start;
-
-	start = buf + buflen;
-
-	*--start = '\0';
-	for (;;) {
-		int len = cs->dentry->d_name.len;
-		if ((start -= len) < buf)
-			return -ENAMETOOLONG;
-		memcpy(start, cs->dentry->d_name.name, len);
-		cs = cs->parent;
-		if (!cs)
-			break;
-		if (!cs->parent)
-			continue;
-		if (--start < buf)
-			return -ENAMETOOLONG;
-		*start = '/';
-	}
-	memmove(buf, start, buf + buflen - start);
-	return 0;
-}
-
-/*
- * Notify userspace when a cpuset is released, by running
- * /sbin/cpuset_release_agent with the name of the cpuset (path
- * relative to the root of cpuset file system) as the argument.
- *
- * Most likely, this user command will try to rmdir this cpuset.
- *
- * This races with the possibility that some other task will be
- * attached to this cpuset before it is removed, or that some other
- * user task will 'mkdir' a child cpuset of this cpuset.  That's ok.
- * The presumed 'rmdir' will fail quietly if this cpuset is no longer
- * unused, and this cpuset will be reprieved from its death sentence,
- * to continue to serve a useful existence.  Next time it's released,
- * we will get notified again, if it still has 'notify_on_release' set.
- *
- * The final arg to call_usermodehelper() is 0, which means don't
- * wait.  The separate /sbin/cpuset_release_agent task is forked by
- * call_usermodehelper(), then control in this thread returns here,
- * without waiting for the release agent task.  We don't bother to
- * wait because the caller of this routine has no use for the exit
- * status of the /sbin/cpuset_release_agent task, so no sense holding
- * our caller up for that.
- *
- * When we had only one cpuset mutex, we had to call this
- * without holding it, to avoid deadlock when call_usermodehelper()
- * allocated memory.  With two locks, we could now call this while
- * holding manage_mutex, but we still don't, so as to minimize
- * the time manage_mutex is held.
- */
-
-static void cpuset_release_agent(const char *pathbuf)
-{
-	char *argv[3], *envp[3];
-	int i;
-
-	if (!pathbuf)
-		return;
-
-	i = 0;
-	argv[i++] = "/sbin/cpuset_release_agent";
-	argv[i++] = (char *)pathbuf;
-	argv[i] = NULL;
-
-	i = 0;
-	/* minimal command environment */
-	envp[i++] = "HOME=/";
-	envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-	envp[i] = NULL;
-
-	call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
-	kfree(pathbuf);
-}
-
-/*
- * Either cs->count of using tasks transitioned to zero, or the
- * cs->children list of child cpusets just became empty.  If this
- * cs is notify_on_release() and now both the user count is zero and
- * the list of children is empty, prepare cpuset path in a kmalloc'd
- * buffer, to be returned via ppathbuf, so that the caller can invoke
- * cpuset_release_agent() with it later on, once manage_mutex is dropped.
- * Call here with manage_mutex held.
- *
- * This check_for_release() routine is responsible for kmalloc'ing
- * pathbuf.  The above cpuset_release_agent() is responsible for
- * kfree'ing pathbuf.  The caller of these routines is responsible
- * for providing a pathbuf pointer, initialized to NULL, then
- * calling check_for_release() with manage_mutex held and the address
- * of the pathbuf pointer, then dropping manage_mutex, then calling
- * cpuset_release_agent() with pathbuf, as set by check_for_release().
- */
-
-static void check_for_release(struct cpuset *cs, char **ppathbuf)
-{
-	if (notify_on_release(cs) && atomic_read(&cs->count) == 0 &&
-	    list_empty(&cs->children)) {
-		char *buf;
-
-		buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
-		if (!buf)
-			return;
-		if (cpuset_path(cs, buf, PAGE_SIZE) < 0)
-			kfree(buf);
-		else
-			*ppathbuf = buf;
-	}
-}
-
 /*
  * Return in *pmask the portion of a cpusets's cpus_allowed that
  * are online.  If none are online, walk up the cpuset hierarchy
@@ -653,20 +390,19 @@
 	struct task_struct *tsk = current;
 	struct cpuset *cs;
 
-	if (tsk->cpuset == &top_cpuset) {
+	if (task_cs(tsk) == &top_cpuset) {
 		/* Don't need rcu for top_cpuset.  It's never freed. */
 		my_cpusets_mem_gen = top_cpuset.mems_generation;
 	} else {
 		rcu_read_lock();
-		cs = rcu_dereference(tsk->cpuset);
-		my_cpusets_mem_gen = cs->mems_generation;
+		my_cpusets_mem_gen = task_cs(current)->mems_generation;
 		rcu_read_unlock();
 	}
 
 	if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
 		mutex_lock(&callback_mutex);
 		task_lock(tsk);
-		cs = tsk->cpuset;	/* Maybe changed when task not locked */
+		cs = task_cs(tsk); /* Maybe changed when task not locked */
 		guarantee_online_mems(cs, &tsk->mems_allowed);
 		tsk->cpuset_mems_generation = cs->mems_generation;
 		if (is_spread_page(cs))
@@ -721,11 +457,12 @@
 
 static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 {
+	struct cgroup *cont;
 	struct cpuset *c, *par;
 
 	/* Each of our child cpusets must be a subset of us */
-	list_for_each_entry(c, &cur->children, sibling) {
-		if (!is_cpuset_subset(c, trial))
+	list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
+		if (!is_cpuset_subset(cgroup_cs(cont), trial))
 			return -EBUSY;
 	}
 
@@ -740,7 +477,8 @@
 		return -EACCES;
 
 	/* If either I or some sibling (!= me) is exclusive, we can't overlap */
-	list_for_each_entry(c, &par->children, sibling) {
+	list_for_each_entry(cont, &par->css.cgroup->children, sibling) {
+		c = cgroup_cs(cont);
 		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
 		    c != cur &&
 		    cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
@@ -751,17 +489,265 @@
 			return -EINVAL;
 	}
 
+	/* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
+	if (cgroup_task_count(cur->css.cgroup)) {
+		if (cpus_empty(trial->cpus_allowed) ||
+		    nodes_empty(trial->mems_allowed)) {
+			return -ENOSPC;
+		}
+	}
+
 	return 0;
 }
 
 /*
+ * Helper routine for rebuild_sched_domains().
+ * Do cpusets a, b have overlapping cpus_allowed masks?
+ */
+
+static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
+{
+	return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
+}
+
+/*
+ * rebuild_sched_domains()
+ *
+ * If the flag 'sched_load_balance' of any cpuset with non-empty
+ * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
+ * which has that flag enabled, or if any cpuset with a non-empty
+ * 'cpus' is removed, then call this routine to rebuild the
+ * scheduler's dynamic sched domains.
+ *
+ * This routine builds a partial partition of the systems CPUs
+ * (the set of non-overlappping cpumask_t's in the array 'part'
+ * below), and passes that partial partition to the kernel/sched.c
+ * partition_sched_domains() routine, which will rebuild the
+ * schedulers load balancing domains (sched domains) as specified
+ * by that partial partition.  A 'partial partition' is a set of
+ * non-overlapping subsets whose union is a subset of that set.
+ *
+ * See "What is sched_load_balance" in Documentation/cpusets.txt
+ * for a background explanation of this.
+ *
+ * Does not return errors, on the theory that the callers of this
+ * routine would rather not worry about failures to rebuild sched
+ * domains when operating in the severe memory shortage situations
+ * that could cause allocation failures below.
+ *
+ * Call with cgroup_mutex held.  May take callback_mutex during
+ * call due to the kfifo_alloc() and kmalloc() calls.  May nest
+ * a call to the lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
+ * Must not be called holding callback_mutex, because we must not
+ * call lock_cpu_hotplug() while holding callback_mutex.  Elsewhere
+ * the kernel nests callback_mutex inside lock_cpu_hotplug() calls.
+ * So the reverse nesting would risk an ABBA deadlock.
+ *
+ * The three key local variables below are:
+ *    q  - a kfifo queue of cpuset pointers, used to implement a
+ *	   top-down scan of all cpusets.  This scan loads a pointer
+ *	   to each cpuset marked is_sched_load_balance into the
+ *	   array 'csa'.  For our purposes, rebuilding the schedulers
+ *	   sched domains, we can ignore !is_sched_load_balance cpusets.
+ *  csa  - (for CpuSet Array) Array of pointers to all the cpusets
+ *	   that need to be load balanced, for convenient iterative
+ *	   access by the subsequent code that finds the best partition,
+ *	   i.e the set of domains (subsets) of CPUs such that the
+ *	   cpus_allowed of every cpuset marked is_sched_load_balance
+ *	   is a subset of one of these domains, while there are as
+ *	   many such domains as possible, each as small as possible.
+ * doms  - Conversion of 'csa' to an array of cpumasks, for passing to
+ *	   the kernel/sched.c routine partition_sched_domains() in a
+ *	   convenient format, that can be easily compared to the prior
+ *	   value to determine what partition elements (sched domains)
+ *	   were changed (added or removed.)
+ *
+ * Finding the best partition (set of domains):
+ *	The triple nested loops below over i, j, k scan over the
+ *	load balanced cpusets (using the array of cpuset pointers in
+ *	csa[]) looking for pairs of cpusets that have overlapping
+ *	cpus_allowed, but which don't have the same 'pn' partition
+ *	number and gives them in the same partition number.  It keeps
+ *	looping on the 'restart' label until it can no longer find
+ *	any such pairs.
+ *
+ *	The union of the cpus_allowed masks from the set of
+ *	all cpusets having the same 'pn' value then form the one
+ *	element of the partition (one sched domain) to be passed to
+ *	partition_sched_domains().
+ */
+
+static void rebuild_sched_domains(void)
+{
+	struct kfifo *q;	/* queue of cpusets to be scanned */
+	struct cpuset *cp;	/* scans q */
+	struct cpuset **csa;	/* array of all cpuset ptrs */
+	int csn;		/* how many cpuset ptrs in csa so far */
+	int i, j, k;		/* indices for partition finding loops */
+	cpumask_t *doms;	/* resulting partition; i.e. sched domains */
+	int ndoms;		/* number of sched domains in result */
+	int nslot;		/* next empty doms[] cpumask_t slot */
+
+	q = NULL;
+	csa = NULL;
+	doms = NULL;
+
+	/* Special case for the 99% of systems with one, full, sched domain */
+	if (is_sched_load_balance(&top_cpuset)) {
+		ndoms = 1;
+		doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+		if (!doms)
+			goto rebuild;
+		*doms = top_cpuset.cpus_allowed;
+		goto rebuild;
+	}
+
+	q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL);
+	if (IS_ERR(q))
+		goto done;
+	csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
+	if (!csa)
+		goto done;
+	csn = 0;
+
+	cp = &top_cpuset;
+	__kfifo_put(q, (void *)&cp, sizeof(cp));
+	while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
+		struct cgroup *cont;
+		struct cpuset *child;   /* scans child cpusets of cp */
+		if (is_sched_load_balance(cp))
+			csa[csn++] = cp;
+		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
+			child = cgroup_cs(cont);
+			__kfifo_put(q, (void *)&child, sizeof(cp));
+		}
+  	}
+
+	for (i = 0; i < csn; i++)
+		csa[i]->pn = i;
+	ndoms = csn;
+
+restart:
+	/* Find the best partition (set of sched domains) */
+	for (i = 0; i < csn; i++) {
+		struct cpuset *a = csa[i];
+		int apn = a->pn;
+
+		for (j = 0; j < csn; j++) {
+			struct cpuset *b = csa[j];
+			int bpn = b->pn;
+
+			if (apn != bpn && cpusets_overlap(a, b)) {
+				for (k = 0; k < csn; k++) {
+					struct cpuset *c = csa[k];
+
+					if (c->pn == bpn)
+						c->pn = apn;
+				}
+				ndoms--;	/* one less element */
+				goto restart;
+			}
+		}
+	}
+
+	/* Convert <csn, csa> to <ndoms, doms> */
+	doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
+	if (!doms)
+		goto rebuild;
+
+	for (nslot = 0, i = 0; i < csn; i++) {
+		struct cpuset *a = csa[i];
+		int apn = a->pn;
+
+		if (apn >= 0) {
+			cpumask_t *dp = doms + nslot;
+
+			if (nslot == ndoms) {
+				static int warnings = 10;
+				if (warnings) {
+					printk(KERN_WARNING
+					 "rebuild_sched_domains confused:"
+					  " nslot %d, ndoms %d, csn %d, i %d,"
+					  " apn %d\n",
+					  nslot, ndoms, csn, i, apn);
+					warnings--;
+				}
+				continue;
+			}
+
+			cpus_clear(*dp);
+			for (j = i; j < csn; j++) {
+				struct cpuset *b = csa[j];
+
+				if (apn == b->pn) {
+					cpus_or(*dp, *dp, b->cpus_allowed);
+					b->pn = -1;
+				}
+			}
+			nslot++;
+		}
+	}
+	BUG_ON(nslot != ndoms);
+
+rebuild:
+	/* Have scheduler rebuild sched domains */
+	lock_cpu_hotplug();
+	partition_sched_domains(ndoms, doms);
+	unlock_cpu_hotplug();
+
+done:
+	if (q && !IS_ERR(q))
+		kfifo_free(q);
+	kfree(csa);
+	/* Don't kfree(doms) -- partition_sched_domains() does that. */
+}
+
+static inline int started_after_time(struct task_struct *t1,
+				     struct timespec *time,
+				     struct task_struct *t2)
+{
+	int start_diff = timespec_compare(&t1->start_time, time);
+	if (start_diff > 0) {
+		return 1;
+	} else if (start_diff < 0) {
+		return 0;
+	} else {
+		/*
+		 * Arbitrarily, if two processes started at the same
+		 * time, we'll say that the lower pointer value
+		 * started first. Note that t2 may have exited by now
+		 * so this may not be a valid pointer any longer, but
+		 * that's fine - it still serves to distinguish
+		 * between two tasks started (effectively)
+		 * simultaneously.
+		 */
+		return t1 > t2;
+	}
+}
+
+static inline int started_after(void *p1, void *p2)
+{
+	struct task_struct *t1 = p1;
+	struct task_struct *t2 = p2;
+	return started_after_time(t1, &t2->start_time, t2);
+}
+
+/*
  * Call with manage_mutex held.  May take callback_mutex during call.
  */
 
 static int update_cpumask(struct cpuset *cs, char *buf)
 {
 	struct cpuset trialcs;
-	int retval;
+	int retval, i;
+	int is_load_balanced;
+	struct cgroup_iter it;
+	struct cgroup *cgrp = cs->css.cgroup;
+	struct task_struct *p, *dropped;
+	/* Never dereference latest_task, since it's not refcounted */
+	struct task_struct *latest_task = NULL;
+	struct ptr_heap heap;
+	struct timespec latest_time = { 0, 0 };
 
 	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
 	if (cs == &top_cpuset)
@@ -770,11 +756,13 @@
 	trialcs = *cs;
 
 	/*
-	 * We allow a cpuset's cpus_allowed to be empty; if it has attached
-	 * tasks, we'll catch it later when we validate the change and return
-	 * -ENOSPC.
+	 * An empty cpus_allowed is ok iff there are no tasks in the cpuset.
+	 * Since cpulist_parse() fails on an empty mask, we special case
+	 * that parsing.  The validate_change() call ensures that cpusets
+	 * with tasks have cpus.
 	 */
-	if (!buf[0] || (buf[0] == '\n' && !buf[1])) {
+	buf = strstrip(buf);
+	if (!*buf) {
 		cpus_clear(trialcs.cpus_allowed);
 	} else {
 		retval = cpulist_parse(buf, trialcs.cpus_allowed);
@@ -782,15 +770,79 @@
 			return retval;
 	}
 	cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map);
-	/* cpus_allowed cannot be empty for a cpuset with attached tasks. */
-	if (atomic_read(&cs->count) && cpus_empty(trialcs.cpus_allowed))
-		return -ENOSPC;
 	retval = validate_change(cs, &trialcs);
 	if (retval < 0)
 		return retval;
+
+	/* Nothing to do if the cpus didn't change */
+	if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
+		return 0;
+	retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
+	if (retval)
+		return retval;
+
+	is_load_balanced = is_sched_load_balance(&trialcs);
+
 	mutex_lock(&callback_mutex);
 	cs->cpus_allowed = trialcs.cpus_allowed;
 	mutex_unlock(&callback_mutex);
+
+ again:
+	/*
+	 * Scan tasks in the cpuset, and update the cpumasks of any
+	 * that need an update. Since we can't call set_cpus_allowed()
+	 * while holding tasklist_lock, gather tasks to be processed
+	 * in a heap structure. If the statically-sized heap fills up,
+	 * overflow tasks that started later, and in future iterations
+	 * only consider tasks that started after the latest task in
+	 * the previous pass. This guarantees forward progress and
+	 * that we don't miss any tasks
+	 */
+	heap.size = 0;
+	cgroup_iter_start(cgrp, &it);
+	while ((p = cgroup_iter_next(cgrp, &it))) {
+		/* Only affect tasks that don't have the right cpus_allowed */
+		if (cpus_equal(p->cpus_allowed, cs->cpus_allowed))
+			continue;
+		/*
+		 * Only process tasks that started after the last task
+		 * we processed
+		 */
+		if (!started_after_time(p, &latest_time, latest_task))
+			continue;
+		dropped = heap_insert(&heap, p);
+		if (dropped == NULL) {
+			get_task_struct(p);
+		} else if (dropped != p) {
+			get_task_struct(p);
+			put_task_struct(dropped);
+		}
+	}
+	cgroup_iter_end(cgrp, &it);
+	if (heap.size) {
+		for (i = 0; i < heap.size; i++) {
+			struct task_struct *p = heap.ptrs[i];
+			if (i == 0) {
+				latest_time = p->start_time;
+				latest_task = p;
+			}
+			set_cpus_allowed(p, cs->cpus_allowed);
+			put_task_struct(p);
+		}
+		/*
+		 * If we had to process any tasks at all, scan again
+		 * in case some of them were in the middle of forking
+		 * children that didn't notice the new cpumask
+		 * restriction.  Not the most efficient way to do it,
+		 * but it avoids having to take callback_mutex in the
+		 * fork path
+		 */
+		goto again;
+	}
+	heap_free(&heap);
+	if (is_load_balanced)
+		rebuild_sched_domains();
+
 	return 0;
 }
 
@@ -839,7 +891,7 @@
 	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
 
 	mutex_lock(&callback_mutex);
-	guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed);
+	guarantee_online_mems(task_cs(tsk),&tsk->mems_allowed);
 	mutex_unlock(&callback_mutex);
 }
 
@@ -857,16 +909,19 @@
  * their mempolicies to the cpusets new mems_allowed.
  */
 
+static void *cpuset_being_rebound;
+
 static int update_nodemask(struct cpuset *cs, char *buf)
 {
 	struct cpuset trialcs;
 	nodemask_t oldmem;
-	struct task_struct *g, *p;
+	struct task_struct *p;
 	struct mm_struct **mmarray;
 	int i, n, ntasks;
 	int migrate;
 	int fudge;
 	int retval;
+	struct cgroup_iter it;
 
 	/*
 	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
@@ -878,29 +933,19 @@
 	trialcs = *cs;
 
 	/*
-	 * We allow a cpuset's mems_allowed to be empty; if it has attached
-	 * tasks, we'll catch it later when we validate the change and return
-	 * -ENOSPC.
+	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
+	 * Since nodelist_parse() fails on an empty mask, we special case
+	 * that parsing.  The validate_change() call ensures that cpusets
+	 * with tasks have memory.
 	 */
-	if (!buf[0] || (buf[0] == '\n' && !buf[1])) {
+	buf = strstrip(buf);
+	if (!*buf) {
 		nodes_clear(trialcs.mems_allowed);
 	} else {
 		retval = nodelist_parse(buf, trialcs.mems_allowed);
 		if (retval < 0)
 			goto done;
-		if (!nodes_intersects(trialcs.mems_allowed,
-						node_states[N_HIGH_MEMORY])) {
-			/*
-			 * error if only memoryless nodes specified.
-			 */
-			retval = -ENOSPC;
-			goto done;
-		}
 	}
-	/*
-	 * Exclude memoryless nodes.  We know that trialcs.mems_allowed
-	 * contains at least one node with memory.
-	 */
 	nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
 						node_states[N_HIGH_MEMORY]);
 	oldmem = cs->mems_allowed;
@@ -908,11 +953,6 @@
 		retval = 0;		/* Too easy - nothing to do */
 		goto done;
 	}
-	/* mems_allowed cannot be empty for a cpuset with attached tasks. */
-	if (atomic_read(&cs->count) && nodes_empty(trialcs.mems_allowed)) {
-		retval = -ENOSPC;
-		goto done;
-	}
 	retval = validate_change(cs, &trialcs);
 	if (retval < 0)
 		goto done;
@@ -922,7 +962,7 @@
 	cs->mems_generation = cpuset_mems_generation++;
 	mutex_unlock(&callback_mutex);
 
-	set_cpuset_being_rebound(cs);		/* causes mpol_copy() rebind */
+	cpuset_being_rebound = cs;		/* causes mpol_copy() rebind */
 
 	fudge = 10;				/* spare mmarray[] slots */
 	fudge += cpus_weight(cs->cpus_allowed);	/* imagine one fork-bomb/cpu */
@@ -936,13 +976,13 @@
 	 * enough mmarray[] w/o using GFP_ATOMIC.
 	 */
 	while (1) {
-		ntasks = atomic_read(&cs->count);	/* guess */
+		ntasks = cgroup_task_count(cs->css.cgroup);  /* guess */
 		ntasks += fudge;
 		mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
 		if (!mmarray)
 			goto done;
 		read_lock(&tasklist_lock);		/* block fork */
-		if (atomic_read(&cs->count) <= ntasks)
+		if (cgroup_task_count(cs->css.cgroup) <= ntasks)
 			break;				/* got enough */
 		read_unlock(&tasklist_lock);		/* try again */
 		kfree(mmarray);
@@ -951,21 +991,21 @@
 	n = 0;
 
 	/* Load up mmarray[] with mm reference for each task in cpuset. */
-	do_each_thread(g, p) {
+	cgroup_iter_start(cs->css.cgroup, &it);
+	while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
 		struct mm_struct *mm;
 
 		if (n >= ntasks) {
 			printk(KERN_WARNING
 				"Cpuset mempolicy rebind incomplete.\n");
-			continue;
+			break;
 		}
-		if (p->cpuset != cs)
-			continue;
 		mm = get_task_mm(p);
 		if (!mm)
 			continue;
 		mmarray[n++] = mm;
-	} while_each_thread(g, p);
+	}
+	cgroup_iter_end(cs->css.cgroup, &it);
 	read_unlock(&tasklist_lock);
 
 	/*
@@ -993,12 +1033,17 @@
 
 	/* We're done rebinding vma's to this cpusets new mems_allowed. */
 	kfree(mmarray);
-	set_cpuset_being_rebound(NULL);
+	cpuset_being_rebound = NULL;
 	retval = 0;
 done:
 	return retval;
 }
 
+int current_cpuset_is_being_rebound(void)
+{
+	return task_cs(current) == cpuset_being_rebound;
+}
+
 /*
  * Call with manage_mutex held.
  */
@@ -1015,6 +1060,7 @@
 /*
  * update_flag - read a 0 or a 1 in a file and update associated flag
  * bit:	the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
+ *				CS_SCHED_LOAD_BALANCE,
  *				CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
  *				CS_SPREAD_PAGE, CS_SPREAD_SLAB)
  * cs:	the cpuset to update
@@ -1028,6 +1074,7 @@
 	int turning_on;
 	struct cpuset trialcs;
 	int err;
+	int cpus_nonempty, balance_flag_changed;
 
 	turning_on = (simple_strtoul(buf, NULL, 10) != 0);
 
@@ -1040,10 +1087,18 @@
 	err = validate_change(cs, &trialcs);
 	if (err < 0)
 		return err;
+
+	cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
+	balance_flag_changed = (is_sched_load_balance(cs) !=
+		 			is_sched_load_balance(&trialcs));
+
 	mutex_lock(&callback_mutex);
 	cs->flags = trialcs.flags;
 	mutex_unlock(&callback_mutex);
 
+	if (cpus_nonempty && balance_flag_changed)
+		rebuild_sched_domains();
+
 	return 0;
 }
 
@@ -1145,85 +1200,34 @@
 	return val;
 }
 
-/*
- * Attack task specified by pid in 'pidbuf' to cpuset 'cs', possibly
- * writing the path of the old cpuset in 'ppathbuf' if it needs to be
- * notified on release.
- *
- * Call holding manage_mutex.  May take callback_mutex and task_lock of
- * the task 'pid' during call.
- */
-
-static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
+static int cpuset_can_attach(struct cgroup_subsys *ss,
+			     struct cgroup *cont, struct task_struct *tsk)
 {
-	pid_t pid;
-	struct task_struct *tsk;
-	struct cpuset *oldcs;
-	cpumask_t cpus;
-	nodemask_t from, to;
-	struct mm_struct *mm;
-	int retval;
+	struct cpuset *cs = cgroup_cs(cont);
 
-	if (sscanf(pidbuf, "%d", &pid) != 1)
-		return -EIO;
 	if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
 		return -ENOSPC;
 
-	if (pid) {
-		read_lock(&tasklist_lock);
+	return security_task_setscheduler(tsk, 0, NULL);
+}
 
-		tsk = find_task_by_pid(pid);
-		if (!tsk || tsk->flags & PF_EXITING) {
-			read_unlock(&tasklist_lock);
-			return -ESRCH;
-		}
-
-		get_task_struct(tsk);
-		read_unlock(&tasklist_lock);
-
-		if ((current->euid) && (current->euid != tsk->uid)
-		    && (current->euid != tsk->suid)) {
-			put_task_struct(tsk);
-			return -EACCES;
-		}
-	} else {
-		tsk = current;
-		get_task_struct(tsk);
-	}
-
-	retval = security_task_setscheduler(tsk, 0, NULL);
-	if (retval) {
-		put_task_struct(tsk);
-		return retval;
-	}
+static void cpuset_attach(struct cgroup_subsys *ss,
+			  struct cgroup *cont, struct cgroup *oldcont,
+			  struct task_struct *tsk)
+{
+	cpumask_t cpus;
+	nodemask_t from, to;
+	struct mm_struct *mm;
+	struct cpuset *cs = cgroup_cs(cont);
+	struct cpuset *oldcs = cgroup_cs(oldcont);
 
 	mutex_lock(&callback_mutex);
-
-	task_lock(tsk);
-	oldcs = tsk->cpuset;
-	/*
-	 * After getting 'oldcs' cpuset ptr, be sure still not exiting.
-	 * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack
-	 * then fail this attach_task(), to avoid breaking top_cpuset.count.
-	 */
-	if (tsk->flags & PF_EXITING) {
-		task_unlock(tsk);
-		mutex_unlock(&callback_mutex);
-		put_task_struct(tsk);
-		return -ESRCH;
-	}
-	atomic_inc(&cs->count);
-	rcu_assign_pointer(tsk->cpuset, cs);
-	task_unlock(tsk);
-
 	guarantee_online_cpus(cs, &cpus);
 	set_cpus_allowed(tsk, cpus);
+	mutex_unlock(&callback_mutex);
 
 	from = oldcs->mems_allowed;
 	to = cs->mems_allowed;
-
-	mutex_unlock(&callback_mutex);
-
 	mm = get_task_mm(tsk);
 	if (mm) {
 		mpol_rebind_mm(mm, &to);
@@ -1232,44 +1236,36 @@
 		mmput(mm);
 	}
 
-	put_task_struct(tsk);
-	synchronize_rcu();
-	if (atomic_dec_and_test(&oldcs->count))
-		check_for_release(oldcs, ppathbuf);
-	return 0;
 }
 
 /* The various types of files and directories in a cpuset file system */
 
 typedef enum {
-	FILE_ROOT,
-	FILE_DIR,
 	FILE_MEMORY_MIGRATE,
 	FILE_CPULIST,
 	FILE_MEMLIST,
 	FILE_CPU_EXCLUSIVE,
 	FILE_MEM_EXCLUSIVE,
-	FILE_NOTIFY_ON_RELEASE,
+	FILE_SCHED_LOAD_BALANCE,
 	FILE_MEMORY_PRESSURE_ENABLED,
 	FILE_MEMORY_PRESSURE,
 	FILE_SPREAD_PAGE,
 	FILE_SPREAD_SLAB,
-	FILE_TASKLIST,
 } cpuset_filetype_t;
 
-static ssize_t cpuset_common_file_write(struct file *file,
+static ssize_t cpuset_common_file_write(struct cgroup *cont,
+					struct cftype *cft,
+					struct file *file,
 					const char __user *userbuf,
 					size_t nbytes, loff_t *unused_ppos)
 {
-	struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
-	struct cftype *cft = __d_cft(file->f_path.dentry);
+	struct cpuset *cs = cgroup_cs(cont);
 	cpuset_filetype_t type = cft->private;
 	char *buffer;
-	char *pathbuf = NULL;
 	int retval = 0;
 
 	/* Crude upper limit on largest legitimate cpulist user might write. */
-	if (nbytes > 100 + 6 * max(NR_CPUS, MAX_NUMNODES))
+	if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
 		return -E2BIG;
 
 	/* +1 for nul-terminator */
@@ -1282,9 +1278,9 @@
 	}
 	buffer[nbytes] = 0;	/* nul-terminate */
 
-	mutex_lock(&manage_mutex);
+	cgroup_lock();
 
-	if (is_removed(cs)) {
+	if (cgroup_is_removed(cont)) {
 		retval = -ENODEV;
 		goto out2;
 	}
@@ -1302,8 +1298,8 @@
 	case FILE_MEM_EXCLUSIVE:
 		retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
 		break;
-	case FILE_NOTIFY_ON_RELEASE:
-		retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
+	case FILE_SCHED_LOAD_BALANCE:
+		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer);
 		break;
 	case FILE_MEMORY_MIGRATE:
 		retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
@@ -1322,9 +1318,6 @@
 		retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
 		cs->mems_generation = cpuset_mems_generation++;
 		break;
-	case FILE_TASKLIST:
-		retval = attach_task(cs, buffer, &pathbuf);
-		break;
 	default:
 		retval = -EINVAL;
 		goto out2;
@@ -1333,30 +1326,12 @@
 	if (retval == 0)
 		retval = nbytes;
 out2:
-	mutex_unlock(&manage_mutex);
-	cpuset_release_agent(pathbuf);
+	cgroup_unlock();
 out1:
 	kfree(buffer);
 	return retval;
 }
 
-static ssize_t cpuset_file_write(struct file *file, const char __user *buf,
-						size_t nbytes, loff_t *ppos)
-{
-	ssize_t retval = 0;
-	struct cftype *cft = __d_cft(file->f_path.dentry);
-	if (!cft)
-		return -ENODEV;
-
-	/* special function ? */
-	if (cft->write)
-		retval = cft->write(file, buf, nbytes, ppos);
-	else
-		retval = cpuset_common_file_write(file, buf, nbytes, ppos);
-
-	return retval;
-}
-
 /*
  * These ascii lists should be read in a single call, by using a user
  * buffer large enough to hold the entire map.  If read in smaller
@@ -1391,11 +1366,13 @@
 	return nodelist_scnprintf(page, PAGE_SIZE, mask);
 }
 
-static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
-				size_t nbytes, loff_t *ppos)
+static ssize_t cpuset_common_file_read(struct cgroup *cont,
+				       struct cftype *cft,
+				       struct file *file,
+				       char __user *buf,
+				       size_t nbytes, loff_t *ppos)
 {
-	struct cftype *cft = __d_cft(file->f_path.dentry);
-	struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
+	struct cpuset *cs = cgroup_cs(cont);
 	cpuset_filetype_t type = cft->private;
 	char *page;
 	ssize_t retval = 0;
@@ -1419,8 +1396,8 @@
 	case FILE_MEM_EXCLUSIVE:
 		*s++ = is_mem_exclusive(cs) ? '1' : '0';
 		break;
-	case FILE_NOTIFY_ON_RELEASE:
-		*s++ = notify_on_release(cs) ? '1' : '0';
+	case FILE_SCHED_LOAD_BALANCE:
+		*s++ = is_sched_load_balance(cs) ? '1' : '0';
 		break;
 	case FILE_MEMORY_MIGRATE:
 		*s++ = is_memory_migrate(cs) ? '1' : '0';
@@ -1449,390 +1426,150 @@
 	return retval;
 }
 
-static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbytes,
-								loff_t *ppos)
-{
-	ssize_t retval = 0;
-	struct cftype *cft = __d_cft(file->f_path.dentry);
-	if (!cft)
-		return -ENODEV;
 
-	/* special function ? */
-	if (cft->read)
-		retval = cft->read(file, buf, nbytes, ppos);
-	else
-		retval = cpuset_common_file_read(file, buf, nbytes, ppos);
 
-	return retval;
-}
 
-static int cpuset_file_open(struct inode *inode, struct file *file)
-{
-	int err;
-	struct cftype *cft;
-
-	err = generic_file_open(inode, file);
-	if (err)
-		return err;
-
-	cft = __d_cft(file->f_path.dentry);
-	if (!cft)
-		return -ENODEV;
-	if (cft->open)
-		err = cft->open(inode, file);
-	else
-		err = 0;
-
-	return err;
-}
-
-static int cpuset_file_release(struct inode *inode, struct file *file)
-{
-	struct cftype *cft = __d_cft(file->f_path.dentry);
-	if (cft->release)
-		return cft->release(inode, file);
-	return 0;
-}
-
-/*
- * cpuset_rename - Only allow simple rename of directories in place.
- */
-static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry,
-                  struct inode *new_dir, struct dentry *new_dentry)
-{
-	if (!S_ISDIR(old_dentry->d_inode->i_mode))
-		return -ENOTDIR;
-	if (new_dentry->d_inode)
-		return -EEXIST;
-	if (old_dir != new_dir)
-		return -EIO;
-	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
-}
-
-static const struct file_operations cpuset_file_operations = {
-	.read = cpuset_file_read,
-	.write = cpuset_file_write,
-	.llseek = generic_file_llseek,
-	.open = cpuset_file_open,
-	.release = cpuset_file_release,
-};
-
-static const struct inode_operations cpuset_dir_inode_operations = {
-	.lookup = simple_lookup,
-	.mkdir = cpuset_mkdir,
-	.rmdir = cpuset_rmdir,
-	.rename = cpuset_rename,
-};
-
-static int cpuset_create_file(struct dentry *dentry, int mode)
-{
-	struct inode *inode;
-
-	if (!dentry)
-		return -ENOENT;
-	if (dentry->d_inode)
-		return -EEXIST;
-
-	inode = cpuset_new_inode(mode);
-	if (!inode)
-		return -ENOMEM;
-
-	if (S_ISDIR(mode)) {
-		inode->i_op = &cpuset_dir_inode_operations;
-		inode->i_fop = &simple_dir_operations;
-
-		/* start off with i_nlink == 2 (for "." entry) */
-		inc_nlink(inode);
-	} else if (S_ISREG(mode)) {
-		inode->i_size = 0;
-		inode->i_fop = &cpuset_file_operations;
-	}
-
-	d_instantiate(dentry, inode);
-	dget(dentry);	/* Extra count - pin the dentry in core */
-	return 0;
-}
-
-/*
- *	cpuset_create_dir - create a directory for an object.
- *	cs:	the cpuset we create the directory for.
- *		It must have a valid ->parent field
- *		And we are going to fill its ->dentry field.
- *	name:	The name to give to the cpuset directory. Will be copied.
- *	mode:	mode to set on new directory.
- */
-
-static int cpuset_create_dir(struct cpuset *cs, const char *name, int mode)
-{
-	struct dentry *dentry = NULL;
-	struct dentry *parent;
-	int error = 0;
-
-	parent = cs->parent->dentry;
-	dentry = cpuset_get_dentry(parent, name);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-	error = cpuset_create_file(dentry, S_IFDIR | mode);
-	if (!error) {
-		dentry->d_fsdata = cs;
-		inc_nlink(parent->d_inode);
-		cs->dentry = dentry;
-	}
-	dput(dentry);
-
-	return error;
-}
-
-static int cpuset_add_file(struct dentry *dir, const struct cftype *cft)
-{
-	struct dentry *dentry;
-	int error;
-
-	mutex_lock(&dir->d_inode->i_mutex);
-	dentry = cpuset_get_dentry(dir, cft->name);
-	if (!IS_ERR(dentry)) {
-		error = cpuset_create_file(dentry, 0644 | S_IFREG);
-		if (!error)
-			dentry->d_fsdata = (void *)cft;
-		dput(dentry);
-	} else
-		error = PTR_ERR(dentry);
-	mutex_unlock(&dir->d_inode->i_mutex);
-	return error;
-}
-
-/*
- * Stuff for reading the 'tasks' file.
- *
- * Reading this file can return large amounts of data if a cpuset has
- * *lots* of attached tasks. So it may need several calls to read(),
- * but we cannot guarantee that the information we produce is correct
- * unless we produce it entirely atomically.
- *
- * Upon tasks file open(), a struct ctr_struct is allocated, that
- * will have a pointer to an array (also allocated here).  The struct
- * ctr_struct * is stored in file->private_data.  Its resources will
- * be freed by release() when the file is closed.  The array is used
- * to sprintf the PIDs and then used by read().
- */
-
-/* cpusets_tasks_read array */
-
-struct ctr_struct {
-	char *buf;
-	int bufsz;
-};
-
-/*
- * Load into 'pidarray' up to 'npids' of the tasks using cpuset 'cs'.
- * Return actual number of pids loaded.  No need to task_lock(p)
- * when reading out p->cpuset, as we don't really care if it changes
- * on the next cycle, and we are not going to try to dereference it.
- */
-static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
-{
-	int n = 0;
-	struct task_struct *g, *p;
-
-	read_lock(&tasklist_lock);
-
-	do_each_thread(g, p) {
-		if (p->cpuset == cs) {
-			if (unlikely(n == npids))
-				goto array_full;
-			pidarray[n++] = p->pid;
-		}
-	} while_each_thread(g, p);
-
-array_full:
-	read_unlock(&tasklist_lock);
-	return n;
-}
-
-static int cmppid(const void *a, const void *b)
-{
-	return *(pid_t *)a - *(pid_t *)b;
-}
-
-/*
- * Convert array 'a' of 'npids' pid_t's to a string of newline separated
- * decimal pids in 'buf'.  Don't write more than 'sz' chars, but return
- * count 'cnt' of how many chars would be written if buf were large enough.
- */
-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
-{
-	int cnt = 0;
-	int i;
-
-	for (i = 0; i < npids; i++)
-		cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
-	return cnt;
-}
-
-/*
- * Handle an open on 'tasks' file.  Prepare a buffer listing the
- * process id's of tasks currently attached to the cpuset being opened.
- *
- * Does not require any specific cpuset mutexes, and does not take any.
- */
-static int cpuset_tasks_open(struct inode *unused, struct file *file)
-{
-	struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
-	struct ctr_struct *ctr;
-	pid_t *pidarray;
-	int npids;
-	char c;
-
-	if (!(file->f_mode & FMODE_READ))
-		return 0;
-
-	ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
-	if (!ctr)
-		goto err0;
-
-	/*
-	 * If cpuset gets more users after we read count, we won't have
-	 * enough space - tough.  This race is indistinguishable to the
-	 * caller from the case that the additional cpuset users didn't
-	 * show up until sometime later on.
-	 */
-	npids = atomic_read(&cs->count);
-	pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
-	if (!pidarray)
-		goto err1;
-
-	npids = pid_array_load(pidarray, npids, cs);
-	sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
-	/* Call pid_array_to_buf() twice, first just to get bufsz */
-	ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
-	ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
-	if (!ctr->buf)
-		goto err2;
-	ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
-
-	kfree(pidarray);
-	file->private_data = ctr;
-	return 0;
-
-err2:
-	kfree(pidarray);
-err1:
-	kfree(ctr);
-err0:
-	return -ENOMEM;
-}
-
-static ssize_t cpuset_tasks_read(struct file *file, char __user *buf,
-						size_t nbytes, loff_t *ppos)
-{
-	struct ctr_struct *ctr = file->private_data;
-
-	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
-}
-
-static int cpuset_tasks_release(struct inode *unused_inode, struct file *file)
-{
-	struct ctr_struct *ctr;
-
-	if (file->f_mode & FMODE_READ) {
-		ctr = file->private_data;
-		kfree(ctr->buf);
-		kfree(ctr);
-	}
-	return 0;
-}
 
 /*
  * for the common functions, 'private' gives the type of file
  */
 
-static struct cftype cft_tasks = {
-	.name = "tasks",
-	.open = cpuset_tasks_open,
-	.read = cpuset_tasks_read,
-	.release = cpuset_tasks_release,
-	.private = FILE_TASKLIST,
-};
-
 static struct cftype cft_cpus = {
 	.name = "cpus",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_CPULIST,
 };
 
 static struct cftype cft_mems = {
 	.name = "mems",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_MEMLIST,
 };
 
 static struct cftype cft_cpu_exclusive = {
 	.name = "cpu_exclusive",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_CPU_EXCLUSIVE,
 };
 
 static struct cftype cft_mem_exclusive = {
 	.name = "mem_exclusive",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_MEM_EXCLUSIVE,
 };
 
-static struct cftype cft_notify_on_release = {
-	.name = "notify_on_release",
-	.private = FILE_NOTIFY_ON_RELEASE,
+static struct cftype cft_sched_load_balance = {
+	.name = "sched_load_balance",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
+	.private = FILE_SCHED_LOAD_BALANCE,
 };
 
 static struct cftype cft_memory_migrate = {
 	.name = "memory_migrate",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_MEMORY_MIGRATE,
 };
 
 static struct cftype cft_memory_pressure_enabled = {
 	.name = "memory_pressure_enabled",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_MEMORY_PRESSURE_ENABLED,
 };
 
 static struct cftype cft_memory_pressure = {
 	.name = "memory_pressure",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_MEMORY_PRESSURE,
 };
 
 static struct cftype cft_spread_page = {
 	.name = "memory_spread_page",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_SPREAD_PAGE,
 };
 
 static struct cftype cft_spread_slab = {
 	.name = "memory_spread_slab",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_SPREAD_SLAB,
 };
 
-static int cpuset_populate_dir(struct dentry *cs_dentry)
+static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
 {
 	int err;
 
-	if ((err = cpuset_add_file(cs_dentry, &cft_cpus)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_cpus)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_mems)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_mems)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_cpu_exclusive)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_cpu_exclusive)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_mem_exclusive)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_memory_migrate)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
-		return err;
+	/* memory_pressure_enabled is in root cpuset only */
+	if (err == 0 && !cont->parent)
+		err = cgroup_add_file(cont, ss,
+					 &cft_memory_pressure_enabled);
 	return 0;
 }
 
 /*
+ * post_clone() is called at the end of cgroup_clone().
+ * 'cgroup' was just created automatically as a result of
+ * a cgroup_clone(), and the current task is about to
+ * be moved into 'cgroup'.
+ *
+ * Currently we refuse to set up the cgroup - thereby
+ * refusing the task to be entered, and as a result refusing
+ * the sys_unshare() or clone() which initiated it - if any
+ * sibling cpusets have exclusive cpus or mem.
+ *
+ * If this becomes a problem for some users who wish to
+ * allow that scenario, then cpuset_post_clone() could be
+ * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
+ * (and likewise for mems) to the new cgroup.
+ */
+static void cpuset_post_clone(struct cgroup_subsys *ss,
+			      struct cgroup *cgroup)
+{
+	struct cgroup *parent, *child;
+	struct cpuset *cs, *parent_cs;
+
+	parent = cgroup->parent;
+	list_for_each_entry(child, &parent->children, sibling) {
+		cs = cgroup_cs(child);
+		if (is_mem_exclusive(cs) || is_cpu_exclusive(cs))
+			return;
+	}
+	cs = cgroup_cs(cgroup);
+	parent_cs = cgroup_cs(parent);
+
+	cs->mems_allowed = parent_cs->mems_allowed;
+	cs->cpus_allowed = parent_cs->cpus_allowed;
+	return;
+}
+
+/*
  *	cpuset_create - create a cpuset
  *	parent:	cpuset that will be parent of the new cpuset.
  *	name:		name of the new cpuset. Will be strcpy'ed.
@@ -1841,106 +1578,77 @@
  *	Must be called with the mutex on the parent inode held
  */
 
-static long cpuset_create(struct cpuset *parent, const char *name, int mode)
+static struct cgroup_subsys_state *cpuset_create(
+	struct cgroup_subsys *ss,
+	struct cgroup *cont)
 {
 	struct cpuset *cs;
-	int err;
+	struct cpuset *parent;
 
+	if (!cont->parent) {
+		/* This is early initialization for the top cgroup */
+		top_cpuset.mems_generation = cpuset_mems_generation++;
+		return &top_cpuset.css;
+	}
+	parent = cgroup_cs(cont->parent);
 	cs = kmalloc(sizeof(*cs), GFP_KERNEL);
 	if (!cs)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	mutex_lock(&manage_mutex);
 	cpuset_update_task_memory_state();
 	cs->flags = 0;
-	if (notify_on_release(parent))
-		set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
 	if (is_spread_page(parent))
 		set_bit(CS_SPREAD_PAGE, &cs->flags);
 	if (is_spread_slab(parent))
 		set_bit(CS_SPREAD_SLAB, &cs->flags);
+	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 	cs->cpus_allowed = CPU_MASK_NONE;
 	cs->mems_allowed = NODE_MASK_NONE;
-	atomic_set(&cs->count, 0);
-	INIT_LIST_HEAD(&cs->sibling);
-	INIT_LIST_HEAD(&cs->children);
 	cs->mems_generation = cpuset_mems_generation++;
 	fmeter_init(&cs->fmeter);
 
 	cs->parent = parent;
-
-	mutex_lock(&callback_mutex);
-	list_add(&cs->sibling, &cs->parent->children);
 	number_of_cpusets++;
-	mutex_unlock(&callback_mutex);
-
-	err = cpuset_create_dir(cs, name, mode);
-	if (err < 0)
-		goto err;
-
-	/*
-	 * Release manage_mutex before cpuset_populate_dir() because it
-	 * will down() this new directory's i_mutex and if we race with
-	 * another mkdir, we might deadlock.
-	 */
-	mutex_unlock(&manage_mutex);
-
-	err = cpuset_populate_dir(cs->dentry);
-	/* If err < 0, we have a half-filled directory - oh well ;) */
-	return 0;
-err:
-	list_del(&cs->sibling);
-	mutex_unlock(&manage_mutex);
-	kfree(cs);
-	return err;
+	return &cs->css ;
 }
 
-static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+/*
+ * Locking note on the strange update_flag() call below:
+ *
+ * If the cpuset being removed has its flag 'sched_load_balance'
+ * enabled, then simulate turning sched_load_balance off, which
+ * will call rebuild_sched_domains().  The lock_cpu_hotplug()
+ * call in rebuild_sched_domains() must not be made while holding
+ * callback_mutex.  Elsewhere the kernel nests callback_mutex inside
+ * lock_cpu_hotplug() calls.  So the reverse nesting would risk an
+ * ABBA deadlock.
+ */
+
+static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
 {
-	struct cpuset *c_parent = dentry->d_parent->d_fsdata;
+	struct cpuset *cs = cgroup_cs(cont);
 
-	/* the vfs holds inode->i_mutex already */
-	return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
-}
-
-static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
-{
-	struct cpuset *cs = dentry->d_fsdata;
-	struct dentry *d;
-	struct cpuset *parent;
-	char *pathbuf = NULL;
-
-	/* the vfs holds both inode->i_mutex already */
-
-	mutex_lock(&manage_mutex);
 	cpuset_update_task_memory_state();
-	if (atomic_read(&cs->count) > 0) {
-		mutex_unlock(&manage_mutex);
-		return -EBUSY;
-	}
-	if (!list_empty(&cs->children)) {
-		mutex_unlock(&manage_mutex);
-		return -EBUSY;
-	}
-	parent = cs->parent;
-	mutex_lock(&callback_mutex);
-	set_bit(CS_REMOVED, &cs->flags);
-	list_del(&cs->sibling);	/* delete my sibling from parent->children */
-	spin_lock(&cs->dentry->d_lock);
-	d = dget(cs->dentry);
-	cs->dentry = NULL;
-	spin_unlock(&d->d_lock);
-	cpuset_d_remove_dir(d);
-	dput(d);
+
+	if (is_sched_load_balance(cs))
+		update_flag(CS_SCHED_LOAD_BALANCE, cs, "0");
+
 	number_of_cpusets--;
-	mutex_unlock(&callback_mutex);
-	if (list_empty(&parent->children))
-		check_for_release(parent, &pathbuf);
-	mutex_unlock(&manage_mutex);
-	cpuset_release_agent(pathbuf);
-	return 0;
+	kfree(cs);
 }
 
+struct cgroup_subsys cpuset_subsys = {
+	.name = "cpuset",
+	.create = cpuset_create,
+	.destroy  = cpuset_destroy,
+	.can_attach = cpuset_can_attach,
+	.attach = cpuset_attach,
+	.populate = cpuset_populate,
+	.post_clone = cpuset_post_clone,
+	.subsys_id = cpuset_subsys_id,
+	.early_init = 1,
+};
+
 /*
  * cpuset_init_early - just enough so that the calls to
  * cpuset_update_task_memory_state() in early init code
@@ -1949,13 +1657,11 @@
 
 int __init cpuset_init_early(void)
 {
-	struct task_struct *tsk = current;
-
-	tsk->cpuset = &top_cpuset;
-	tsk->cpuset->mems_generation = cpuset_mems_generation++;
+	top_cpuset.mems_generation = cpuset_mems_generation++;
 	return 0;
 }
 
+
 /**
  * cpuset_init - initialize cpusets at system boot
  *
@@ -1964,39 +1670,21 @@
 
 int __init cpuset_init(void)
 {
-	struct dentry *root;
-	int err;
+	int err = 0;
 
 	top_cpuset.cpus_allowed = CPU_MASK_ALL;
 	top_cpuset.mems_allowed = NODE_MASK_ALL;
 
 	fmeter_init(&top_cpuset.fmeter);
 	top_cpuset.mems_generation = cpuset_mems_generation++;
-
-	init_task.cpuset = &top_cpuset;
+	set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
 
 	err = register_filesystem(&cpuset_fs_type);
 	if (err < 0)
-		goto out;
-	cpuset_mount = kern_mount(&cpuset_fs_type);
-	if (IS_ERR(cpuset_mount)) {
-		printk(KERN_ERR "cpuset: could not mount!\n");
-		err = PTR_ERR(cpuset_mount);
-		cpuset_mount = NULL;
-		goto out;
-	}
-	root = cpuset_mount->mnt_sb->s_root;
-	root->d_fsdata = &top_cpuset;
-	inc_nlink(root->d_inode);
-	top_cpuset.dentry = root;
-	root->d_inode->i_op = &cpuset_dir_inode_operations;
+		return err;
+
 	number_of_cpusets = 1;
-	err = cpuset_populate_dir(root);
-	/* memory_pressure_enabled is in root cpuset only */
-	if (err == 0)
-		err = cpuset_add_file(root, &cft_memory_pressure_enabled);
-out:
-	return err;
+	return 0;
 }
 
 /*
@@ -2022,10 +1710,12 @@
 
 static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
 {
+	struct cgroup *cont;
 	struct cpuset *c;
 
 	/* Each of our child cpusets mems must be online */
-	list_for_each_entry(c, &cur->children, sibling) {
+	list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
+		c = cgroup_cs(cont);
 		guarantee_online_cpus_mems_in_subtree(c);
 		if (!cpus_empty(c->cpus_allowed))
 			guarantee_online_cpus(c, &c->cpus_allowed);
@@ -2053,7 +1743,7 @@
 
 static void common_cpu_mem_hotplug_unplug(void)
 {
-	mutex_lock(&manage_mutex);
+	cgroup_lock();
 	mutex_lock(&callback_mutex);
 
 	guarantee_online_cpus_mems_in_subtree(&top_cpuset);
@@ -2061,7 +1751,7 @@
 	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
 	mutex_unlock(&callback_mutex);
-	mutex_unlock(&manage_mutex);
+	cgroup_unlock();
 }
 
 /*
@@ -2074,8 +1764,8 @@
  * cpu_online_map on each CPU hotplug (cpuhp) event.
  */
 
-static int cpuset_handle_cpuhp(struct notifier_block *nb,
-				unsigned long phase, void *cpu)
+static int cpuset_handle_cpuhp(struct notifier_block *unused_nb,
+				unsigned long phase, void *unused_cpu)
 {
 	if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
 		return NOTIFY_DONE;
@@ -2113,109 +1803,7 @@
 }
 
 /**
- * cpuset_fork - attach newly forked task to its parents cpuset.
- * @tsk: pointer to task_struct of forking parent process.
- *
- * Description: A task inherits its parent's cpuset at fork().
- *
- * A pointer to the shared cpuset was automatically copied in fork.c
- * by dup_task_struct().  However, we ignore that copy, since it was
- * not made under the protection of task_lock(), so might no longer be
- * a valid cpuset pointer.  attach_task() might have already changed
- * current->cpuset, allowing the previously referenced cpuset to
- * be removed and freed.  Instead, we task_lock(current) and copy
- * its present value of current->cpuset for our freshly forked child.
- *
- * At the point that cpuset_fork() is called, 'current' is the parent
- * task, and the passed argument 'child' points to the child task.
- **/
 
-void cpuset_fork(struct task_struct *child)
-{
-	task_lock(current);
-	child->cpuset = current->cpuset;
-	atomic_inc(&child->cpuset->count);
-	task_unlock(current);
-}
-
-/**
- * cpuset_exit - detach cpuset from exiting task
- * @tsk: pointer to task_struct of exiting process
- *
- * Description: Detach cpuset from @tsk and release it.
- *
- * Note that cpusets marked notify_on_release force every task in
- * them to take the global manage_mutex mutex when exiting.
- * This could impact scaling on very large systems.  Be reluctant to
- * use notify_on_release cpusets where very high task exit scaling
- * is required on large systems.
- *
- * Don't even think about derefencing 'cs' after the cpuset use count
- * goes to zero, except inside a critical section guarded by manage_mutex
- * or callback_mutex.   Otherwise a zero cpuset use count is a license to
- * any other task to nuke the cpuset immediately, via cpuset_rmdir().
- *
- * This routine has to take manage_mutex, not callback_mutex, because
- * it is holding that mutex while calling check_for_release(),
- * which calls kmalloc(), so can't be called holding callback_mutex().
- *
- * the_top_cpuset_hack:
- *
- *    Set the exiting tasks cpuset to the root cpuset (top_cpuset).
- *
- *    Don't leave a task unable to allocate memory, as that is an
- *    accident waiting to happen should someone add a callout in
- *    do_exit() after the cpuset_exit() call that might allocate.
- *    If a task tries to allocate memory with an invalid cpuset,
- *    it will oops in cpuset_update_task_memory_state().
- *
- *    We call cpuset_exit() while the task is still competent to
- *    handle notify_on_release(), then leave the task attached to
- *    the root cpuset (top_cpuset) for the remainder of its exit.
- *
- *    To do this properly, we would increment the reference count on
- *    top_cpuset, and near the very end of the kernel/exit.c do_exit()
- *    code we would add a second cpuset function call, to drop that
- *    reference.  This would just create an unnecessary hot spot on
- *    the top_cpuset reference count, to no avail.
- *
- *    Normally, holding a reference to a cpuset without bumping its
- *    count is unsafe.   The cpuset could go away, or someone could
- *    attach us to a different cpuset, decrementing the count on
- *    the first cpuset that we never incremented.  But in this case,
- *    top_cpuset isn't going away, and either task has PF_EXITING set,
- *    which wards off any attach_task() attempts, or task is a failed
- *    fork, never visible to attach_task.
- *
- *    Another way to do this would be to set the cpuset pointer
- *    to NULL here, and check in cpuset_update_task_memory_state()
- *    for a NULL pointer.  This hack avoids that NULL check, for no
- *    cost (other than this way too long comment ;).
- **/
-
-void cpuset_exit(struct task_struct *tsk)
-{
-	struct cpuset *cs;
-
-	task_lock(current);
-	cs = tsk->cpuset;
-	tsk->cpuset = &top_cpuset;	/* the_top_cpuset_hack - see above */
-	task_unlock(current);
-
-	if (notify_on_release(cs)) {
-		char *pathbuf = NULL;
-
-		mutex_lock(&manage_mutex);
-		if (atomic_dec_and_test(&cs->count))
-			check_for_release(cs, &pathbuf);
-		mutex_unlock(&manage_mutex);
-		cpuset_release_agent(pathbuf);
-	} else {
-		atomic_dec(&cs->count);
-	}
-}
-
-/**
  * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
  * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
  *
@@ -2230,14 +1818,27 @@
 	cpumask_t mask;
 
 	mutex_lock(&callback_mutex);
-	task_lock(tsk);
-	guarantee_online_cpus(tsk->cpuset, &mask);
-	task_unlock(tsk);
+	mask = cpuset_cpus_allowed_locked(tsk);
 	mutex_unlock(&callback_mutex);
 
 	return mask;
 }
 
+/**
+ * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
+ * Must be  called with callback_mutex held.
+ **/
+cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk)
+{
+	cpumask_t mask;
+
+	task_lock(tsk);
+	guarantee_online_cpus(task_cs(tsk), &mask);
+	task_unlock(tsk);
+
+	return mask;
+}
+
 void cpuset_init_current_mems_allowed(void)
 {
 	current->mems_allowed = NODE_MASK_ALL;
@@ -2259,7 +1860,7 @@
 
 	mutex_lock(&callback_mutex);
 	task_lock(tsk);
-	guarantee_online_mems(tsk->cpuset, &mask);
+	guarantee_online_mems(task_cs(tsk), &mask);
 	task_unlock(tsk);
 	mutex_unlock(&callback_mutex);
 
@@ -2390,7 +1991,7 @@
 	mutex_lock(&callback_mutex);
 
 	task_lock(current);
-	cs = nearest_exclusive_ancestor(current->cpuset);
+	cs = nearest_exclusive_ancestor(task_cs(current));
 	task_unlock(current);
 
 	allowed = node_isset(node, cs->mems_allowed);
@@ -2550,14 +2151,12 @@
 
 void __cpuset_memory_pressure_bump(void)
 {
-	struct cpuset *cs;
-
 	task_lock(current);
-	cs = current->cpuset;
-	fmeter_markevent(&cs->fmeter);
+	fmeter_markevent(&task_cs(current)->fmeter);
 	task_unlock(current);
 }
 
+#ifdef CONFIG_PROC_PID_CPUSET
 /*
  * proc_cpuset_show()
  *  - Print tasks cpuset path into seq_file.
@@ -2569,11 +2168,12 @@
  *    the_top_cpuset_hack in cpuset_exit(), which sets an exiting tasks
  *    cpuset to top_cpuset.
  */
-static int proc_cpuset_show(struct seq_file *m, void *v)
+static int proc_cpuset_show(struct seq_file *m, void *unused_v)
 {
 	struct pid *pid;
 	struct task_struct *tsk;
 	char *buf;
+	struct cgroup_subsys_state *css;
 	int retval;
 
 	retval = -ENOMEM;
@@ -2588,15 +2188,15 @@
 		goto out_free;
 
 	retval = -EINVAL;
-	mutex_lock(&manage_mutex);
-
-	retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE);
+	cgroup_lock();
+	css = task_subsys_state(tsk, cpuset_subsys_id);
+	retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
 	if (retval < 0)
 		goto out_unlock;
 	seq_puts(m, buf);
 	seq_putc(m, '\n');
 out_unlock:
-	mutex_unlock(&manage_mutex);
+	cgroup_unlock();
 	put_task_struct(tsk);
 out_free:
 	kfree(buf);
@@ -2616,6 +2216,7 @@
 	.llseek		= seq_lseek,
 	.release	= single_release,
 };
+#endif /* CONFIG_PROC_PID_CPUSET */
 
 /* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */
 char *cpuset_task_status_allowed(struct task_struct *task, char *buffer)
diff --git a/kernel/die_notifier.c b/kernel/die_notifier.c
deleted file mode 100644
index 0d98827..0000000
--- a/kernel/die_notifier.c
+++ /dev/null
@@ -1,38 +0,0 @@
-
-#include <linux/module.h>
-#include <linux/notifier.h>
-#include <linux/vmalloc.h>
-#include <linux/kdebug.h>
-
-
-static ATOMIC_NOTIFIER_HEAD(die_chain);
-
-int notify_die(enum die_val val, const char *str,
-	       struct pt_regs *regs, long err, int trap, int sig)
-{
-	struct die_args args = {
-		.regs		= regs,
-		.str		= str,
-		.err		= err,
-		.trapnr		= trap,
-		.signr		= sig,
-
-	};
-
-	return atomic_notifier_call_chain(&die_chain, val, &args);
-}
-
-int register_die_notifier(struct notifier_block *nb)
-{
-	vmalloc_sync_all();
-	return atomic_notifier_chain_register(&die_chain, nb);
-}
-EXPORT_SYMBOL_GPL(register_die_notifier);
-
-int unregister_die_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&die_chain, nb);
-}
-EXPORT_SYMBOL_GPL(unregister_die_notifier);
-
-
diff --git a/kernel/exit.c b/kernel/exit.c
index 2c704c8..f1aec27 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -31,7 +31,7 @@
 #include <linux/taskstats_kern.h>
 #include <linux/delayacct.h>
 #include <linux/freezer.h>
-#include <linux/cpuset.h>
+#include <linux/cgroup.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
 #include <linux/posix-timers.h>
@@ -148,6 +148,7 @@
 	int zap_leader;
 repeat:
 	atomic_dec(&p->user->processes);
+	proc_flush_task(p);
 	write_lock_irq(&tasklist_lock);
 	ptrace_unlink(p);
 	BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
@@ -175,7 +176,6 @@
 	}
 
 	write_unlock_irq(&tasklist_lock);
-	proc_flush_task(p);
 	release_thread(p);
 	call_rcu(&p->rcu, delayed_put_task_struct);
 
@@ -221,7 +221,7 @@
 	do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 		if (p == ignored_task
 				|| p->exit_state
-				|| is_init(p->real_parent))
+				|| is_global_init(p->real_parent))
 			continue;
 		if (task_pgrp(p->real_parent) != pgrp &&
 		    task_session(p->real_parent) == task_session(p)) {
@@ -299,14 +299,14 @@
 {
 	struct task_struct *curr = current->group_leader;
 
-	if (process_session(curr) != session) {
+	if (task_session_nr(curr) != session) {
 		detach_pid(curr, PIDTYPE_SID);
-		set_signal_session(curr->signal, session);
+		set_task_session(curr, session);
 		attach_pid(curr, PIDTYPE_SID, find_pid(session));
 	}
-	if (process_group(curr) != pgrp) {
+	if (task_pgrp_nr(curr) != pgrp) {
 		detach_pid(curr, PIDTYPE_PGID);
-		curr->signal->pgrp = pgrp;
+		set_task_pgrp(curr, pgrp);
 		attach_pid(curr, PIDTYPE_PGID, find_pid(pgrp));
 	}
 }
@@ -400,11 +400,12 @@
 	current->fs = fs;
 	atomic_inc(&fs->count);
 
-	exit_task_namespaces(current);
-	current->nsproxy = init_task.nsproxy;
-	get_task_namespaces(current);
+	if (current->nsproxy != init_task.nsproxy) {
+		get_nsproxy(init_task.nsproxy);
+		switch_task_namespaces(current, init_task.nsproxy);
+	}
 
- 	exit_files(current);
+	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
 
@@ -492,7 +493,7 @@
 }
 EXPORT_SYMBOL(reset_files_struct);
 
-static inline void __exit_files(struct task_struct *tsk)
+static void __exit_files(struct task_struct *tsk)
 {
 	struct files_struct * files = tsk->files;
 
@@ -509,7 +510,7 @@
 	__exit_files(tsk);
 }
 
-static inline void __put_fs_struct(struct fs_struct *fs)
+static void __put_fs_struct(struct fs_struct *fs)
 {
 	/* No need to hold fs->lock if we are killing it */
 	if (atomic_dec_and_test(&fs->count)) {
@@ -530,7 +531,7 @@
 	__put_fs_struct(fs);
 }
 
-static inline void __exit_fs(struct task_struct *tsk)
+static void __exit_fs(struct task_struct *tsk)
 {
 	struct fs_struct * fs = tsk->fs;
 
@@ -665,19 +666,22 @@
  * the child reaper process (ie "init") in our pid
  * space.
  */
-static void
-forget_original_parent(struct task_struct *father, struct list_head *to_release)
+static void forget_original_parent(struct task_struct *father)
 {
-	struct task_struct *p, *reaper = father;
-	struct list_head *_p, *_n;
+	struct task_struct *p, *n, *reaper = father;
+	struct list_head ptrace_dead;
+
+	INIT_LIST_HEAD(&ptrace_dead);
+
+	write_lock_irq(&tasklist_lock);
 
 	do {
 		reaper = next_thread(reaper);
 		if (reaper == father) {
-			reaper = child_reaper(father);
+			reaper = task_child_reaper(father);
 			break;
 		}
-	} while (reaper->exit_state);
+	} while (reaper->flags & PF_EXITING);
 
 	/*
 	 * There are only two places where our children can be:
@@ -687,9 +691,8 @@
 	 *
 	 * Search them and reparent children.
 	 */
-	list_for_each_safe(_p, _n, &father->children) {
+	list_for_each_entry_safe(p, n, &father->children, sibling) {
 		int ptrace;
-		p = list_entry(_p, struct task_struct, sibling);
 
 		ptrace = p->ptrace;
 
@@ -715,13 +718,23 @@
 		 * while it was being traced by us, to be able to see it in wait4.
 		 */
 		if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1))
-			list_add(&p->ptrace_list, to_release);
+			list_add(&p->ptrace_list, &ptrace_dead);
 	}
-	list_for_each_safe(_p, _n, &father->ptrace_children) {
-		p = list_entry(_p, struct task_struct, ptrace_list);
+
+	list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) {
 		p->real_parent = reaper;
 		reparent_thread(p, father, 1);
 	}
+
+	write_unlock_irq(&tasklist_lock);
+	BUG_ON(!list_empty(&father->children));
+	BUG_ON(!list_empty(&father->ptrace_children));
+
+	list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) {
+		list_del_init(&p->ptrace_list);
+		release_task(p);
+	}
+
 }
 
 /*
@@ -732,7 +745,6 @@
 {
 	int state;
 	struct task_struct *t;
-	struct list_head ptrace_dead, *_p, *_n;
 	struct pid *pgrp;
 
 	if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT)
@@ -753,8 +765,6 @@
 		spin_unlock_irq(&tsk->sighand->siglock);
 	}
 
-	write_lock_irq(&tasklist_lock);
-
 	/*
 	 * This does two things:
 	 *
@@ -763,12 +773,10 @@
 	 *	as a result of our exiting, and if they have any stopped
 	 *	jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
 	 */
+	forget_original_parent(tsk);
+	exit_task_namespaces(tsk);
 
-	INIT_LIST_HEAD(&ptrace_dead);
-	forget_original_parent(tsk, &ptrace_dead);
-	BUG_ON(!list_empty(&tsk->children));
-	BUG_ON(!list_empty(&tsk->ptrace_children));
-
+	write_lock_irq(&tasklist_lock);
 	/*
 	 * Check to see if any process groups have become orphaned
 	 * as a result of our exiting, and if they have any stopped
@@ -792,7 +800,7 @@
 	/* Let father know we died
 	 *
 	 * Thread signals are configurable, but you aren't going to use
-	 * that to send signals to arbitary processes. 
+	 * that to send signals to arbitary processes.
 	 * That stops right now.
 	 *
 	 * If the parent exec id doesn't match the exec id we saved
@@ -833,12 +841,6 @@
 
 	write_unlock_irq(&tasklist_lock);
 
-	list_for_each_safe(_p, _n, &ptrace_dead) {
-		list_del_init(_p);
-		t = list_entry(_p, struct task_struct, ptrace_list);
-		release_task(t);
-	}
-
 	/* If the process is dead, release it - nobody will wait for it */
 	if (state == EXIT_DEAD)
 		release_task(tsk);
@@ -874,10 +876,35 @@
 
 static inline void exit_child_reaper(struct task_struct *tsk)
 {
-	if (likely(tsk->group_leader != child_reaper(tsk)))
+	if (likely(tsk->group_leader != task_child_reaper(tsk)))
 		return;
 
-	panic("Attempted to kill init!");
+	if (tsk->nsproxy->pid_ns == &init_pid_ns)
+		panic("Attempted to kill init!");
+
+	/*
+	 * @tsk is the last thread in the 'cgroup-init' and is exiting.
+	 * Terminate all remaining processes in the namespace and reap them
+	 * before exiting @tsk.
+	 *
+	 * Note that @tsk (last thread of cgroup-init) may not necessarily
+	 * be the child-reaper (i.e main thread of cgroup-init) of the
+	 * namespace i.e the child_reaper may have already exited.
+	 *
+	 * Even after a child_reaper exits, we let it inherit orphaned children,
+	 * because, pid_ns->child_reaper remains valid as long as there is
+	 * at least one living sub-thread in the cgroup init.
+
+	 * This living sub-thread of the cgroup-init will be notified when
+	 * a child inherited by the 'child-reaper' exits (do_notify_parent()
+	 * uses __group_send_sig_info()). Further, when reaping child processes,
+	 * do_wait() iterates over children of all living sub threads.
+
+	 * i.e even though 'child_reaper' thread is listed as the parent of the
+	 * orphaned children, any living sub-thread in the cgroup-init can
+	 * perform the role of the child_reaper.
+	 */
+	zap_pid_ns_processes(tsk->nsproxy->pid_ns);
 }
 
 fastcall NORET_TYPE void do_exit(long code)
@@ -932,7 +959,7 @@
 
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
-				current->comm, current->pid,
+				current->comm, task_pid_nr(current),
 				preempt_count());
 
 	acct_update_integrals(tsk);
@@ -972,7 +999,7 @@
 	__exit_fs(tsk);
 	check_stack_usage();
 	exit_thread();
-	cpuset_exit(tsk);
+	cgroup_exit(tsk, 1);
 	exit_keys(tsk);
 
 	if (group_dead && tsk->signal->leader)
@@ -983,7 +1010,6 @@
 		module_put(tsk->binfmt->module);
 
 	proc_exit_connector(tsk);
-	exit_task_namespaces(tsk);
 	exit_notify(tsk);
 #ifdef CONFIG_NUMA
 	mpol_free(tsk->mempolicy);
@@ -1086,15 +1112,17 @@
 static int eligible_child(pid_t pid, int options, struct task_struct *p)
 {
 	int err;
+	struct pid_namespace *ns;
 
+	ns = current->nsproxy->pid_ns;
 	if (pid > 0) {
-		if (p->pid != pid)
+		if (task_pid_nr_ns(p, ns) != pid)
 			return 0;
 	} else if (!pid) {
-		if (process_group(p) != process_group(current))
+		if (task_pgrp_nr_ns(p, ns) != task_pgrp_vnr(current))
 			return 0;
 	} else if (pid != -1) {
-		if (process_group(p) != -pid)
+		if (task_pgrp_nr_ns(p, ns) != -pid)
 			return 0;
 	}
 
@@ -1164,9 +1192,12 @@
 {
 	unsigned long state;
 	int retval, status, traced;
+	struct pid_namespace *ns;
+
+	ns = current->nsproxy->pid_ns;
 
 	if (unlikely(noreap)) {
-		pid_t pid = p->pid;
+		pid_t pid = task_pid_nr_ns(p, ns);
 		uid_t uid = p->uid;
 		int exit_code = p->exit_code;
 		int why, status;
@@ -1285,11 +1316,11 @@
 			retval = put_user(status, &infop->si_status);
 	}
 	if (!retval && infop)
-		retval = put_user(p->pid, &infop->si_pid);
+		retval = put_user(task_pid_nr_ns(p, ns), &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(p->uid, &infop->si_uid);
 	if (!retval)
-		retval = p->pid;
+		retval = task_pid_nr_ns(p, ns);
 
 	if (traced) {
 		write_lock_irq(&tasklist_lock);
@@ -1326,6 +1357,7 @@
 			     int __user *stat_addr, struct rusage __user *ru)
 {
 	int retval, exit_code;
+	struct pid_namespace *ns;
 
 	if (!p->exit_code)
 		return 0;
@@ -1344,11 +1376,12 @@
 	 * keep holding onto the tasklist_lock while we call getrusage and
 	 * possibly take page faults for user memory.
 	 */
+	ns = current->nsproxy->pid_ns;
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 
 	if (unlikely(noreap)) {
-		pid_t pid = p->pid;
+		pid_t pid = task_pid_nr_ns(p, ns);
 		uid_t uid = p->uid;
 		int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
 
@@ -1419,11 +1452,11 @@
 	if (!retval && infop)
 		retval = put_user(exit_code, &infop->si_status);
 	if (!retval && infop)
-		retval = put_user(p->pid, &infop->si_pid);
+		retval = put_user(task_pid_nr_ns(p, ns), &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(p->uid, &infop->si_uid);
 	if (!retval)
-		retval = p->pid;
+		retval = task_pid_nr_ns(p, ns);
 	put_task_struct(p);
 
 	BUG_ON(!retval);
@@ -1443,6 +1476,7 @@
 	int retval;
 	pid_t pid;
 	uid_t uid;
+	struct pid_namespace *ns;
 
 	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
 		return 0;
@@ -1457,7 +1491,8 @@
 		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
 	spin_unlock_irq(&p->sighand->siglock);
 
-	pid = p->pid;
+	ns = current->nsproxy->pid_ns;
+	pid = task_pid_nr_ns(p, ns);
 	uid = p->uid;
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
@@ -1468,7 +1503,7 @@
 		if (!retval && stat_addr)
 			retval = put_user(0xffff, stat_addr);
 		if (!retval)
-			retval = p->pid;
+			retval = task_pid_nr_ns(p, ns);
 	} else {
 		retval = wait_noreap_copyout(p, pid, uid,
 					     CLD_CONTINUED, SIGCONT,
@@ -1517,12 +1552,9 @@
 	tsk = current;
 	do {
 		struct task_struct *p;
-		struct list_head *_p;
 		int ret;
 
-		list_for_each(_p,&tsk->children) {
-			p = list_entry(_p, struct task_struct, sibling);
-
+		list_for_each_entry(p, &tsk->children, sibling) {
 			ret = eligible_child(pid, options, p);
 			if (!ret)
 				continue;
@@ -1604,9 +1636,8 @@
 			}
 		}
 		if (!flag) {
-			list_for_each(_p, &tsk->ptrace_children) {
-				p = list_entry(_p, struct task_struct,
-						ptrace_list);
+			list_for_each_entry(p, &tsk->ptrace_children,
+					    ptrace_list) {
 				if (!eligible_child(pid, options, p))
 					continue;
 				flag = 1;
diff --git a/kernel/fork.c b/kernel/fork.c
index 2ce28f1..ddafdfa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -29,7 +29,7 @@
 #include <linux/nsproxy.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
-#include <linux/cpuset.h>
+#include <linux/cgroup.h>
 #include <linux/security.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
@@ -50,6 +50,7 @@
 #include <linux/taskstats_kern.h>
 #include <linux/random.h>
 #include <linux/tty.h>
+#include <linux/proc_fs.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -116,7 +117,7 @@
 
 void __put_task_struct(struct task_struct *tsk)
 {
-	WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
+	WARN_ON(!tsk->exit_state);
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
@@ -205,7 +206,7 @@
 }
 
 #ifdef CONFIG_MMU
-static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 {
 	struct vm_area_struct *mpnt, *tmp, **pprev;
 	struct rb_node **rb_link, *rb_parent;
@@ -583,7 +584,7 @@
 	return retval;
 }
 
-static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
+static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
 {
 	struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
 	/* We don't need to lock fs - think why ;-) */
@@ -615,7 +616,7 @@
 
 EXPORT_SYMBOL_GPL(copy_fs_struct);
 
-static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 {
 	if (clone_flags & CLONE_FS) {
 		atomic_inc(&current->fs->count);
@@ -818,7 +819,7 @@
 
 EXPORT_SYMBOL(unshare_files);
 
-static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct sighand_struct *sig;
 
@@ -841,7 +842,7 @@
 		kmem_cache_free(sighand_cachep, sighand);
 }
 
-static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct signal_struct *sig;
 	int ret;
@@ -923,7 +924,7 @@
 	kmem_cache_free(signal_cachep, sig);
 }
 
-static inline void cleanup_signal(struct task_struct *tsk)
+static void cleanup_signal(struct task_struct *tsk)
 {
 	struct signal_struct *sig = tsk->signal;
 
@@ -933,7 +934,7 @@
 		__cleanup_signal(sig);
 }
 
-static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
+static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
 	unsigned long new_flags = p->flags;
 
@@ -949,10 +950,10 @@
 {
 	current->clear_child_tid = tidptr;
 
-	return current->pid;
+	return task_pid_vnr(current);
 }
 
-static inline void rt_mutex_init_task(struct task_struct *p)
+static void rt_mutex_init_task(struct task_struct *p)
 {
 	spin_lock_init(&p->pi_lock);
 #ifdef CONFIG_RT_MUTEXES
@@ -973,12 +974,12 @@
 					unsigned long stack_start,
 					struct pt_regs *regs,
 					unsigned long stack_size,
-					int __user *parent_tidptr,
 					int __user *child_tidptr,
 					struct pid *pid)
 {
 	int retval;
-	struct task_struct *p = NULL;
+	struct task_struct *p;
+	int cgroup_callbacks_done = 0;
 
 	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 		return ERR_PTR(-EINVAL);
@@ -1042,12 +1043,6 @@
 	p->did_exec = 0;
 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
 	copy_flags(clone_flags, p);
-	p->pid = pid_nr(pid);
-	retval = -EFAULT;
-	if (clone_flags & CLONE_PARENT_SETTID)
-		if (put_user(p->pid, parent_tidptr))
-			goto bad_fork_cleanup_delays_binfmt;
-
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
 	p->vfork_done = NULL;
@@ -1087,13 +1082,13 @@
 #endif
 	p->io_context = NULL;
 	p->audit_context = NULL;
-	cpuset_fork(p);
+	cgroup_fork(p);
 #ifdef CONFIG_NUMA
  	p->mempolicy = mpol_copy(p->mempolicy);
  	if (IS_ERR(p->mempolicy)) {
  		retval = PTR_ERR(p->mempolicy);
  		p->mempolicy = NULL;
- 		goto bad_fork_cleanup_cpuset;
+ 		goto bad_fork_cleanup_cgroup;
  	}
 	mpol_fix_fork_child_flag(p);
 #endif
@@ -1126,10 +1121,6 @@
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
 
-	p->tgid = p->pid;
-	if (clone_flags & CLONE_THREAD)
-		p->tgid = current->tgid;
-
 	if ((retval = security_task_alloc(p)))
 		goto bad_fork_cleanup_policy;
 	if ((retval = audit_alloc(p)))
@@ -1155,6 +1146,24 @@
 	if (retval)
 		goto bad_fork_cleanup_namespaces;
 
+	if (pid != &init_struct_pid) {
+		retval = -ENOMEM;
+		pid = alloc_pid(task_active_pid_ns(p));
+		if (!pid)
+			goto bad_fork_cleanup_namespaces;
+
+		if (clone_flags & CLONE_NEWPID) {
+			retval = pid_ns_prepare_proc(task_active_pid_ns(p));
+			if (retval < 0)
+				goto bad_fork_free_pid;
+		}
+	}
+
+	p->pid = pid_nr(pid);
+	p->tgid = p->pid;
+	if (clone_flags & CLONE_THREAD)
+		p->tgid = current->tgid;
+
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
 	/*
 	 * Clear TID on mm_release()?
@@ -1204,6 +1213,12 @@
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
 
+	/* Now that the task is set up, run cgroup callbacks if
+	 * necessary. We need to run them before the task is visible
+	 * on the tasklist. */
+	cgroup_fork_callbacks(p);
+	cgroup_callbacks_done = 1;
+
 	/* Need tasklist lock for parent etc handling! */
 	write_lock_irq(&tasklist_lock);
 
@@ -1246,7 +1261,7 @@
 		spin_unlock(&current->sighand->siglock);
 		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
-		goto bad_fork_cleanup_namespaces;
+		goto bad_fork_free_pid;
 	}
 
 	if (clone_flags & CLONE_THREAD) {
@@ -1275,11 +1290,22 @@
 			__ptrace_link(p, current->parent);
 
 		if (thread_group_leader(p)) {
-			p->signal->tty = current->signal->tty;
-			p->signal->pgrp = process_group(current);
-			set_signal_session(p->signal, process_session(current));
-			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
-			attach_pid(p, PIDTYPE_SID, task_session(current));
+			if (clone_flags & CLONE_NEWPID) {
+				p->nsproxy->pid_ns->child_reaper = p;
+				p->signal->tty = NULL;
+				set_task_pgrp(p, p->pid);
+				set_task_session(p, p->pid);
+				attach_pid(p, PIDTYPE_PGID, pid);
+				attach_pid(p, PIDTYPE_SID, pid);
+			} else {
+				p->signal->tty = current->signal->tty;
+				set_task_pgrp(p, task_pgrp_nr(current));
+				set_task_session(p, task_session_nr(current));
+				attach_pid(p, PIDTYPE_PGID,
+						task_pgrp(current));
+				attach_pid(p, PIDTYPE_SID,
+						task_session(current));
+			}
 
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
 			__get_cpu_var(process_counts)++;
@@ -1292,8 +1318,12 @@
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
+	cgroup_post_fork(p);
 	return p;
 
+bad_fork_free_pid:
+	if (pid != &init_struct_pid)
+		free_pid(pid);
 bad_fork_cleanup_namespaces:
 	exit_task_namespaces(p);
 bad_fork_cleanup_keys:
@@ -1318,10 +1348,9 @@
 bad_fork_cleanup_policy:
 #ifdef CONFIG_NUMA
 	mpol_free(p->mempolicy);
-bad_fork_cleanup_cpuset:
+bad_fork_cleanup_cgroup:
 #endif
-	cpuset_exit(p);
-bad_fork_cleanup_delays_binfmt:
+	cgroup_exit(p, cgroup_callbacks_done);
 	delayacct_tsk_free(p);
 	if (p->binfmt)
 		module_put(p->binfmt->module);
@@ -1348,7 +1377,7 @@
 	struct task_struct *task;
 	struct pt_regs regs;
 
-	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL,
+	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
 				&init_struct_pid);
 	if (!IS_ERR(task))
 		init_idle(task, cpu);
@@ -1356,7 +1385,7 @@
 	return task;
 }
 
-static inline int fork_traceflag (unsigned clone_flags)
+static int fork_traceflag(unsigned clone_flags)
 {
 	if (clone_flags & CLONE_UNTRACED)
 		return 0;
@@ -1387,19 +1416,16 @@
 {
 	struct task_struct *p;
 	int trace = 0;
-	struct pid *pid = alloc_pid();
 	long nr;
 
-	if (!pid)
-		return -EAGAIN;
-	nr = pid->nr;
 	if (unlikely(current->ptrace)) {
 		trace = fork_traceflag (clone_flags);
 		if (trace)
 			clone_flags |= CLONE_PTRACE;
 	}
 
-	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+	p = copy_process(clone_flags, stack_start, regs, stack_size,
+			child_tidptr, NULL);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
@@ -1407,6 +1433,17 @@
 	if (!IS_ERR(p)) {
 		struct completion vfork;
 
+		/*
+		 * this is enough to call pid_nr_ns here, but this if
+		 * improves optimisation of regular fork()
+		 */
+		nr = (clone_flags & CLONE_NEWPID) ?
+			task_pid_nr_ns(p, current->nsproxy->pid_ns) :
+				task_pid_vnr(p);
+
+		if (clone_flags & CLONE_PARENT_SETTID)
+			put_user(nr, parent_tidptr);
+
 		if (clone_flags & CLONE_VFORK) {
 			p->vfork_done = &vfork;
 			init_completion(&vfork);
@@ -1440,7 +1477,6 @@
 			}
 		}
 	} else {
-		free_pid(pid);
 		nr = PTR_ERR(p);
 	}
 	return nr;
@@ -1485,7 +1521,7 @@
  * Check constraints on flags passed to the unshare system call and
  * force unsharing of additional process context as appropriate.
  */
-static inline void check_unshare_flags(unsigned long *flags_ptr)
+static void check_unshare_flags(unsigned long *flags_ptr)
 {
 	/*
 	 * If unsharing a thread from a thread group, must also
@@ -1617,7 +1653,7 @@
 	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
 	struct files_struct *fd, *new_fd = NULL;
 	struct sem_undo_list *new_ulist = NULL;
-	struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
+	struct nsproxy *new_nsproxy = NULL;
 
 	check_unshare_flags(&unshare_flags);
 
@@ -1647,14 +1683,13 @@
 
 	if (new_fs ||  new_mm || new_fd || new_ulist || new_nsproxy) {
 
-		task_lock(current);
-
 		if (new_nsproxy) {
-			old_nsproxy = current->nsproxy;
-			current->nsproxy = new_nsproxy;
-			new_nsproxy = old_nsproxy;
+			switch_task_namespaces(current, new_nsproxy);
+			new_nsproxy = NULL;
 		}
 
+		task_lock(current);
+
 		if (new_fs) {
 			fs = current->fs;
 			current->fs = new_fs;
diff --git a/kernel/futex.c b/kernel/futex.c
index e45a65e..32710451 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -53,6 +53,9 @@
 #include <linux/signal.h>
 #include <linux/module.h>
 #include <linux/magic.h>
+#include <linux/pid.h>
+#include <linux/nsproxy.h>
+
 #include <asm/futex.h>
 
 #include "rtmutex_common.h"
@@ -443,8 +446,7 @@
 	struct task_struct *p;
 
 	rcu_read_lock();
-	p = find_task_by_pid(pid);
-
+	p = find_task_by_vpid(pid);
 	if (!p || ((current->euid != p->euid) && (current->euid != p->uid)))
 		p = ERR_PTR(-ESRCH);
 	else
@@ -653,7 +655,7 @@
 	if (!(uval & FUTEX_OWNER_DIED)) {
 		int ret = 0;
 
-		newval = FUTEX_WAITERS | new_owner->pid;
+		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
 
 		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 
@@ -1106,7 +1108,7 @@
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 				struct task_struct *curr)
 {
-	u32 newtid = curr->pid | FUTEX_WAITERS;
+	u32 newtid = task_pid_vnr(curr) | FUTEX_WAITERS;
 	struct futex_pi_state *pi_state = q->pi_state;
 	u32 uval, curval, newval;
 	int ret;
@@ -1368,7 +1370,7 @@
 	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
 	 * the locks. It will most likely not succeed.
 	 */
-	newval = current->pid;
+	newval = task_pid_vnr(current);
 
 	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
 
@@ -1379,7 +1381,7 @@
 	 * Detect deadlocks. In case of REQUEUE_PI this is a valid
 	 * situation and we return success to user space.
 	 */
-	if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
+	if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
 		ret = -EDEADLK;
 		goto out_unlock_release_sem;
 	}
@@ -1408,7 +1410,7 @@
 	 */
 	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
 		/* Keep the OWNER_DIED bit */
-		newval = (curval & ~FUTEX_TID_MASK) | current->pid;
+		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current);
 		ownerdied = 0;
 		lock_taken = 1;
 	}
@@ -1587,7 +1589,7 @@
 	/*
 	 * We release only a lock we actually own:
 	 */
-	if ((uval & FUTEX_TID_MASK) != current->pid)
+	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
 		return -EPERM;
 	/*
 	 * First take all the futex related locks:
@@ -1608,7 +1610,7 @@
 	 * anyone else up:
 	 */
 	if (!(uval & FUTEX_OWNER_DIED))
-		uval = cmpxchg_futex_value_locked(uaddr, current->pid, 0);
+		uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
 
 
 	if (unlikely(uval == -EFAULT))
@@ -1617,7 +1619,7 @@
 	 * Rare case: we managed to release the lock atomically,
 	 * no need to wake anyone else up:
 	 */
-	if (unlikely(uval == current->pid))
+	if (unlikely(uval == task_pid_vnr(current)))
 		goto out_unlock;
 
 	/*
@@ -1854,7 +1856,7 @@
 
 		ret = -ESRCH;
 		rcu_read_lock();
-		p = find_task_by_pid(pid);
+		p = find_task_by_vpid(pid);
 		if (!p)
 			goto err_unlock;
 		ret = -EPERM;
@@ -1887,7 +1889,7 @@
 	if (get_user(uval, uaddr))
 		return -1;
 
-	if ((uval & FUTEX_TID_MASK) == curr->pid) {
+	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
 		/*
 		 * Ok, this dying thread is truly holding a futex
 		 * of interest. Set the OWNER_DIED bit atomically
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 2c2e295..00b5726 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -8,6 +8,7 @@
 
 #include <linux/linkage.h>
 #include <linux/compat.h>
+#include <linux/nsproxy.h>
 #include <linux/futex.h>
 
 #include <asm/uaccess.h>
@@ -124,7 +125,7 @@
 
 		ret = -ESRCH;
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_vpid(pid);
 		if (!p)
 			goto err_unlock;
 		ret = -EPERM;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index e9f1b4e..aa74a1e 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -51,7 +51,7 @@
 
 int kexec_should_crash(struct task_struct *p)
 {
-	if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops)
+	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
 		return 1;
 	return 0;
 }
@@ -1146,6 +1146,172 @@
 }
 module_init(crash_notes_memory_init)
 
+
+/*
+ * parsing the "crashkernel" commandline
+ *
+ * this code is intended to be called from architecture specific code
+ */
+
+
+/*
+ * This function parses command lines in the format
+ *
+ *   crashkernel=ramsize-range:size[,...][@offset]
+ *
+ * The function returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_mem(char 			*cmdline,
+					unsigned long long	system_ram,
+					unsigned long long	*crash_size,
+					unsigned long long	*crash_base)
+{
+	char *cur = cmdline, *tmp;
+
+	/* for each entry of the comma-separated list */
+	do {
+		unsigned long long start, end = ULLONG_MAX, size;
+
+		/* get the start of the range */
+		start = memparse(cur, &tmp);
+		if (cur == tmp) {
+			pr_warning("crashkernel: Memory value expected\n");
+			return -EINVAL;
+		}
+		cur = tmp;
+		if (*cur != '-') {
+			pr_warning("crashkernel: '-' expected\n");
+			return -EINVAL;
+		}
+		cur++;
+
+		/* if no ':' is here, than we read the end */
+		if (*cur != ':') {
+			end = memparse(cur, &tmp);
+			if (cur == tmp) {
+				pr_warning("crashkernel: Memory "
+						"value expected\n");
+				return -EINVAL;
+			}
+			cur = tmp;
+			if (end <= start) {
+				pr_warning("crashkernel: end <= start\n");
+				return -EINVAL;
+			}
+		}
+
+		if (*cur != ':') {
+			pr_warning("crashkernel: ':' expected\n");
+			return -EINVAL;
+		}
+		cur++;
+
+		size = memparse(cur, &tmp);
+		if (cur == tmp) {
+			pr_warning("Memory value expected\n");
+			return -EINVAL;
+		}
+		cur = tmp;
+		if (size >= system_ram) {
+			pr_warning("crashkernel: invalid size\n");
+			return -EINVAL;
+		}
+
+		/* match ? */
+		if (system_ram >= start && system_ram <= end) {
+			*crash_size = size;
+			break;
+		}
+	} while (*cur++ == ',');
+
+	if (*crash_size > 0) {
+		while (*cur != ' ' && *cur != '@')
+			cur++;
+		if (*cur == '@') {
+			cur++;
+			*crash_base = memparse(cur, &tmp);
+			if (cur == tmp) {
+				pr_warning("Memory value expected "
+						"after '@'\n");
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * That function parses "simple" (old) crashkernel command lines like
+ *
+ * 	crashkernel=size[@offset]
+ *
+ * It returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_simple(char 		*cmdline,
+					   unsigned long long 	*crash_size,
+					   unsigned long long 	*crash_base)
+{
+	char *cur = cmdline;
+
+	*crash_size = memparse(cmdline, &cur);
+	if (cmdline == cur) {
+		pr_warning("crashkernel: memory value expected\n");
+		return -EINVAL;
+	}
+
+	if (*cur == '@')
+		*crash_base = memparse(cur+1, &cur);
+
+	return 0;
+}
+
+/*
+ * That function is the entry point for command line parsing and should be
+ * called from the arch-specific code.
+ */
+int __init parse_crashkernel(char 		 *cmdline,
+			     unsigned long long system_ram,
+			     unsigned long long *crash_size,
+			     unsigned long long *crash_base)
+{
+	char 	*p = cmdline, *ck_cmdline = NULL;
+	char	*first_colon, *first_space;
+
+	BUG_ON(!crash_size || !crash_base);
+	*crash_size = 0;
+	*crash_base = 0;
+
+	/* find crashkernel and use the last one if there are more */
+	p = strstr(p, "crashkernel=");
+	while (p) {
+		ck_cmdline = p;
+		p = strstr(p+1, "crashkernel=");
+	}
+
+	if (!ck_cmdline)
+		return -EINVAL;
+
+	ck_cmdline += 12; /* strlen("crashkernel=") */
+
+	/*
+	 * if the commandline contains a ':', then that's the extended
+	 * syntax -- if not, it must be the classic syntax
+	 */
+	first_colon = strchr(ck_cmdline, ':');
+	first_space = strchr(ck_cmdline, ' ');
+	if (first_colon && (!first_space || first_colon < first_space))
+		return parse_crashkernel_mem(ck_cmdline, system_ram,
+				crash_size, crash_base);
+	else
+		return parse_crashkernel_simple(ck_cmdline, crash_size,
+				crash_base);
+
+	return 0;
+}
+
+
+
 void crash_save_vmcoreinfo(void)
 {
 	u32 *buf;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index a6f1ee9..55fe0c7 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -511,11 +511,11 @@
 	int i, depth = curr->lockdep_depth;
 
 	if (!depth) {
-		printk("no locks held by %s/%d.\n", curr->comm, curr->pid);
+		printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
 		return;
 	}
 	printk("%d lock%s held by %s/%d:\n",
-		depth, depth > 1 ? "s" : "", curr->comm, curr->pid);
+		depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
 
 	for (i = 0; i < depth; i++) {
 		printk(" #%d: ", i);
@@ -904,7 +904,7 @@
 	print_kernel_version();
 	printk(  "-------------------------------------------------------\n");
 	printk("%s/%d is trying to acquire lock:\n",
-		curr->comm, curr->pid);
+		curr->comm, task_pid_nr(curr));
 	print_lock(check_source);
 	printk("\nbut task is already holding lock:\n");
 	print_lock(check_target);
@@ -1085,7 +1085,7 @@
 	print_kernel_version();
 	printk(  "------------------------------------------------------\n");
 	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
-		curr->comm, curr->pid,
+		curr->comm, task_pid_nr(curr),
 		curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
 		curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
 		curr->hardirqs_enabled,
@@ -1237,7 +1237,7 @@
 	print_kernel_version();
 	printk(  "---------------------------------------------\n");
 	printk("%s/%d is trying to acquire lock:\n",
-		curr->comm, curr->pid);
+		curr->comm, task_pid_nr(curr));
 	print_lock(next);
 	printk("\nbut task is already holding lock:\n");
 	print_lock(prev);
@@ -1521,7 +1521,7 @@
 }
 
 static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
-	       	struct held_lock *hlock, int chain_head, u64 chain_key)
+		struct held_lock *hlock, int chain_head, u64 chain_key)
 {
 	/*
 	 * Trylock needs to maintain the stack of held locks, but it
@@ -1641,7 +1641,7 @@
 		usage_str[prev_bit], usage_str[new_bit]);
 
 	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
-		curr->comm, curr->pid,
+		curr->comm, task_pid_nr(curr),
 		trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
 		trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
 		trace_hardirqs_enabled(curr),
@@ -1694,7 +1694,7 @@
 	print_kernel_version();
 	printk(  "---------------------------------------------------------\n");
 	printk("%s/%d just changed the state of lock:\n",
-		curr->comm, curr->pid);
+		curr->comm, task_pid_nr(curr));
 	print_lock(this);
 	if (forwards)
 		printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass);
@@ -2487,7 +2487,7 @@
 	printk(  "[ BUG: bad unlock balance detected! ]\n");
 	printk(  "-------------------------------------\n");
 	printk("%s/%d is trying to release lock (",
-		curr->comm, curr->pid);
+		curr->comm, task_pid_nr(curr));
 	print_lockdep_cache(lock);
 	printk(") at:\n");
 	print_ip_sym(ip);
@@ -2737,7 +2737,7 @@
 	printk(  "[ BUG: bad contention detected! ]\n");
 	printk(  "---------------------------------\n");
 	printk("%s/%d is trying to contend lock (",
-		curr->comm, curr->pid);
+		curr->comm, task_pid_nr(curr));
 	print_lockdep_cache(lock);
 	printk(") at:\n");
 	print_ip_sym(ip);
@@ -3072,7 +3072,7 @@
 	printk(  "[ BUG: held lock freed! ]\n");
 	printk(  "-------------------------\n");
 	printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
-		curr->comm, curr->pid, mem_from, mem_to-1);
+		curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
 	print_lock(hlock);
 	lockdep_print_held_locks(curr);
 
@@ -3125,7 +3125,7 @@
 	printk(  "[ BUG: lock held at task exit time! ]\n");
 	printk(  "-------------------------------------\n");
 	printk("%s/%d is exiting with locks still held!\n",
-		curr->comm, curr->pid);
+		curr->comm, task_pid_nr(curr));
 	lockdep_print_held_locks(curr);
 
 	printk("\nstack backtrace:\n");
diff --git a/kernel/marker.c b/kernel/marker.c
new file mode 100644
index 0000000..ccb48d9
--- /dev/null
+++ b/kernel/marker.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (C) 2007 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/marker.h>
+#include <linux/err.h>
+
+extern struct marker __start___markers[];
+extern struct marker __stop___markers[];
+
+/*
+ * module_mutex nests inside markers_mutex. Markers mutex protects the builtin
+ * and module markers, the hash table and deferred_sync.
+ */
+static DEFINE_MUTEX(markers_mutex);
+
+/*
+ * Marker deferred synchronization.
+ * Upon marker probe_unregister, we delay call to synchronize_sched() to
+ * accelerate mass unregistration (only when there is no more reference to a
+ * given module do we call synchronize_sched()). However, we need to make sure
+ * every critical region has ended before we re-arm a marker that has been
+ * unregistered and then registered back with a different probe data.
+ */
+static int deferred_sync;
+
+/*
+ * Marker hash table, containing the active markers.
+ * Protected by module_mutex.
+ */
+#define MARKER_HASH_BITS 6
+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+
+struct marker_entry {
+	struct hlist_node hlist;
+	char *format;
+	marker_probe_func *probe;
+	void *private;
+	int refcount;	/* Number of times armed. 0 if disarmed. */
+	char name[0];	/* Contains name'\0'format'\0' */
+};
+
+static struct hlist_head marker_table[MARKER_TABLE_SIZE];
+
+/**
+ * __mark_empty_function - Empty probe callback
+ * @mdata: pointer of type const struct marker
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Empty callback provided as a probe to the markers. By providing this to a
+ * disabled marker, we make sure the  execution flow is always valid even
+ * though the function pointer change and the marker enabling are two distinct
+ * operations that modifies the execution flow of preemptible code.
+ */
+void __mark_empty_function(const struct marker *mdata, void *private,
+	const char *fmt, ...)
+{
+}
+EXPORT_SYMBOL_GPL(__mark_empty_function);
+
+/*
+ * Get marker if the marker is present in the marker hash table.
+ * Must be called with markers_mutex held.
+ * Returns NULL if not present.
+ */
+static struct marker_entry *get_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	u32 hash = jhash(name, strlen(name), 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name))
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * Add the marker to the marker hash table. Must be called with markers_mutex
+ * held.
+ */
+static int add_marker(const char *name, const char *format,
+	marker_probe_func *probe, void *private)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	size_t format_len = 0;
+	u32 hash = jhash(name, name_len-1, 0);
+
+	if (format)
+		format_len = strlen(format) + 1;
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			printk(KERN_NOTICE
+				"Marker %s busy, probe %p already installed\n",
+				name, e->probe);
+			return -EBUSY;	/* Already there */
+		}
+	}
+	/*
+	 * Using kmalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused.
+	 */
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], name, name_len);
+	if (format) {
+		e->format = &e->name[name_len];
+		memcpy(e->format, format, format_len);
+		trace_mark(core_marker_format, "name %s format %s",
+				e->name, e->format);
+	} else
+		e->format = NULL;
+	e->probe = probe;
+	e->private = private;
+	e->refcount = 0;
+	hlist_add_head(&e->hlist, head);
+	return 0;
+}
+
+/*
+ * Remove the marker from the marker hash table. Must be called with mutex_lock
+ * held.
+ */
+static void *remove_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	int found = 0;
+	size_t len = strlen(name) + 1;
+	void *private = NULL;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			break;
+		}
+	}
+	if (found) {
+		private = e->private;
+		hlist_del(&e->hlist);
+		kfree(e);
+	}
+	return private;
+}
+
+/*
+ * Set the mark_entry format to the format found in the element.
+ */
+static int marker_set_format(struct marker_entry **entry, const char *format)
+{
+	struct marker_entry *e;
+	size_t name_len = strlen((*entry)->name) + 1;
+	size_t format_len = strlen(format) + 1;
+
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], (*entry)->name, name_len);
+	e->format = &e->name[name_len];
+	memcpy(e->format, format, format_len);
+	e->probe = (*entry)->probe;
+	e->private = (*entry)->private;
+	e->refcount = (*entry)->refcount;
+	hlist_add_before(&e->hlist, &(*entry)->hlist);
+	hlist_del(&(*entry)->hlist);
+	kfree(*entry);
+	*entry = e;
+	trace_mark(core_marker_format, "name %s format %s",
+			e->name, e->format);
+	return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one marker.
+ */
+static int set_marker(struct marker_entry **entry, struct marker *elem)
+{
+	int ret;
+	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+
+	if ((*entry)->format) {
+		if (strcmp((*entry)->format, elem->format) != 0) {
+			printk(KERN_NOTICE
+				"Format mismatch for probe %s "
+				"(%s), marker (%s)\n",
+				(*entry)->name,
+				(*entry)->format,
+				elem->format);
+			return -EPERM;
+		}
+	} else {
+		ret = marker_set_format(entry, elem->format);
+		if (ret)
+			return ret;
+	}
+	elem->call = (*entry)->probe;
+	elem->private = (*entry)->private;
+	elem->state = 1;
+	return 0;
+}
+
+/*
+ * Disable a marker and its probe callback.
+ * Note: only after a synchronize_sched() issued after setting elem->call to the
+ * empty function insures that the original callback is not used anymore. This
+ * insured by preemption disabling around the call site.
+ */
+static void disable_marker(struct marker *elem)
+{
+	elem->state = 0;
+	elem->call = __mark_empty_function;
+	/*
+	 * Leave the private data and id there, because removal is racy and
+	 * should be done only after a synchronize_sched(). These are never used
+	 * until the next initialization anyway.
+	 */
+}
+
+/**
+ * marker_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ * @probe_module: module address of the probe being updated
+ * @refcount: number of references left to the given probe_module (out)
+ *
+ * Updates the probe callback corresponding to a range of markers.
+ * Must be called with markers_mutex held.
+ */
+void marker_update_probe_range(struct marker *begin,
+	struct marker *end, struct module *probe_module,
+	int *refcount)
+{
+	struct marker *iter;
+	struct marker_entry *mark_entry;
+
+	for (iter = begin; iter < end; iter++) {
+		mark_entry = get_marker(iter->name);
+		if (mark_entry && mark_entry->refcount) {
+			set_marker(&mark_entry, iter);
+			/*
+			 * ignore error, continue
+			 */
+			if (probe_module)
+				if (probe_module ==
+			__module_text_address((unsigned long)mark_entry->probe))
+					(*refcount)++;
+		} else {
+			disable_marker(iter);
+		}
+	}
+}
+
+/*
+ * Update probes, removing the faulty probes.
+ * Issues a synchronize_sched() when no reference to the module passed
+ * as parameter is found in the probes so the probe module can be
+ * safely unloaded from now on.
+ */
+static void marker_update_probes(struct module *probe_module)
+{
+	int refcount = 0;
+
+	mutex_lock(&markers_mutex);
+	/* Core kernel markers */
+	marker_update_probe_range(__start___markers,
+			__stop___markers, probe_module, &refcount);
+	/* Markers in modules. */
+	module_update_markers(probe_module, &refcount);
+	if (probe_module && refcount == 0) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+	mutex_unlock(&markers_mutex);
+}
+
+/**
+ * marker_probe_register -  Connect a probe to a marker
+ * @name: marker name
+ * @format: format string
+ * @probe: probe handler
+ * @private: probe private data
+ *
+ * private data must be a valid allocated memory address, or NULL.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_probe_register(const char *name, const char *format,
+			marker_probe_func *probe, void *private)
+{
+	struct marker_entry *entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (entry && entry->refcount) {
+		ret = -EBUSY;
+		goto end;
+	}
+	if (deferred_sync) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+	ret = add_marker(name, format, probe, private);
+	if (ret)
+		goto end;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_probe_register);
+
+/**
+ * marker_probe_unregister -  Disconnect a probe from a marker
+ * @name: marker name
+ *
+ * Returns the private data given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister(const char *name)
+{
+	struct module *probe_module;
+	struct marker_entry *entry;
+	void *private;
+	int need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		private = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	private = remove_marker(name);
+	deferred_sync = 1;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(probe_module);
+	return private;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister);
+
+/**
+ * marker_probe_unregister_private_data -  Disconnect a probe from a marker
+ * @private: probe private data
+ *
+ * Unregister a marker by providing the registered private data.
+ * Returns the private data given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister_private_data(void *private)
+{
+	struct module *probe_module;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *entry;
+	int found = 0;
+	unsigned int i;
+	int need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
+		head = &marker_table[i];
+		hlist_for_each_entry(entry, node, head, hlist) {
+			if (entry->private == private) {
+				found = 1;
+				goto iter_end;
+			}
+		}
+	}
+iter_end:
+	if (!found) {
+		private = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	private = remove_marker(entry->name);
+	deferred_sync = 1;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(probe_module);
+	return private;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
+
+/**
+ * marker_arm - Arm a marker
+ * @name: marker name
+ *
+ * Activate a marker. It keeps a reference count of the number of
+ * arming/disarming done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_arm(const char *name)
+{
+	struct marker_entry *entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only need to update probes when refcount passes from 0 to 1.
+	 */
+	if (entry->refcount++)
+		goto end;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_arm);
+
+/**
+ * marker_disarm - Disarm a marker
+ * @name: marker name
+ *
+ * Disarm a marker. It keeps a reference count of the number of arming/disarming
+ * done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_disarm(const char *name)
+{
+	struct marker_entry *entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only permit decrement refcount if higher than 0.
+	 * Do probe update only on 1 -> 0 transition.
+	 */
+	if (entry->refcount) {
+		if (--entry->refcount)
+			goto end;
+	} else {
+		ret = -EPERM;
+		goto end;
+	}
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_disarm);
+
+/**
+ * marker_get_private_data - Get a marker's probe private data
+ * @name: marker name
+ *
+ * Returns the private data pointer, or an ERR_PTR.
+ * The private data pointer should _only_ be dereferenced if the caller is the
+ * owner of the data, or its content could vanish. This is mostly used to
+ * confirm that a caller is the owner of a registered probe.
+ */
+void *marker_get_private_data(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	u32 hash = jhash(name, name_len-1, 0);
+	int found = 0;
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			return e->private;
+		}
+	}
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(marker_get_private_data);
diff --git a/kernel/module.c b/kernel/module.c
index 7734595..3202c99 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1673,6 +1673,8 @@
 	unsigned int unusedcrcindex;
 	unsigned int unusedgplindex;
 	unsigned int unusedgplcrcindex;
+	unsigned int markersindex;
+	unsigned int markersstringsindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1939,6 +1941,9 @@
 		add_taint_module(mod, TAINT_FORCED_MODULE);
 	}
 #endif
+	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
+ 	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
+					"__markers_strings");
 
 	/* Now do relocations. */
 	for (i = 1; i < hdr->e_shnum; i++) {
@@ -1961,6 +1966,11 @@
 		if (err < 0)
 			goto cleanup;
 	}
+#ifdef CONFIG_MARKERS
+	mod->markers = (void *)sechdrs[markersindex].sh_addr;
+	mod->num_markers =
+		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
+#endif
 
         /* Find duplicate symbols */
 	err = verify_export_symbols(mod);
@@ -1979,6 +1989,11 @@
 
 	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
+#ifdef CONFIG_MARKERS
+	if (!mod->taints)
+		marker_update_probe_range(mod->markers,
+			mod->markers + mod->num_markers, NULL, NULL);
+#endif
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
@@ -2570,3 +2585,18 @@
 void struct_module(struct module *mod) { return; }
 EXPORT_SYMBOL(struct_module);
 #endif
+
+#ifdef CONFIG_MARKERS
+void module_update_markers(struct module *probe_module, int *refcount)
+{
+	struct module *mod;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(mod, &modules, list)
+		if (!mod->taints)
+			marker_update_probe_range(mod->markers,
+				mod->markers + mod->num_markers,
+				probe_module, refcount);
+	mutex_unlock(&module_mutex);
+}
+#endif
diff --git a/kernel/notifier.c b/kernel/notifier.c
new file mode 100644
index 0000000..4253f47
--- /dev/null
+++ b/kernel/notifier.c
@@ -0,0 +1,539 @@
+#include <linux/kdebug.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/vmalloc.h>
+
+/*
+ *	Notifier list for kernel code which wants to be called
+ *	at shutdown. This is used to stop any idling DMA operations
+ *	and the like.
+ */
+BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
+
+/*
+ *	Notifier chain core routines.  The exported routines below
+ *	are layered on top of these, with appropriate locking added.
+ */
+
+static int notifier_chain_register(struct notifier_block **nl,
+		struct notifier_block *n)
+{
+	while ((*nl) != NULL) {
+		if (n->priority > (*nl)->priority)
+			break;
+		nl = &((*nl)->next);
+	}
+	n->next = *nl;
+	rcu_assign_pointer(*nl, n);
+	return 0;
+}
+
+static int notifier_chain_unregister(struct notifier_block **nl,
+		struct notifier_block *n)
+{
+	while ((*nl) != NULL) {
+		if ((*nl) == n) {
+			rcu_assign_pointer(*nl, n->next);
+			return 0;
+		}
+		nl = &((*nl)->next);
+	}
+	return -ENOENT;
+}
+
+/**
+ * notifier_call_chain - Informs the registered notifiers about an event.
+ *	@nl:		Pointer to head of the blocking notifier chain
+ *	@val:		Value passed unmodified to notifier function
+ *	@v:		Pointer passed unmodified to notifier function
+ *	@nr_to_call:	Number of notifier functions to be called. Don't care
+ *			value of this parameter is -1.
+ *	@nr_calls:	Records the number of notifications sent. Don't care
+ *			value of this field is NULL.
+ *	@returns:	notifier_call_chain returns the value returned by the
+ *			last notifier function called.
+ */
+static int __kprobes notifier_call_chain(struct notifier_block **nl,
+					unsigned long val, void *v,
+					int nr_to_call,	int *nr_calls)
+{
+	int ret = NOTIFY_DONE;
+	struct notifier_block *nb, *next_nb;
+
+	nb = rcu_dereference(*nl);
+
+	while (nb && nr_to_call) {
+		next_nb = rcu_dereference(nb->next);
+		ret = nb->notifier_call(nb, val, v);
+
+		if (nr_calls)
+			(*nr_calls)++;
+
+		if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
+			break;
+		nb = next_nb;
+		nr_to_call--;
+	}
+	return ret;
+}
+
+/*
+ *	Atomic notifier chain routines.  Registration and unregistration
+ *	use a spinlock, and call_chain is synchronized by RCU (no locks).
+ */
+
+/**
+ *	atomic_notifier_chain_register - Add notifier to an atomic notifier chain
+ *	@nh: Pointer to head of the atomic notifier chain
+ *	@n: New entry in notifier chain
+ *
+ *	Adds a notifier to an atomic notifier chain.
+ *
+ *	Currently always returns zero.
+ */
+int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
+		struct notifier_block *n)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&nh->lock, flags);
+	ret = notifier_chain_register(&nh->head, n);
+	spin_unlock_irqrestore(&nh->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
+
+/**
+ *	atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
+ *	@nh: Pointer to head of the atomic notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from an atomic notifier chain.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
+		struct notifier_block *n)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&nh->lock, flags);
+	ret = notifier_chain_unregister(&nh->head, n);
+	spin_unlock_irqrestore(&nh->lock, flags);
+	synchronize_rcu();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
+
+/**
+ *	__atomic_notifier_call_chain - Call functions in an atomic notifier chain
+ *	@nh: Pointer to head of the atomic notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *	@nr_to_call: See the comment for notifier_call_chain.
+ *	@nr_calls: See the comment for notifier_call_chain.
+ *
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in an atomic context, so they must not block.
+ *	This routine uses RCU to synchronize with changes to the chain.
+ *
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then atomic_notifier_call_chain()
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise the return value is the return value
+ *	of the last notifier function called.
+ */
+int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+					unsigned long val, void *v,
+					int nr_to_call, int *nr_calls)
+{
+	int ret;
+
+	rcu_read_lock();
+	ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
+
+int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+		unsigned long val, void *v)
+{
+	return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
+}
+EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
+
+/*
+ *	Blocking notifier chain routines.  All access to the chain is
+ *	synchronized by an rwsem.
+ */
+
+/**
+ *	blocking_notifier_chain_register - Add notifier to a blocking notifier chain
+ *	@nh: Pointer to head of the blocking notifier chain
+ *	@n: New entry in notifier chain
+ *
+ *	Adds a notifier to a blocking notifier chain.
+ *	Must be called in process context.
+ *
+ *	Currently always returns zero.
+ */
+int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call down_write().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_register(&nh->head, n);
+
+	down_write(&nh->rwsem);
+	ret = notifier_chain_register(&nh->head, n);
+	up_write(&nh->rwsem);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
+
+/**
+ *	blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
+ *	@nh: Pointer to head of the blocking notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from a blocking notifier chain.
+ *	Must be called from process context.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call down_write().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_unregister(&nh->head, n);
+
+	down_write(&nh->rwsem);
+	ret = notifier_chain_unregister(&nh->head, n);
+	up_write(&nh->rwsem);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
+
+/**
+ *	__blocking_notifier_call_chain - Call functions in a blocking notifier chain
+ *	@nh: Pointer to head of the blocking notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *	@nr_to_call: See comment for notifier_call_chain.
+ *	@nr_calls: See comment for notifier_call_chain.
+ *
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in a process context, so they are allowed to block.
+ *
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise the return value is the return value
+ *	of the last notifier function called.
+ */
+int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
+				   unsigned long val, void *v,
+				   int nr_to_call, int *nr_calls)
+{
+	int ret = NOTIFY_DONE;
+
+	/*
+	 * We check the head outside the lock, but if this access is
+	 * racy then it does not matter what the result of the test
+	 * is, we re-check the list after having taken the lock anyway:
+	 */
+	if (rcu_dereference(nh->head)) {
+		down_read(&nh->rwsem);
+		ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
+					nr_calls);
+		up_read(&nh->rwsem);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain);
+
+int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
+		unsigned long val, void *v)
+{
+	return __blocking_notifier_call_chain(nh, val, v, -1, NULL);
+}
+EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
+
+/*
+ *	Raw notifier chain routines.  There is no protection;
+ *	the caller must provide it.  Use at your own risk!
+ */
+
+/**
+ *	raw_notifier_chain_register - Add notifier to a raw notifier chain
+ *	@nh: Pointer to head of the raw notifier chain
+ *	@n: New entry in notifier chain
+ *
+ *	Adds a notifier to a raw notifier chain.
+ *	All locking must be provided by the caller.
+ *
+ *	Currently always returns zero.
+ */
+int raw_notifier_chain_register(struct raw_notifier_head *nh,
+		struct notifier_block *n)
+{
+	return notifier_chain_register(&nh->head, n);
+}
+EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
+
+/**
+ *	raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
+ *	@nh: Pointer to head of the raw notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from a raw notifier chain.
+ *	All locking must be provided by the caller.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
+		struct notifier_block *n)
+{
+	return notifier_chain_unregister(&nh->head, n);
+}
+EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
+
+/**
+ *	__raw_notifier_call_chain - Call functions in a raw notifier chain
+ *	@nh: Pointer to head of the raw notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *	@nr_to_call: See comment for notifier_call_chain.
+ *	@nr_calls: See comment for notifier_call_chain
+ *
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in an undefined context.
+ *	All locking must be provided by the caller.
+ *
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then raw_notifier_call_chain()
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise the return value is the return value
+ *	of the last notifier function called.
+ */
+int __raw_notifier_call_chain(struct raw_notifier_head *nh,
+			      unsigned long val, void *v,
+			      int nr_to_call, int *nr_calls)
+{
+	return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
+}
+EXPORT_SYMBOL_GPL(__raw_notifier_call_chain);
+
+int raw_notifier_call_chain(struct raw_notifier_head *nh,
+		unsigned long val, void *v)
+{
+	return __raw_notifier_call_chain(nh, val, v, -1, NULL);
+}
+EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
+
+/*
+ *	SRCU notifier chain routines.    Registration and unregistration
+ *	use a mutex, and call_chain is synchronized by SRCU (no locks).
+ */
+
+/**
+ *	srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
+ *	@nh: Pointer to head of the SRCU notifier chain
+ *	@n: New entry in notifier chain
+ *
+ *	Adds a notifier to an SRCU notifier chain.
+ *	Must be called in process context.
+ *
+ *	Currently always returns zero.
+ */
+int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call mutex_lock().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_register(&nh->head, n);
+
+	mutex_lock(&nh->mutex);
+	ret = notifier_chain_register(&nh->head, n);
+	mutex_unlock(&nh->mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
+
+/**
+ *	srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
+ *	@nh: Pointer to head of the SRCU notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from an SRCU notifier chain.
+ *	Must be called from process context.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call mutex_lock().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_unregister(&nh->head, n);
+
+	mutex_lock(&nh->mutex);
+	ret = notifier_chain_unregister(&nh->head, n);
+	mutex_unlock(&nh->mutex);
+	synchronize_srcu(&nh->srcu);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
+
+/**
+ *	__srcu_notifier_call_chain - Call functions in an SRCU notifier chain
+ *	@nh: Pointer to head of the SRCU notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *	@nr_to_call: See comment for notifier_call_chain.
+ *	@nr_calls: See comment for notifier_call_chain
+ *
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in a process context, so they are allowed to block.
+ *
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then srcu_notifier_call_chain()
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise the return value is the return value
+ *	of the last notifier function called.
+ */
+int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
+			       unsigned long val, void *v,
+			       int nr_to_call, int *nr_calls)
+{
+	int ret;
+	int idx;
+
+	idx = srcu_read_lock(&nh->srcu);
+	ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
+	srcu_read_unlock(&nh->srcu, idx);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain);
+
+int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
+		unsigned long val, void *v)
+{
+	return __srcu_notifier_call_chain(nh, val, v, -1, NULL);
+}
+EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
+
+/**
+ *	srcu_init_notifier_head - Initialize an SRCU notifier head
+ *	@nh: Pointer to head of the srcu notifier chain
+ *
+ *	Unlike other sorts of notifier heads, SRCU notifier heads require
+ *	dynamic initialization.  Be sure to call this routine before
+ *	calling any of the other SRCU notifier routines for this head.
+ *
+ *	If an SRCU notifier head is deallocated, it must first be cleaned
+ *	up by calling srcu_cleanup_notifier_head().  Otherwise the head's
+ *	per-cpu data (used by the SRCU mechanism) will leak.
+ */
+void srcu_init_notifier_head(struct srcu_notifier_head *nh)
+{
+	mutex_init(&nh->mutex);
+	if (init_srcu_struct(&nh->srcu) < 0)
+		BUG();
+	nh->head = NULL;
+}
+EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
+
+/**
+ *	register_reboot_notifier - Register function to be called at reboot time
+ *	@nb: Info about notifier function to be called
+ *
+ *	Registers a function with the list of functions
+ *	to be called at reboot time.
+ *
+ *	Currently always returns zero, as blocking_notifier_chain_register()
+ *	always returns zero.
+ */
+int register_reboot_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&reboot_notifier_list, nb);
+}
+EXPORT_SYMBOL(register_reboot_notifier);
+
+/**
+ *	unregister_reboot_notifier - Unregister previously registered reboot notifier
+ *	@nb: Hook to be unregistered
+ *
+ *	Unregisters a previously registered reboot
+ *	notifier function.
+ *
+ *	Returns zero on success, or %-ENOENT on failure.
+ */
+int unregister_reboot_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
+}
+EXPORT_SYMBOL(unregister_reboot_notifier);
+
+static ATOMIC_NOTIFIER_HEAD(die_chain);
+
+int notify_die(enum die_val val, const char *str,
+	       struct pt_regs *regs, long err, int trap, int sig)
+{
+	struct die_args args = {
+		.regs	= regs,
+		.str	= str,
+		.err	= err,
+		.trapnr	= trap,
+		.signr	= sig,
+
+	};
+	return atomic_notifier_call_chain(&die_chain, val, &args);
+}
+
+int register_die_notifier(struct notifier_block *nb)
+{
+	vmalloc_sync_all();
+	return atomic_notifier_chain_register(&die_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_die_notifier);
+
+int unregister_die_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&die_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_die_notifier);
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
new file mode 100644
index 0000000..aead4d6
--- /dev/null
+++ b/kernel/ns_cgroup.c
@@ -0,0 +1,100 @@
+/*
+ * ns_cgroup.c - namespace cgroup subsystem
+ *
+ * Copyright 2006, 2007 IBM Corp
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+
+struct ns_cgroup {
+	struct cgroup_subsys_state css;
+	spinlock_t lock;
+};
+
+struct cgroup_subsys ns_subsys;
+
+static inline struct ns_cgroup *cgroup_to_ns(
+		struct cgroup *cgroup)
+{
+	return container_of(cgroup_subsys_state(cgroup, ns_subsys_id),
+			    struct ns_cgroup, css);
+}
+
+int ns_cgroup_clone(struct task_struct *task)
+{
+	return cgroup_clone(task, &ns_subsys);
+}
+
+/*
+ * Rules:
+ *   1. you can only enter a cgroup which is a child of your current
+ *     cgroup
+ *   2. you can only place another process into a cgroup if
+ *     a. you have CAP_SYS_ADMIN
+ *     b. your cgroup is an ancestor of task's destination cgroup
+ *       (hence either you are in the same cgroup as task, or in an
+ *        ancestor cgroup thereof)
+ */
+static int ns_can_attach(struct cgroup_subsys *ss,
+		struct cgroup *new_cgroup, struct task_struct *task)
+{
+	struct cgroup *orig;
+
+	if (current != task) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (!cgroup_is_descendant(new_cgroup))
+			return -EPERM;
+	}
+
+	if (atomic_read(&new_cgroup->count) != 0)
+		return -EPERM;
+
+	orig = task_cgroup(task, ns_subsys_id);
+	if (orig && orig != new_cgroup->parent)
+		return -EPERM;
+
+	return 0;
+}
+
+/*
+ * Rules: you can only create a cgroup if
+ *     1. you are capable(CAP_SYS_ADMIN)
+ *     2. the target cgroup is a descendant of your own cgroup
+ */
+static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
+						struct cgroup *cgroup)
+{
+	struct ns_cgroup *ns_cgroup;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+	if (!cgroup_is_descendant(cgroup))
+		return ERR_PTR(-EPERM);
+
+	ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
+	if (!ns_cgroup)
+		return ERR_PTR(-ENOMEM);
+	spin_lock_init(&ns_cgroup->lock);
+	return &ns_cgroup->css;
+}
+
+static void ns_destroy(struct cgroup_subsys *ss,
+			struct cgroup *cgroup)
+{
+	struct ns_cgroup *ns_cgroup;
+
+	ns_cgroup = cgroup_to_ns(cgroup);
+	kfree(ns_cgroup);
+}
+
+struct cgroup_subsys ns_subsys = {
+	.name = "ns",
+	.can_attach = ns_can_attach,
+	.create = ns_create,
+	.destroy  = ns_destroy,
+	.subsys_id = ns_subsys_id,
+};
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 049e7c0..79f871b 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -26,19 +26,6 @@
 
 struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
-static inline void get_nsproxy(struct nsproxy *ns)
-{
-	atomic_inc(&ns->count);
-}
-
-void get_task_namespaces(struct task_struct *tsk)
-{
-	struct nsproxy *ns = tsk->nsproxy;
-	if (ns) {
-		get_nsproxy(ns);
-	}
-}
-
 /*
  * creates a copy of "orig" with refcount 1.
  */
@@ -87,7 +74,7 @@
 		goto out_ipc;
 	}
 
-	new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns);
+	new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
 	if (IS_ERR(new_nsp->pid_ns)) {
 		err = PTR_ERR(new_nsp->pid_ns);
 		goto out_pid;
@@ -142,7 +129,8 @@
 
 	get_nsproxy(old_ns);
 
-	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET)))
+	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+				CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET)))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN)) {
@@ -156,7 +144,14 @@
 		goto out;
 	}
 
+	err = ns_cgroup_clone(tsk);
+	if (err) {
+		put_nsproxy(new_ns);
+		goto out;
+	}
+
 	tsk->nsproxy = new_ns;
+
 out:
 	put_nsproxy(old_ns);
 	return err;
@@ -196,11 +191,46 @@
 
 	*new_nsp = create_new_namespaces(unshare_flags, current,
 				new_fs ? new_fs : current->fs);
-	if (IS_ERR(*new_nsp))
+	if (IS_ERR(*new_nsp)) {
 		err = PTR_ERR(*new_nsp);
+		goto out;
+	}
+
+	err = ns_cgroup_clone(current);
+	if (err)
+		put_nsproxy(*new_nsp);
+
+out:
 	return err;
 }
 
+void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
+{
+	struct nsproxy *ns;
+
+	might_sleep();
+
+	ns = p->nsproxy;
+
+	rcu_assign_pointer(p->nsproxy, new);
+
+	if (ns && atomic_dec_and_test(&ns->count)) {
+		/*
+		 * wait for others to get what they want from this nsproxy.
+		 *
+		 * cannot release this nsproxy via the call_rcu() since
+		 * put_mnt_ns() will want to sleep
+		 */
+		synchronize_rcu();
+		free_nsproxy(ns);
+	}
+}
+
+void exit_task_namespaces(struct task_struct *p)
+{
+	switch_task_namespaces(p, NULL);
+}
+
 static int __init nsproxy_cache_init(void)
 {
 	nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
diff --git a/kernel/pid.c b/kernel/pid.c
index c6e3f9f..d1db36b 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -18,6 +18,12 @@
  * allocation scenario when all but one out of 1 million PIDs possible are
  * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
  * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
+ *
+ * Pid namespaces:
+ *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
+ *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
+ *     Many thanks to Oleg Nesterov for comments and help
+ *
  */
 
 #include <linux/mm.h>
@@ -28,12 +34,14 @@
 #include <linux/hash.h>
 #include <linux/pid_namespace.h>
 #include <linux/init_task.h>
+#include <linux/syscalls.h>
 
-#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
+#define pid_hashfn(nr, ns)	\
+	hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
 static struct hlist_head *pid_hash;
 static int pidhash_shift;
-static struct kmem_cache *pid_cachep;
 struct pid init_struct_pid = INIT_STRUCT_PID;
+static struct kmem_cache *pid_ns_cachep;
 
 int pid_max = PID_MAX_DEFAULT;
 
@@ -68,8 +76,25 @@
 		[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
 	},
 	.last_pid = 0,
-	.child_reaper = &init_task
+	.level = 0,
+	.child_reaper = &init_task,
 };
+EXPORT_SYMBOL_GPL(init_pid_ns);
+
+int is_container_init(struct task_struct *tsk)
+{
+	int ret = 0;
+	struct pid *pid;
+
+	rcu_read_lock();
+	pid = task_pid(tsk);
+	if (pid != NULL && pid->numbers[pid->level].nr == 1)
+		ret = 1;
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(is_container_init);
 
 /*
  * Note: disable interrupts while the pidmap_lock is held as an
@@ -176,11 +201,17 @@
 
 fastcall void put_pid(struct pid *pid)
 {
+	struct pid_namespace *ns;
+
 	if (!pid)
 		return;
+
+	ns = pid->numbers[pid->level].ns;
 	if ((atomic_read(&pid->count) == 1) ||
-	     atomic_dec_and_test(&pid->count))
-		kmem_cache_free(pid_cachep, pid);
+	     atomic_dec_and_test(&pid->count)) {
+		kmem_cache_free(ns->pid_cachep, pid);
+		put_pid_ns(ns);
+	}
 }
 EXPORT_SYMBOL_GPL(put_pid);
 
@@ -193,60 +224,94 @@
 fastcall void free_pid(struct pid *pid)
 {
 	/* We can be called with write_lock_irq(&tasklist_lock) held */
+	int i;
 	unsigned long flags;
 
 	spin_lock_irqsave(&pidmap_lock, flags);
-	hlist_del_rcu(&pid->pid_chain);
+	for (i = 0; i <= pid->level; i++)
+		hlist_del_rcu(&pid->numbers[i].pid_chain);
 	spin_unlock_irqrestore(&pidmap_lock, flags);
 
-	free_pidmap(&init_pid_ns, pid->nr);
+	for (i = 0; i <= pid->level; i++)
+		free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr);
+
 	call_rcu(&pid->rcu, delayed_put_pid);
 }
 
-struct pid *alloc_pid(void)
+struct pid *alloc_pid(struct pid_namespace *ns)
 {
 	struct pid *pid;
 	enum pid_type type;
-	int nr = -1;
+	int i, nr;
+	struct pid_namespace *tmp;
+	struct upid *upid;
 
-	pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL);
+	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
 	if (!pid)
 		goto out;
 
-	nr = alloc_pidmap(current->nsproxy->pid_ns);
-	if (nr < 0)
-		goto out_free;
+	tmp = ns;
+	for (i = ns->level; i >= 0; i--) {
+		nr = alloc_pidmap(tmp);
+		if (nr < 0)
+			goto out_free;
 
+		pid->numbers[i].nr = nr;
+		pid->numbers[i].ns = tmp;
+		tmp = tmp->parent;
+	}
+
+	get_pid_ns(ns);
+	pid->level = ns->level;
 	atomic_set(&pid->count, 1);
-	pid->nr = nr;
 	for (type = 0; type < PIDTYPE_MAX; ++type)
 		INIT_HLIST_HEAD(&pid->tasks[type]);
 
 	spin_lock_irq(&pidmap_lock);
-	hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]);
+	for (i = ns->level; i >= 0; i--) {
+		upid = &pid->numbers[i];
+		hlist_add_head_rcu(&upid->pid_chain,
+				&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
+	}
 	spin_unlock_irq(&pidmap_lock);
 
 out:
 	return pid;
 
 out_free:
-	kmem_cache_free(pid_cachep, pid);
+	for (i++; i <= ns->level; i++)
+		free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr);
+
+	kmem_cache_free(ns->pid_cachep, pid);
 	pid = NULL;
 	goto out;
 }
 
-struct pid * fastcall find_pid(int nr)
+struct pid * fastcall find_pid_ns(int nr, struct pid_namespace *ns)
 {
 	struct hlist_node *elem;
-	struct pid *pid;
+	struct upid *pnr;
 
-	hlist_for_each_entry_rcu(pid, elem,
-			&pid_hash[pid_hashfn(nr)], pid_chain) {
-		if (pid->nr == nr)
-			return pid;
-	}
+	hlist_for_each_entry_rcu(pnr, elem,
+			&pid_hash[pid_hashfn(nr, ns)], pid_chain)
+		if (pnr->nr == nr && pnr->ns == ns)
+			return container_of(pnr, struct pid,
+					numbers[ns->level]);
+
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(find_pid_ns);
+
+struct pid *find_vpid(int nr)
+{
+	return find_pid_ns(nr, current->nsproxy->pid_ns);
+}
+EXPORT_SYMBOL_GPL(find_vpid);
+
+struct pid *find_pid(int nr)
+{
+	return find_pid_ns(nr, &init_pid_ns);
+}
 EXPORT_SYMBOL_GPL(find_pid);
 
 /*
@@ -307,12 +372,32 @@
 /*
  * Must be called under rcu_read_lock() or with tasklist_lock read-held.
  */
-struct task_struct *find_task_by_pid_type(int type, int nr)
+struct task_struct *find_task_by_pid_type_ns(int type, int nr,
+		struct pid_namespace *ns)
 {
-	return pid_task(find_pid(nr), type);
+	return pid_task(find_pid_ns(nr, ns), type);
 }
 
-EXPORT_SYMBOL(find_task_by_pid_type);
+EXPORT_SYMBOL(find_task_by_pid_type_ns);
+
+struct task_struct *find_task_by_pid(pid_t nr)
+{
+	return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns);
+}
+EXPORT_SYMBOL(find_task_by_pid);
+
+struct task_struct *find_task_by_vpid(pid_t vnr)
+{
+	return find_task_by_pid_type_ns(PIDTYPE_PID, vnr,
+			current->nsproxy->pid_ns);
+}
+EXPORT_SYMBOL(find_task_by_vpid);
+
+struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
+{
+	return find_task_by_pid_type_ns(PIDTYPE_PID, nr, ns);
+}
+EXPORT_SYMBOL(find_task_by_pid_ns);
 
 struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
 {
@@ -339,45 +424,239 @@
 	struct pid *pid;
 
 	rcu_read_lock();
-	pid = get_pid(find_pid(nr));
+	pid = get_pid(find_vpid(nr));
 	rcu_read_unlock();
 
 	return pid;
 }
 
+pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
+{
+	struct upid *upid;
+	pid_t nr = 0;
+
+	if (pid && ns->level <= pid->level) {
+		upid = &pid->numbers[ns->level];
+		if (upid->ns == ns)
+			nr = upid->nr;
+	}
+	return nr;
+}
+
+pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+	return pid_nr_ns(task_pid(tsk), ns);
+}
+EXPORT_SYMBOL(task_pid_nr_ns);
+
+pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+	return pid_nr_ns(task_tgid(tsk), ns);
+}
+EXPORT_SYMBOL(task_tgid_nr_ns);
+
+pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+	return pid_nr_ns(task_pgrp(tsk), ns);
+}
+EXPORT_SYMBOL(task_pgrp_nr_ns);
+
+pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+	return pid_nr_ns(task_session(tsk), ns);
+}
+EXPORT_SYMBOL(task_session_nr_ns);
+
 /*
  * Used by proc to find the first pid that is greater then or equal to nr.
  *
  * If there is a pid at nr this function is exactly the same as find_pid.
  */
-struct pid *find_ge_pid(int nr)
+struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 {
 	struct pid *pid;
 
 	do {
-		pid = find_pid(nr);
+		pid = find_pid_ns(nr, ns);
 		if (pid)
 			break;
-		nr = next_pidmap(current->nsproxy->pid_ns, nr);
+		nr = next_pidmap(ns, nr);
 	} while (nr > 0);
 
 	return pid;
 }
 EXPORT_SYMBOL_GPL(find_get_pid);
 
+struct pid_cache {
+	int nr_ids;
+	char name[16];
+	struct kmem_cache *cachep;
+	struct list_head list;
+};
+
+static LIST_HEAD(pid_caches_lh);
+static DEFINE_MUTEX(pid_caches_mutex);
+
+/*
+ * creates the kmem cache to allocate pids from.
+ * @nr_ids: the number of numerical ids this pid will have to carry
+ */
+
+static struct kmem_cache *create_pid_cachep(int nr_ids)
+{
+	struct pid_cache *pcache;
+	struct kmem_cache *cachep;
+
+	mutex_lock(&pid_caches_mutex);
+	list_for_each_entry (pcache, &pid_caches_lh, list)
+		if (pcache->nr_ids == nr_ids)
+			goto out;
+
+	pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
+	if (pcache == NULL)
+		goto err_alloc;
+
+	snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
+	cachep = kmem_cache_create(pcache->name,
+			sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
+			0, SLAB_HWCACHE_ALIGN, NULL);
+	if (cachep == NULL)
+		goto err_cachep;
+
+	pcache->nr_ids = nr_ids;
+	pcache->cachep = cachep;
+	list_add(&pcache->list, &pid_caches_lh);
+out:
+	mutex_unlock(&pid_caches_mutex);
+	return pcache->cachep;
+
+err_cachep:
+	kfree(pcache);
+err_alloc:
+	mutex_unlock(&pid_caches_mutex);
+	return NULL;
+}
+
+static struct pid_namespace *create_pid_namespace(int level)
+{
+	struct pid_namespace *ns;
+	int i;
+
+	ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
+	if (ns == NULL)
+		goto out;
+
+	ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!ns->pidmap[0].page)
+		goto out_free;
+
+	ns->pid_cachep = create_pid_cachep(level + 1);
+	if (ns->pid_cachep == NULL)
+		goto out_free_map;
+
+	kref_init(&ns->kref);
+	ns->last_pid = 0;
+	ns->child_reaper = NULL;
+	ns->level = level;
+
+	set_bit(0, ns->pidmap[0].page);
+	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
+
+	for (i = 1; i < PIDMAP_ENTRIES; i++) {
+		ns->pidmap[i].page = 0;
+		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
+	}
+
+	return ns;
+
+out_free_map:
+	kfree(ns->pidmap[0].page);
+out_free:
+	kmem_cache_free(pid_ns_cachep, ns);
+out:
+	return ERR_PTR(-ENOMEM);
+}
+
+static void destroy_pid_namespace(struct pid_namespace *ns)
+{
+	int i;
+
+	for (i = 0; i < PIDMAP_ENTRIES; i++)
+		kfree(ns->pidmap[i].page);
+	kmem_cache_free(pid_ns_cachep, ns);
+}
+
 struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
 {
+	struct pid_namespace *new_ns;
+
 	BUG_ON(!old_ns);
-	get_pid_ns(old_ns);
-	return old_ns;
+	new_ns = get_pid_ns(old_ns);
+	if (!(flags & CLONE_NEWPID))
+		goto out;
+
+	new_ns = ERR_PTR(-EINVAL);
+	if (flags & CLONE_THREAD)
+		goto out_put;
+
+	new_ns = create_pid_namespace(old_ns->level + 1);
+	if (!IS_ERR(new_ns))
+		new_ns->parent = get_pid_ns(old_ns);
+
+out_put:
+	put_pid_ns(old_ns);
+out:
+	return new_ns;
 }
 
 void free_pid_ns(struct kref *kref)
 {
-	struct pid_namespace *ns;
+	struct pid_namespace *ns, *parent;
 
 	ns = container_of(kref, struct pid_namespace, kref);
-	kfree(ns);
+
+	parent = ns->parent;
+	destroy_pid_namespace(ns);
+
+	if (parent != NULL)
+		put_pid_ns(parent);
+}
+
+void zap_pid_ns_processes(struct pid_namespace *pid_ns)
+{
+	int nr;
+	int rc;
+
+	/*
+	 * The last thread in the cgroup-init thread group is terminating.
+	 * Find remaining pid_ts in the namespace, signal and wait for them
+	 * to exit.
+	 *
+	 * Note:  This signals each threads in the namespace - even those that
+	 * 	  belong to the same thread group, To avoid this, we would have
+	 * 	  to walk the entire tasklist looking a processes in this
+	 * 	  namespace, but that could be unnecessarily expensive if the
+	 * 	  pid namespace has just a few processes. Or we need to
+	 * 	  maintain a tasklist for each pid namespace.
+	 *
+	 */
+	read_lock(&tasklist_lock);
+	nr = next_pidmap(pid_ns, 1);
+	while (nr > 0) {
+		kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
+		nr = next_pidmap(pid_ns, nr);
+	}
+	read_unlock(&tasklist_lock);
+
+	do {
+		clear_thread_flag(TIF_SIGPENDING);
+		rc = sys_wait4(-1, NULL, __WALL, NULL);
+	} while (rc != -ECHILD);
+
+
+	/* Child reaper for the pid namespace is going away */
+	pid_ns->child_reaper = NULL;
+	return;
 }
 
 /*
@@ -412,5 +691,9 @@
 	set_bit(0, init_pid_ns.pidmap[0].page);
 	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
 
-	pid_cachep = KMEM_CACHE(pid, SLAB_PANIC);
+	init_pid_ns.pid_cachep = create_pid_cachep(1);
+	if (init_pid_ns.pid_cachep == NULL)
+		panic("Can't create pid_1 cachep\n");
+
+	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
 }
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index b53c8fc..68c9637 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -21,8 +21,8 @@
 
 	read_lock(&tasklist_lock);
 	p = find_task_by_pid(pid);
-	if (!p || (CPUCLOCK_PERTHREAD(which_clock) ?
-		   p->tgid != current->tgid : p->tgid != pid)) {
+	if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
+		   same_thread_group(p, current) : thread_group_leader(p))) {
 		error = -EINVAL;
 	}
 	read_unlock(&tasklist_lock);
@@ -308,13 +308,13 @@
 		p = find_task_by_pid(pid);
 		if (p) {
 			if (CPUCLOCK_PERTHREAD(which_clock)) {
-				if (p->tgid == current->tgid) {
+				if (same_thread_group(p, current)) {
 					error = cpu_clock_sample(which_clock,
 								 p, &rtn);
 				}
 			} else {
 				read_lock(&tasklist_lock);
-				if (p->tgid == pid && p->signal) {
+				if (thread_group_leader(p) && p->signal) {
 					error =
 					    cpu_clock_sample_group(which_clock,
 							           p, &rtn);
@@ -355,7 +355,7 @@
 			p = current;
 		} else {
 			p = find_task_by_pid(pid);
-			if (p && p->tgid != current->tgid)
+			if (p && !same_thread_group(p, current))
 				p = NULL;
 		}
 	} else {
@@ -363,7 +363,7 @@
 			p = current->group_leader;
 		} else {
 			p = find_task_by_pid(pid);
-			if (p && p->tgid != pid)
+			if (p && !thread_group_leader(p))
 				p = NULL;
 		}
 	}
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index d11f579..35b4bbf 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -404,7 +404,7 @@
 
 	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
 		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
-		 rtn->tgid != current->tgid ||
+		 !same_thread_group(rtn, current) ||
 		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
 		return NULL;
 
@@ -608,7 +608,7 @@
 		spin_lock(&timr->it_lock);
 
 		if ((timr->it_id != timer_id) || !(timr->it_process) ||
-				timr->it_process->tgid != current->tgid) {
+				!same_thread_group(timr->it_process, current)) {
 			spin_unlock(&timr->it_lock);
 			spin_unlock_irqrestore(&idr_lock, *flags);
 			timr = NULL;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index a73ebd3..7c76f2f 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -19,6 +19,7 @@
 #include <linux/security.h>
 #include <linux/signal.h>
 #include <linux/audit.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -168,7 +169,7 @@
 	retval = -EPERM;
 	if (task->pid <= 1)
 		goto out;
-	if (task->tgid == current->tgid)
+	if (same_thread_group(task, current))
 		goto out;
 
 repeat:
@@ -443,7 +444,7 @@
 		return ERR_PTR(-EPERM);
 
 	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
+	child = find_task_by_vpid(pid);
 	if (child)
 		get_task_struct(child);
 
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 6b0703d..56d73cb 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -87,7 +87,7 @@
 static void printk_task(struct task_struct *p)
 {
 	if (p)
-		printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
+		printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio);
 	else
 		printk("<none>");
 }
@@ -152,22 +152,25 @@
 	printk(  "[ BUG: circular locking deadlock detected! ]\n");
 	printk(  "--------------------------------------------\n");
 	printk("%s/%d is deadlocking current task %s/%d\n\n",
-	       task->comm, task->pid, current->comm, current->pid);
+	       task->comm, task_pid_nr(task),
+	       current->comm, task_pid_nr(current));
 
 	printk("\n1) %s/%d is trying to acquire this lock:\n",
-	       current->comm, current->pid);
+	       current->comm, task_pid_nr(current));
 	printk_lock(waiter->lock, 1);
 
-	printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid);
+	printk("\n2) %s/%d is blocked on this lock:\n",
+		task->comm, task_pid_nr(task));
 	printk_lock(waiter->deadlock_lock, 1);
 
 	debug_show_held_locks(current);
 	debug_show_held_locks(task);
 
-	printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid);
+	printk("\n%s/%d's [blocked] stackdump:\n\n",
+		task->comm, task_pid_nr(task));
 	show_stack(task, NULL);
 	printk("\n%s/%d's [current] stackdump:\n\n",
-	       current->comm, current->pid);
+		current->comm, task_pid_nr(current));
 	dump_stack();
 	debug_show_all_locks();
 
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 8cd9bd2..0deef71 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -185,7 +185,7 @@
 			prev_max = max_lock_depth;
 			printk(KERN_WARNING "Maximum lock depth %d reached "
 			       "task: %s (%d)\n", max_lock_depth,
-			       top_task->comm, top_task->pid);
+			       top_task->comm, task_pid_nr(top_task));
 		}
 		put_task_struct(task);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index ed90be4..afe76ec 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -44,6 +44,7 @@
 #include <linux/vmalloc.h>
 #include <linux/blkdev.h>
 #include <linux/delay.h>
+#include <linux/pid_namespace.h>
 #include <linux/smp.h>
 #include <linux/threads.h>
 #include <linux/timer.h>
@@ -51,6 +52,7 @@
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/percpu.h>
+#include <linux/cpu_acct.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
 #include <linux/sysctl.h>
@@ -153,10 +155,15 @@
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
+#include <linux/cgroup.h>
+
 struct cfs_rq;
 
 /* task group related information */
 struct task_group {
+#ifdef CONFIG_FAIR_CGROUP_SCHED
+	struct cgroup_subsys_state css;
+#endif
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
 	/* runqueue "owned" by this group on each cpu */
@@ -197,6 +204,9 @@
 
 #ifdef CONFIG_FAIR_USER_SCHED
 	tg = p->user->tg;
+#elif defined(CONFIG_FAIR_CGROUP_SCHED)
+	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
+				struct task_group, css);
 #else
 	tg  = &init_task_group;
 #endif
@@ -1875,7 +1885,7 @@
 	preempt_enable();
 #endif
 	if (current->set_child_tid)
-		put_user(current->pid, current->set_child_tid);
+		put_user(task_pid_vnr(current), current->set_child_tid);
 }
 
 /*
@@ -3307,9 +3317,13 @@
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	cputime64_t tmp;
+	struct rq *rq = this_rq();
 
 	p->utime = cputime_add(p->utime, cputime);
 
+	if (p != rq->idle)
+		cpuacct_charge(p, cputime);
+
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
 	if (TASK_NICE(p) > 0)
@@ -3374,9 +3388,10 @@
 		cpustat->irq = cputime64_add(cpustat->irq, tmp);
 	else if (softirq_count())
 		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
-	else if (p != rq->idle)
+	else if (p != rq->idle) {
 		cpustat->system = cputime64_add(cpustat->system, tmp);
-	else if (atomic_read(&rq->nr_iowait) > 0)
+		cpuacct_charge(p, cputime);
+	} else if (atomic_read(&rq->nr_iowait) > 0)
 		cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 	else
 		cpustat->idle = cputime64_add(cpustat->idle, tmp);
@@ -3412,8 +3427,10 @@
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 		else
 			cpustat->idle = cputime64_add(cpustat->idle, tmp);
-	} else
+	} else {
 		cpustat->steal = cputime64_add(cpustat->steal, tmp);
+		cpuacct_charge(p, -tmp);
+	}
 }
 
 /*
@@ -3493,7 +3510,7 @@
 static noinline void __schedule_bug(struct task_struct *prev)
 {
 	printk(KERN_ERR "BUG: scheduling while atomic: %s/0x%08x/%d\n",
-		prev->comm, preempt_count(), prev->pid);
+		prev->comm, preempt_count(), task_pid_nr(prev));
 	debug_show_held_locks(prev);
 	if (irqs_disabled())
 		print_irqtrace_events(prev);
@@ -4159,7 +4176,7 @@
  */
 static struct task_struct *find_process_by_pid(pid_t pid)
 {
-	return pid ? find_task_by_pid(pid) : current;
+	return pid ? find_task_by_vpid(pid) : current;
 }
 
 /* Actually do priority change: must hold rq lock. */
@@ -4462,8 +4479,21 @@
 
 	cpus_allowed = cpuset_cpus_allowed(p);
 	cpus_and(new_mask, new_mask, cpus_allowed);
+ again:
 	retval = set_cpus_allowed(p, new_mask);
 
+	if (!retval) {
+		cpus_allowed = cpuset_cpus_allowed(p);
+		if (!cpus_subset(new_mask, cpus_allowed)) {
+			/*
+			 * We must have raced with a concurrent cpuset
+			 * update. Just reset the cpus_allowed to the
+			 * cpuset's cpus_allowed
+			 */
+			new_mask = cpus_allowed;
+			goto again;
+		}
+	}
 out_unlock:
 	put_task_struct(p);
 	mutex_unlock(&sched_hotcpu_mutex);
@@ -4843,7 +4873,8 @@
 		free = (unsigned long)n - (unsigned long)end_of_stack(p);
 	}
 #endif
-	printk(KERN_CONT "%5lu %5d %6d\n", free, p->pid, p->parent->pid);
+	printk(KERN_CONT "%5lu %5d %6d\n", free,
+		task_pid_nr(p), task_pid_nr(p->parent));
 
 	if (state != TASK_RUNNING)
 		show_stack(p, NULL);
@@ -5137,8 +5168,16 @@
 
 		/* No more Mr. Nice Guy. */
 		if (dest_cpu == NR_CPUS) {
+			cpumask_t cpus_allowed = cpuset_cpus_allowed_locked(p);
+			/*
+			 * Try to stay on the same cpuset, where the
+			 * current cpuset may be a subset of all cpus.
+			 * The cpuset_cpus_allowed_locked() variant of
+			 * cpuset_cpus_allowed() will not block.  It must be
+			 * called within calls to cpuset_lock/cpuset_unlock.
+			 */
 			rq = task_rq_lock(p, &flags);
-			cpus_setall(p->cpus_allowed);
+			p->cpus_allowed = cpus_allowed;
 			dest_cpu = any_online_cpu(p->cpus_allowed);
 			task_rq_unlock(rq, &flags);
 
@@ -5150,7 +5189,7 @@
 			if (p->mm && printk_ratelimit())
 				printk(KERN_INFO "process %d (%s) no "
 				       "longer affine to cpu%d\n",
-				       p->pid, p->comm, dead_cpu);
+			       task_pid_nr(p), p->comm, dead_cpu);
 		}
 	} while (!__migrate_task_irq(p, dead_cpu, dest_cpu));
 }
@@ -5257,7 +5296,7 @@
 	struct rq *rq = cpu_rq(dead_cpu);
 
 	/* Must be exiting, otherwise would be on tasklist. */
-	BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
+	BUG_ON(!p->exit_state);
 
 	/* Cannot have done final schedule yet: would have vanished. */
 	BUG_ON(p->state == TASK_DEAD);
@@ -5504,6 +5543,7 @@
 
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
+		cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
 		migrate_live_tasks(cpu);
 		rq = cpu_rq(cpu);
 		kthread_stop(rq->migration_thread);
@@ -5517,6 +5557,7 @@
 		rq->idle->sched_class = &idle_sched_class;
 		migrate_dead_tasks(cpu);
 		spin_unlock_irq(&rq->lock);
+		cpuset_unlock();
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
 
@@ -6367,26 +6408,31 @@
 	return -ENOMEM;
 #endif
 }
+
+static cpumask_t *doms_cur;	/* current sched domains */
+static int ndoms_cur;		/* number of sched domains in 'doms_cur' */
+
+/*
+ * Special case: If a kmalloc of a doms_cur partition (array of
+ * cpumask_t) fails, then fallback to a single sched domain,
+ * as determined by the single cpumask_t fallback_doms.
+ */
+static cpumask_t fallback_doms;
+
 /*
  * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
+ * For now this just excludes isolated cpus, but could be used to
+ * exclude other special cases in the future.
  */
 static int arch_init_sched_domains(const cpumask_t *cpu_map)
 {
-	cpumask_t cpu_default_map;
-	int err;
-
-	/*
-	 * Setup mask for cpus without special case scheduling requirements.
-	 * For now this just excludes isolated cpus, but could be used to
-	 * exclude other special cases in the future.
-	 */
-	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
-
-	err = build_sched_domains(&cpu_default_map);
-
+	ndoms_cur = 1;
+	doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+	if (!doms_cur)
+		doms_cur = &fallback_doms;
+	cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map);
 	register_sched_domain_sysctl();
-
-	return err;
+	return build_sched_domains(doms_cur);
 }
 
 static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
@@ -6410,6 +6456,68 @@
 	arch_destroy_sched_domains(cpu_map);
 }
 
+/*
+ * Partition sched domains as specified by the 'ndoms_new'
+ * cpumasks in the array doms_new[] of cpumasks.  This compares
+ * doms_new[] to the current sched domain partitioning, doms_cur[].
+ * It destroys each deleted domain and builds each new domain.
+ *
+ * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'.
+ * The masks don't intersect (don't overlap.)  We should setup one
+ * sched domain for each mask.  CPUs not in any of the cpumasks will
+ * not be load balanced.  If the same cpumask appears both in the
+ * current 'doms_cur' domains and in the new 'doms_new', we can leave
+ * it as it is.
+ *
+ * The passed in 'doms_new' should be kmalloc'd.  This routine takes
+ * ownership of it and will kfree it when done with it.  If the caller
+ * failed the kmalloc call, then it can pass in doms_new == NULL,
+ * and partition_sched_domains() will fallback to the single partition
+ * 'fallback_doms'.
+ *
+ * Call with hotplug lock held
+ */
+void partition_sched_domains(int ndoms_new, cpumask_t *doms_new)
+{
+	int i, j;
+
+	if (doms_new == NULL) {
+		ndoms_new = 1;
+		doms_new = &fallback_doms;
+		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+	}
+
+	/* Destroy deleted domains */
+	for (i = 0; i < ndoms_cur; i++) {
+		for (j = 0; j < ndoms_new; j++) {
+			if (cpus_equal(doms_cur[i], doms_new[j]))
+				goto match1;
+		}
+		/* no match - a current sched domain not in new doms_new[] */
+		detach_destroy_domains(doms_cur + i);
+match1:
+		;
+	}
+
+	/* Build new domains */
+	for (i = 0; i < ndoms_new; i++) {
+		for (j = 0; j < ndoms_cur; j++) {
+			if (cpus_equal(doms_new[i], doms_cur[j]))
+				goto match2;
+		}
+		/* no match - add a new doms_new */
+		build_sched_domains(doms_new + i);
+match2:
+		;
+	}
+
+	/* Remember the new sched domains */
+	if (doms_cur != &fallback_doms)
+		kfree(doms_cur);
+	doms_cur = doms_new;
+	ndoms_cur = ndoms_new;
+}
+
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 static int arch_reinit_sched_domains(void)
 {
@@ -6991,3 +7099,116 @@
 }
 
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
+
+#ifdef CONFIG_FAIR_CGROUP_SCHED
+
+/* return corresponding task_group object of a cgroup */
+static inline struct task_group *cgroup_tg(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont, cpu_cgroup_subsys_id),
+					 struct task_group, css);
+}
+
+static struct cgroup_subsys_state *
+cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct task_group *tg;
+
+	if (!cont->parent) {
+		/* This is early initialization for the top cgroup */
+		init_task_group.css.cgroup = cont;
+		return &init_task_group.css;
+	}
+
+	/* we support only 1-level deep hierarchical scheduler atm */
+	if (cont->parent->parent)
+		return ERR_PTR(-EINVAL);
+
+	tg = sched_create_group();
+	if (IS_ERR(tg))
+		return ERR_PTR(-ENOMEM);
+
+	/* Bind the cgroup to task_group object we just created */
+	tg->css.cgroup = cont;
+
+	return &tg->css;
+}
+
+static void cpu_cgroup_destroy(struct cgroup_subsys *ss,
+					struct cgroup *cont)
+{
+	struct task_group *tg = cgroup_tg(cont);
+
+	sched_destroy_group(tg);
+}
+
+static int cpu_cgroup_can_attach(struct cgroup_subsys *ss,
+			     struct cgroup *cont, struct task_struct *tsk)
+{
+	/* We don't support RT-tasks being in separate groups */
+	if (tsk->sched_class != &fair_sched_class)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void
+cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+			struct cgroup *old_cont, struct task_struct *tsk)
+{
+	sched_move_task(tsk);
+}
+
+static ssize_t cpu_shares_write(struct cgroup *cont, struct cftype *cftype,
+				struct file *file, const char __user *userbuf,
+				size_t nbytes, loff_t *ppos)
+{
+	unsigned long shareval;
+	struct task_group *tg = cgroup_tg(cont);
+	char buffer[2*sizeof(unsigned long) + 1];
+	int rc;
+
+	if (nbytes > 2*sizeof(unsigned long))	/* safety check */
+		return -E2BIG;
+
+	if (copy_from_user(buffer, userbuf, nbytes))
+		return -EFAULT;
+
+	buffer[nbytes] = 0;	/* nul-terminate */
+	shareval = simple_strtoul(buffer, NULL, 10);
+
+	rc = sched_group_set_shares(tg, shareval);
+
+	return (rc < 0 ? rc : nbytes);
+}
+
+static u64 cpu_shares_read_uint(struct cgroup *cont, struct cftype *cft)
+{
+	struct task_group *tg = cgroup_tg(cont);
+
+	return (u64) tg->shares;
+}
+
+static struct cftype cpu_shares = {
+	.name = "shares",
+	.read_uint = cpu_shares_read_uint,
+	.write = cpu_shares_write,
+};
+
+static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_file(cont, ss, &cpu_shares);
+}
+
+struct cgroup_subsys cpu_cgroup_subsys = {
+	.name 	    	= "cpu",
+	.create	    	= cpu_cgroup_create,
+	.destroy    	= cpu_cgroup_destroy,
+	.can_attach 	= cpu_cgroup_can_attach,
+	.attach     	= cpu_cgroup_attach,
+	.populate   	= cpu_cgroup_populate,
+	.subsys_id  	= cpu_cgroup_subsys_id,
+	.early_init	= 1,
+};
+
+#endif	/* CONFIG_FAIR_CGROUP_SCHED */
diff --git a/kernel/signal.c b/kernel/signal.c
index e4f059c..1200630 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -256,7 +256,7 @@
 
 int unhandled_signal(struct task_struct *tsk, int sig)
 {
-	if (is_init(tsk))
+	if (is_global_init(tsk))
 		return 1;
 	if (tsk->ptrace & PT_PTRACED)
 		return 0;
@@ -536,7 +536,7 @@
 			return error;
 		error = -EPERM;
 		if (((sig != SIGCONT) ||
-			(process_session(current) != process_session(t)))
+			(task_session_nr(current) != task_session_nr(t)))
 		    && (current->euid ^ t->suid) && (current->euid ^ t->uid)
 		    && (current->uid ^ t->suid) && (current->uid ^ t->uid)
 		    && !capable(CAP_KILL))
@@ -694,7 +694,7 @@
 			q->info.si_signo = sig;
 			q->info.si_errno = 0;
 			q->info.si_code = SI_USER;
-			q->info.si_pid = current->pid;
+			q->info.si_pid = task_pid_vnr(current);
 			q->info.si_uid = current->uid;
 			break;
 		case (unsigned long) SEND_SIG_PRIV:
@@ -730,7 +730,7 @@
 static void print_fatal_signal(struct pt_regs *regs, int signr)
 {
 	printk("%s/%d: potentially unexpected fatal signal %d.\n",
-		current->comm, current->pid, signr);
+		current->comm, task_pid_nr(current), signr);
 
 #ifdef __i386__
 	printk("code at %08lx: ", regs->eip);
@@ -1089,7 +1089,7 @@
 {
 	int error;
 	rcu_read_lock();
-	error = kill_pid_info(sig, info, find_pid(pid));
+	error = kill_pid_info(sig, info, find_vpid(pid));
 	rcu_read_unlock();
 	return error;
 }
@@ -1150,7 +1150,7 @@
 
 		read_lock(&tasklist_lock);
 		for_each_process(p) {
-			if (p->pid > 1 && p->tgid != current->tgid) {
+			if (p->pid > 1 && !same_thread_group(p, current)) {
 				int err = group_send_sig_info(sig, info, p);
 				++count;
 				if (err != -EPERM)
@@ -1160,9 +1160,9 @@
 		read_unlock(&tasklist_lock);
 		ret = count ? retval : -ESRCH;
 	} else if (pid < 0) {
-		ret = kill_pgrp_info(sig, info, find_pid(-pid));
+		ret = kill_pgrp_info(sig, info, find_vpid(-pid));
 	} else {
-		ret = kill_pid_info(sig, info, find_pid(pid));
+		ret = kill_pid_info(sig, info, find_vpid(pid));
 	}
 	rcu_read_unlock();
 	return ret;
@@ -1266,7 +1266,12 @@
 int
 kill_proc(pid_t pid, int sig, int priv)
 {
-	return kill_proc_info(sig, __si_special(priv), pid);
+	int ret;
+
+	rcu_read_lock();
+	ret = kill_pid_info(sig, __si_special(priv), find_pid(pid));
+	rcu_read_unlock();
+	return ret;
 }
 
 /*
@@ -1443,7 +1448,22 @@
 
 	info.si_signo = sig;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	/*
+	 * we are under tasklist_lock here so our parent is tied to
+	 * us and cannot exit and release its namespace.
+	 *
+	 * the only it can is to switch its nsproxy with sys_unshare,
+	 * bu uncharing pid namespaces is not allowed, so we'll always
+	 * see relevant namespace
+	 *
+	 * write_lock() currently calls preempt_disable() which is the
+	 * same as rcu_read_lock(), but according to Oleg, this is not
+	 * correct to rely on this
+	 */
+	rcu_read_lock();
+	info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+	rcu_read_unlock();
+
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1508,7 +1528,13 @@
 
 	info.si_signo = SIGCHLD;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	/*
+	 * see comment in do_notify_parent() abot the following 3 lines
+	 */
+	rcu_read_lock();
+	info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+	rcu_read_unlock();
+
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1634,7 +1660,7 @@
 	memset(&info, 0, sizeof info);
 	info.si_signo = SIGTRAP;
 	info.si_code = exit_code;
-	info.si_pid = current->pid;
+	info.si_pid = task_pid_vnr(current);
 	info.si_uid = current->uid;
 
 	/* Let the debugger run.  */
@@ -1804,7 +1830,7 @@
 				info->si_signo = signr;
 				info->si_errno = 0;
 				info->si_code = SI_USER;
-				info->si_pid = current->parent->pid;
+				info->si_pid = task_pid_vnr(current->parent);
 				info->si_uid = current->parent->uid;
 			}
 
@@ -1835,11 +1861,9 @@
 			continue;
 
 		/*
-		 * Init of a pid space gets no signals it doesn't want from
-		 * within that pid space. It can of course get signals from
-		 * its parent pid space.
+		 * Global init gets no signals it doesn't want.
 		 */
-		if (current == child_reaper(current))
+		if (is_global_init(current))
 			continue;
 
 		if (sig_kernel_stop(signr)) {
@@ -2193,7 +2217,7 @@
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_USER;
-	info.si_pid = current->tgid;
+	info.si_pid = task_tgid_vnr(current);
 	info.si_uid = current->uid;
 
 	return kill_something_info(sig, &info, pid);
@@ -2209,12 +2233,12 @@
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_TKILL;
-	info.si_pid = current->tgid;
+	info.si_pid = task_tgid_vnr(current);
 	info.si_uid = current->uid;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
-	if (p && (tgid <= 0 || p->tgid == tgid)) {
+	p = find_task_by_vpid(pid);
+	if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
 		error = check_kill_permission(sig, &info, p);
 		/*
 		 * The null signal is a permissions and process existence
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index edeeef3..11df812 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -113,7 +113,7 @@
 	spin_lock(&print_lock);
 	printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
 			this_cpu, now - touch_timestamp,
-				current->comm, current->pid);
+			current->comm, task_pid_nr(current));
 	if (regs)
 		show_regs(regs);
 	else
diff --git a/kernel/sys.c b/kernel/sys.c
index bc8879c8..304b541 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -106,537 +106,6 @@
 
 void (*pm_power_off_prepare)(void);
 
-/*
- *	Notifier list for kernel code which wants to be called
- *	at shutdown. This is used to stop any idling DMA operations
- *	and the like. 
- */
-
-static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
-
-/*
- *	Notifier chain core routines.  The exported routines below
- *	are layered on top of these, with appropriate locking added.
- */
-
-static int notifier_chain_register(struct notifier_block **nl,
-		struct notifier_block *n)
-{
-	while ((*nl) != NULL) {
-		if (n->priority > (*nl)->priority)
-			break;
-		nl = &((*nl)->next);
-	}
-	n->next = *nl;
-	rcu_assign_pointer(*nl, n);
-	return 0;
-}
-
-static int notifier_chain_unregister(struct notifier_block **nl,
-		struct notifier_block *n)
-{
-	while ((*nl) != NULL) {
-		if ((*nl) == n) {
-			rcu_assign_pointer(*nl, n->next);
-			return 0;
-		}
-		nl = &((*nl)->next);
-	}
-	return -ENOENT;
-}
-
-/**
- * notifier_call_chain - Informs the registered notifiers about an event.
- *	@nl:		Pointer to head of the blocking notifier chain
- *	@val:		Value passed unmodified to notifier function
- *	@v:		Pointer passed unmodified to notifier function
- *	@nr_to_call:	Number of notifier functions to be called. Don't care
- *		     	value of this parameter is -1.
- *	@nr_calls:	Records the number of notifications sent. Don't care
- *		   	value of this field is NULL.
- * 	@returns:	notifier_call_chain returns the value returned by the
- *			last notifier function called.
- */
-
-static int __kprobes notifier_call_chain(struct notifier_block **nl,
-					unsigned long val, void *v,
-					int nr_to_call,	int *nr_calls)
-{
-	int ret = NOTIFY_DONE;
-	struct notifier_block *nb, *next_nb;
-
-	nb = rcu_dereference(*nl);
-
-	while (nb && nr_to_call) {
-		next_nb = rcu_dereference(nb->next);
-		ret = nb->notifier_call(nb, val, v);
-
-		if (nr_calls)
-			(*nr_calls)++;
-
-		if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
-			break;
-		nb = next_nb;
-		nr_to_call--;
-	}
-	return ret;
-}
-
-/*
- *	Atomic notifier chain routines.  Registration and unregistration
- *	use a spinlock, and call_chain is synchronized by RCU (no locks).
- */
-
-/**
- *	atomic_notifier_chain_register - Add notifier to an atomic notifier chain
- *	@nh: Pointer to head of the atomic notifier chain
- *	@n: New entry in notifier chain
- *
- *	Adds a notifier to an atomic notifier chain.
- *
- *	Currently always returns zero.
- */
-
-int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
-		struct notifier_block *n)
-{
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&nh->lock, flags);
-	ret = notifier_chain_register(&nh->head, n);
-	spin_unlock_irqrestore(&nh->lock, flags);
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
-
-/**
- *	atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
- *	@nh: Pointer to head of the atomic notifier chain
- *	@n: Entry to remove from notifier chain
- *
- *	Removes a notifier from an atomic notifier chain.
- *
- *	Returns zero on success or %-ENOENT on failure.
- */
-int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
-		struct notifier_block *n)
-{
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&nh->lock, flags);
-	ret = notifier_chain_unregister(&nh->head, n);
-	spin_unlock_irqrestore(&nh->lock, flags);
-	synchronize_rcu();
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
-
-/**
- *	__atomic_notifier_call_chain - Call functions in an atomic notifier chain
- *	@nh: Pointer to head of the atomic notifier chain
- *	@val: Value passed unmodified to notifier function
- *	@v: Pointer passed unmodified to notifier function
- *	@nr_to_call: See the comment for notifier_call_chain.
- *	@nr_calls: See the comment for notifier_call_chain.
- *
- *	Calls each function in a notifier chain in turn.  The functions
- *	run in an atomic context, so they must not block.
- *	This routine uses RCU to synchronize with changes to the chain.
- *
- *	If the return value of the notifier can be and'ed
- *	with %NOTIFY_STOP_MASK then atomic_notifier_call_chain()
- *	will return immediately, with the return value of
- *	the notifier function which halted execution.
- *	Otherwise the return value is the return value
- *	of the last notifier function called.
- */
- 
-int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
-					unsigned long val, void *v,
-					int nr_to_call, int *nr_calls)
-{
-	int ret;
-
-	rcu_read_lock();
-	ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
-	rcu_read_unlock();
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
-
-int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
-		unsigned long val, void *v)
-{
-	return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
-}
-
-EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
-/*
- *	Blocking notifier chain routines.  All access to the chain is
- *	synchronized by an rwsem.
- */
-
-/**
- *	blocking_notifier_chain_register - Add notifier to a blocking notifier chain
- *	@nh: Pointer to head of the blocking notifier chain
- *	@n: New entry in notifier chain
- *
- *	Adds a notifier to a blocking notifier chain.
- *	Must be called in process context.
- *
- *	Currently always returns zero.
- */
- 
-int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
-		struct notifier_block *n)
-{
-	int ret;
-
-	/*
-	 * This code gets used during boot-up, when task switching is
-	 * not yet working and interrupts must remain disabled.  At
-	 * such times we must not call down_write().
-	 */
-	if (unlikely(system_state == SYSTEM_BOOTING))
-		return notifier_chain_register(&nh->head, n);
-
-	down_write(&nh->rwsem);
-	ret = notifier_chain_register(&nh->head, n);
-	up_write(&nh->rwsem);
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
-
-/**
- *	blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
- *	@nh: Pointer to head of the blocking notifier chain
- *	@n: Entry to remove from notifier chain
- *
- *	Removes a notifier from a blocking notifier chain.
- *	Must be called from process context.
- *
- *	Returns zero on success or %-ENOENT on failure.
- */
-int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
-		struct notifier_block *n)
-{
-	int ret;
-
-	/*
-	 * This code gets used during boot-up, when task switching is
-	 * not yet working and interrupts must remain disabled.  At
-	 * such times we must not call down_write().
-	 */
-	if (unlikely(system_state == SYSTEM_BOOTING))
-		return notifier_chain_unregister(&nh->head, n);
-
-	down_write(&nh->rwsem);
-	ret = notifier_chain_unregister(&nh->head, n);
-	up_write(&nh->rwsem);
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
-
-/**
- *	__blocking_notifier_call_chain - Call functions in a blocking notifier chain
- *	@nh: Pointer to head of the blocking notifier chain
- *	@val: Value passed unmodified to notifier function
- *	@v: Pointer passed unmodified to notifier function
- *	@nr_to_call: See comment for notifier_call_chain.
- *	@nr_calls: See comment for notifier_call_chain.
- *
- *	Calls each function in a notifier chain in turn.  The functions
- *	run in a process context, so they are allowed to block.
- *
- *	If the return value of the notifier can be and'ed
- *	with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
- *	will return immediately, with the return value of
- *	the notifier function which halted execution.
- *	Otherwise the return value is the return value
- *	of the last notifier function called.
- */
- 
-int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
-				   unsigned long val, void *v,
-				   int nr_to_call, int *nr_calls)
-{
-	int ret = NOTIFY_DONE;
-
-	/*
-	 * We check the head outside the lock, but if this access is
-	 * racy then it does not matter what the result of the test
-	 * is, we re-check the list after having taken the lock anyway:
-	 */
-	if (rcu_dereference(nh->head)) {
-		down_read(&nh->rwsem);
-		ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
-					nr_calls);
-		up_read(&nh->rwsem);
-	}
-	return ret;
-}
-EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain);
-
-int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
-		unsigned long val, void *v)
-{
-	return __blocking_notifier_call_chain(nh, val, v, -1, NULL);
-}
-EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
-
-/*
- *	Raw notifier chain routines.  There is no protection;
- *	the caller must provide it.  Use at your own risk!
- */
-
-/**
- *	raw_notifier_chain_register - Add notifier to a raw notifier chain
- *	@nh: Pointer to head of the raw notifier chain
- *	@n: New entry in notifier chain
- *
- *	Adds a notifier to a raw notifier chain.
- *	All locking must be provided by the caller.
- *
- *	Currently always returns zero.
- */
-
-int raw_notifier_chain_register(struct raw_notifier_head *nh,
-		struct notifier_block *n)
-{
-	return notifier_chain_register(&nh->head, n);
-}
-
-EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
-
-/**
- *	raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
- *	@nh: Pointer to head of the raw notifier chain
- *	@n: Entry to remove from notifier chain
- *
- *	Removes a notifier from a raw notifier chain.
- *	All locking must be provided by the caller.
- *
- *	Returns zero on success or %-ENOENT on failure.
- */
-int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
-		struct notifier_block *n)
-{
-	return notifier_chain_unregister(&nh->head, n);
-}
-
-EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
-
-/**
- *	__raw_notifier_call_chain - Call functions in a raw notifier chain
- *	@nh: Pointer to head of the raw notifier chain
- *	@val: Value passed unmodified to notifier function
- *	@v: Pointer passed unmodified to notifier function
- *	@nr_to_call: See comment for notifier_call_chain.
- *	@nr_calls: See comment for notifier_call_chain
- *
- *	Calls each function in a notifier chain in turn.  The functions
- *	run in an undefined context.
- *	All locking must be provided by the caller.
- *
- *	If the return value of the notifier can be and'ed
- *	with %NOTIFY_STOP_MASK then raw_notifier_call_chain()
- *	will return immediately, with the return value of
- *	the notifier function which halted execution.
- *	Otherwise the return value is the return value
- *	of the last notifier function called.
- */
-
-int __raw_notifier_call_chain(struct raw_notifier_head *nh,
-			      unsigned long val, void *v,
-			      int nr_to_call, int *nr_calls)
-{
-	return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
-}
-
-EXPORT_SYMBOL_GPL(__raw_notifier_call_chain);
-
-int raw_notifier_call_chain(struct raw_notifier_head *nh,
-		unsigned long val, void *v)
-{
-	return __raw_notifier_call_chain(nh, val, v, -1, NULL);
-}
-
-EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
-
-/*
- *	SRCU notifier chain routines.    Registration and unregistration
- *	use a mutex, and call_chain is synchronized by SRCU (no locks).
- */
-
-/**
- *	srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
- *	@nh: Pointer to head of the SRCU notifier chain
- *	@n: New entry in notifier chain
- *
- *	Adds a notifier to an SRCU notifier chain.
- *	Must be called in process context.
- *
- *	Currently always returns zero.
- */
-
-int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
-		struct notifier_block *n)
-{
-	int ret;
-
-	/*
-	 * This code gets used during boot-up, when task switching is
-	 * not yet working and interrupts must remain disabled.  At
-	 * such times we must not call mutex_lock().
-	 */
-	if (unlikely(system_state == SYSTEM_BOOTING))
-		return notifier_chain_register(&nh->head, n);
-
-	mutex_lock(&nh->mutex);
-	ret = notifier_chain_register(&nh->head, n);
-	mutex_unlock(&nh->mutex);
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
-
-/**
- *	srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
- *	@nh: Pointer to head of the SRCU notifier chain
- *	@n: Entry to remove from notifier chain
- *
- *	Removes a notifier from an SRCU notifier chain.
- *	Must be called from process context.
- *
- *	Returns zero on success or %-ENOENT on failure.
- */
-int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
-		struct notifier_block *n)
-{
-	int ret;
-
-	/*
-	 * This code gets used during boot-up, when task switching is
-	 * not yet working and interrupts must remain disabled.  At
-	 * such times we must not call mutex_lock().
-	 */
-	if (unlikely(system_state == SYSTEM_BOOTING))
-		return notifier_chain_unregister(&nh->head, n);
-
-	mutex_lock(&nh->mutex);
-	ret = notifier_chain_unregister(&nh->head, n);
-	mutex_unlock(&nh->mutex);
-	synchronize_srcu(&nh->srcu);
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
-
-/**
- *	__srcu_notifier_call_chain - Call functions in an SRCU notifier chain
- *	@nh: Pointer to head of the SRCU notifier chain
- *	@val: Value passed unmodified to notifier function
- *	@v: Pointer passed unmodified to notifier function
- *	@nr_to_call: See comment for notifier_call_chain.
- *	@nr_calls: See comment for notifier_call_chain
- *
- *	Calls each function in a notifier chain in turn.  The functions
- *	run in a process context, so they are allowed to block.
- *
- *	If the return value of the notifier can be and'ed
- *	with %NOTIFY_STOP_MASK then srcu_notifier_call_chain()
- *	will return immediately, with the return value of
- *	the notifier function which halted execution.
- *	Otherwise the return value is the return value
- *	of the last notifier function called.
- */
-
-int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
-			       unsigned long val, void *v,
-			       int nr_to_call, int *nr_calls)
-{
-	int ret;
-	int idx;
-
-	idx = srcu_read_lock(&nh->srcu);
-	ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
-	srcu_read_unlock(&nh->srcu, idx);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain);
-
-int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
-		unsigned long val, void *v)
-{
-	return __srcu_notifier_call_chain(nh, val, v, -1, NULL);
-}
-EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
-
-/**
- *	srcu_init_notifier_head - Initialize an SRCU notifier head
- *	@nh: Pointer to head of the srcu notifier chain
- *
- *	Unlike other sorts of notifier heads, SRCU notifier heads require
- *	dynamic initialization.  Be sure to call this routine before
- *	calling any of the other SRCU notifier routines for this head.
- *
- *	If an SRCU notifier head is deallocated, it must first be cleaned
- *	up by calling srcu_cleanup_notifier_head().  Otherwise the head's
- *	per-cpu data (used by the SRCU mechanism) will leak.
- */
-
-void srcu_init_notifier_head(struct srcu_notifier_head *nh)
-{
-	mutex_init(&nh->mutex);
-	if (init_srcu_struct(&nh->srcu) < 0)
-		BUG();
-	nh->head = NULL;
-}
-
-EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
-
-/**
- *	register_reboot_notifier - Register function to be called at reboot time
- *	@nb: Info about notifier function to be called
- *
- *	Registers a function with the list of functions
- *	to be called at reboot time.
- *
- *	Currently always returns zero, as blocking_notifier_chain_register()
- *	always returns zero.
- */
- 
-int register_reboot_notifier(struct notifier_block * nb)
-{
-	return blocking_notifier_chain_register(&reboot_notifier_list, nb);
-}
-
-EXPORT_SYMBOL(register_reboot_notifier);
-
-/**
- *	unregister_reboot_notifier - Unregister previously registered reboot notifier
- *	@nb: Hook to be unregistered
- *
- *	Unregisters a previously registered reboot
- *	notifier function.
- *
- *	Returns zero on success, or %-ENOENT on failure.
- */
- 
-int unregister_reboot_notifier(struct notifier_block * nb)
-{
-	return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
-}
-
-EXPORT_SYMBOL(unregister_reboot_notifier);
-
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
 	int no_nice;
@@ -683,7 +152,7 @@
 	switch (which) {
 		case PRIO_PROCESS:
 			if (who)
-				p = find_task_by_pid(who);
+				p = find_task_by_vpid(who);
 			else
 				p = current;
 			if (p)
@@ -691,7 +160,7 @@
 			break;
 		case PRIO_PGRP:
 			if (who)
-				pgrp = find_pid(who);
+				pgrp = find_vpid(who);
 			else
 				pgrp = task_pgrp(current);
 			do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
@@ -740,7 +209,7 @@
 	switch (which) {
 		case PRIO_PROCESS:
 			if (who)
-				p = find_task_by_pid(who);
+				p = find_task_by_vpid(who);
 			else
 				p = current;
 			if (p) {
@@ -751,7 +220,7 @@
 			break;
 		case PRIO_PGRP:
 			if (who)
-				pgrp = find_pid(who);
+				pgrp = find_vpid(who);
 			else
 				pgrp = task_pgrp(current);
 			do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
@@ -1448,9 +917,10 @@
 	struct task_struct *p;
 	struct task_struct *group_leader = current->group_leader;
 	int err = -EINVAL;
+	struct pid_namespace *ns;
 
 	if (!pid)
-		pid = group_leader->pid;
+		pid = task_pid_vnr(group_leader);
 	if (!pgid)
 		pgid = pid;
 	if (pgid < 0)
@@ -1459,10 +929,12 @@
 	/* From this point forward we keep holding onto the tasklist lock
 	 * so that our parent does not change from under us. -DaveM
 	 */
+	ns = current->nsproxy->pid_ns;
+
 	write_lock_irq(&tasklist_lock);
 
 	err = -ESRCH;
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ns(pid, ns);
 	if (!p)
 		goto out;
 
@@ -1488,9 +960,9 @@
 		goto out;
 
 	if (pgid != pid) {
-		struct task_struct *g =
-			find_task_by_pid_type(PIDTYPE_PGID, pgid);
+		struct task_struct *g;
 
+		g = find_task_by_pid_type_ns(PIDTYPE_PGID, pgid, ns);
 		if (!g || task_session(g) != task_session(group_leader))
 			goto out;
 	}
@@ -1499,10 +971,13 @@
 	if (err)
 		goto out;
 
-	if (process_group(p) != pgid) {
+	if (task_pgrp_nr_ns(p, ns) != pgid) {
+		struct pid *pid;
+
 		detach_pid(p, PIDTYPE_PGID);
-		p->signal->pgrp = pgid;
-		attach_pid(p, PIDTYPE_PGID, find_pid(pgid));
+		pid = find_vpid(pgid);
+		attach_pid(p, PIDTYPE_PGID, pid);
+		set_task_pgrp(p, pid_nr(pid));
 	}
 
 	err = 0;
@@ -1515,19 +990,21 @@
 asmlinkage long sys_getpgid(pid_t pid)
 {
 	if (!pid)
-		return process_group(current);
+		return task_pgrp_vnr(current);
 	else {
 		int retval;
 		struct task_struct *p;
+		struct pid_namespace *ns;
+
+		ns = current->nsproxy->pid_ns;
 
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
-
+		p = find_task_by_pid_ns(pid, ns);
 		retval = -ESRCH;
 		if (p) {
 			retval = security_task_getpgid(p);
 			if (!retval)
-				retval = process_group(p);
+				retval = task_pgrp_nr_ns(p, ns);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
@@ -1539,7 +1016,7 @@
 asmlinkage long sys_getpgrp(void)
 {
 	/* SMP - assuming writes are word atomic this is fine */
-	return process_group(current);
+	return task_pgrp_vnr(current);
 }
 
 #endif
@@ -1547,19 +1024,21 @@
 asmlinkage long sys_getsid(pid_t pid)
 {
 	if (!pid)
-		return process_session(current);
+		return task_session_vnr(current);
 	else {
 		int retval;
 		struct task_struct *p;
+		struct pid_namespace *ns;
+
+		ns = current->nsproxy->pid_ns;
 
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
-
+		p = find_task_by_pid_ns(pid, ns);
 		retval = -ESRCH;
 		if (p) {
 			retval = security_task_getsid(p);
 			if (!retval)
-				retval = process_session(p);
+				retval = task_session_nr_ns(p, ns);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
@@ -1586,7 +1065,8 @@
 	 * session id and so the check will always fail and make it so
 	 * init cannot successfully call setsid.
 	 */
-	if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session))
+	if (session > 1 && find_task_by_pid_type_ns(PIDTYPE_PGID,
+				session, &init_pid_ns))
 		goto out;
 
 	group_leader->signal->leader = 1;
@@ -1596,7 +1076,7 @@
 	group_leader->signal->tty = NULL;
 	spin_unlock(&group_leader->sighand->siglock);
 
-	err = process_group(group_leader);
+	err = task_pgrp_vnr(group_leader);
 out:
 	write_unlock_irq(&tasklist_lock);
 	return err;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 067554b..3b4efbe 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1888,7 +1888,7 @@
 		return -EPERM;
 	}
 
-	op = is_init(current) ? OP_SET : OP_AND;
+	op = is_global_init(current) ? OP_SET : OP_AND;
 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
 				do_proc_dointvec_bset_conv,&op);
 }
@@ -2278,7 +2278,7 @@
 	pid_t tmp;
 	int r;
 
-	tmp = pid_nr(cad_pid);
+	tmp = pid_nr_ns(cad_pid, current->nsproxy->pid_ns);
 
 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
 			       lenp, ppos, NULL, NULL);
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 7d4d7f9..9f360f6 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -22,6 +22,10 @@
 #include <linux/delayacct.h>
 #include <linux/cpumask.h>
 #include <linux/percpu.h>
+#include <linux/cgroupstats.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/file.h>
 #include <net/genetlink.h>
 #include <asm/atomic.h>
 
@@ -49,6 +53,11 @@
 	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
 	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
 
+static struct nla_policy
+cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] __read_mostly = {
+	[CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
+};
+
 struct listener {
 	struct list_head list;
 	pid_t pid;
@@ -372,6 +381,51 @@
 	return NULL;
 }
 
+static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	int rc = 0;
+	struct sk_buff *rep_skb;
+	struct cgroupstats *stats;
+	struct nlattr *na;
+	size_t size;
+	u32 fd;
+	struct file *file;
+	int fput_needed;
+
+	na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];
+	if (!na)
+		return -EINVAL;
+
+	fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]);
+	file = fget_light(fd, &fput_needed);
+	if (file) {
+		size = nla_total_size(sizeof(struct cgroupstats));
+
+		rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb,
+					size);
+		if (rc < 0)
+			goto err;
+
+		na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
+					sizeof(struct cgroupstats));
+		stats = nla_data(na);
+		memset(stats, 0, sizeof(*stats));
+
+		rc = cgroupstats_build(stats, file->f_dentry);
+		if (rc < 0)
+			goto err;
+
+		fput_light(file, fput_needed);
+		return send_reply(rep_skb, info->snd_pid);
+	}
+
+err:
+	if (file)
+		fput_light(file, fput_needed);
+	nlmsg_free(rep_skb);
+	return rc;
+}
+
 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
 	int rc = 0;
@@ -522,6 +576,12 @@
 	.policy		= taskstats_cmd_get_policy,
 };
 
+static struct genl_ops cgroupstats_ops = {
+	.cmd		= CGROUPSTATS_CMD_GET,
+	.doit		= cgroupstats_user_cmd,
+	.policy		= cgroupstats_cmd_get_policy,
+};
+
 /* Needed early in initialization */
 void __init taskstats_init_early(void)
 {
@@ -546,8 +606,15 @@
 	if (rc < 0)
 		goto err;
 
+	rc = genl_register_ops(&family, &cgroupstats_ops);
+	if (rc < 0)
+		goto err_cgroup_ops;
+
 	family_registered = 1;
+	printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
 	return 0;
+err_cgroup_ops:
+	genl_unregister_ops(&family, &taskstats_ops);
 err:
 	genl_unregister_family(&family);
 	return rc;
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 51b6a6a..c8a9d13 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -207,15 +207,12 @@
  */
 void clocksource_resume(void)
 {
-	struct list_head *tmp;
+	struct clocksource *cs;
 	unsigned long flags;
 
 	spin_lock_irqsave(&clocksource_lock, flags);
 
-	list_for_each(tmp, &clocksource_list) {
-		struct clocksource *cs;
-
-		cs = list_entry(tmp, struct clocksource, list);
+	list_for_each_entry(cs, &clocksource_list, list) {
 		if (cs->resume)
 			cs->resume();
 	}
@@ -369,7 +366,6 @@
 					  const char *buf, size_t count)
 {
 	struct clocksource *ovr = NULL;
-	struct list_head *tmp;
 	size_t ret = count;
 	int len;
 
@@ -389,12 +385,11 @@
 
 	len = strlen(override_name);
 	if (len) {
+		struct clocksource *cs;
+
 		ovr = clocksource_override;
 		/* try to select it: */
-		list_for_each(tmp, &clocksource_list) {
-			struct clocksource *cs;
-
-			cs = list_entry(tmp, struct clocksource, list);
+		list_for_each_entry(cs, &clocksource_list, list) {
 			if (strlen(cs->name) == len &&
 			    !strcmp(cs->name, override_name))
 				ovr = cs;
@@ -422,14 +417,11 @@
 static ssize_t
 sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
 {
-	struct list_head *tmp;
+	struct clocksource *src;
 	char *curr = buf;
 
 	spin_lock_irq(&clocksource_lock);
-	list_for_each(tmp, &clocksource_list) {
-		struct clocksource *src;
-
-		src = list_entry(tmp, struct clocksource, list);
+	list_for_each_entry(src, &clocksource_list, list) {
 		curr += sprintf(curr, "%s ", src->name);
 	}
 	spin_unlock_irq(&clocksource_lock);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index ce89ffb..10a1347 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -153,6 +153,7 @@
 	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
 	struct tick_sched *ts;
 	ktime_t last_update, expires, now, delta;
+	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
 	int cpu;
 
 	local_irq_save(flags);
@@ -302,11 +303,26 @@
 out:
 	ts->next_jiffies = next_jiffies;
 	ts->last_jiffies = last_jiffies;
+	ts->sleep_length = ktime_sub(dev->next_event, now);
 end:
 	local_irq_restore(flags);
 }
 
 /**
+ * tick_nohz_get_sleep_length - return the length of the current sleep
+ *
+ * Called from power state control code with interrupts disabled
+ */
+ktime_t tick_nohz_get_sleep_length(void)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+	return ts->sleep_length;
+}
+
+EXPORT_SYMBOL_GPL(tick_nohz_get_sleep_length);
+
+/**
  * nohz_restart_sched_tick - restart the idle tick from the idle task
  *
  * Restart the idle tick when the CPU is woken up from idle
diff --git a/kernel/timer.c b/kernel/timer.c
index 8521d10..fb4e67d 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
+#include <linux/pid_namespace.h>
 #include <linux/notifier.h>
 #include <linux/thread_info.h>
 #include <linux/time.h>
@@ -956,7 +957,7 @@
  */
 asmlinkage long sys_getpid(void)
 {
-	return current->tgid;
+	return task_tgid_vnr(current);
 }
 
 /*
@@ -970,7 +971,7 @@
 	int pid;
 
 	rcu_read_lock();
-	pid = rcu_dereference(current->real_parent)->tgid;
+	pid = task_ppid_nr_ns(current, current->nsproxy->pid_ns);
 	rcu_read_unlock();
 
 	return pid;
@@ -1102,7 +1103,7 @@
 /* Thread ID - the internal kernel "pid" */
 asmlinkage long sys_gettid(void)
 {
-	return current->pid;
+	return task_pid_vnr(current);
 }
 
 /**
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e080d1d..52d5e7c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -32,6 +32,7 @@
 #include <linux/freezer.h>
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
+#include <linux/lockdep.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -61,6 +62,9 @@
 	const char *name;
 	int singlethread;
 	int freezeable;		/* Freeze threads during suspend */
+#ifdef CONFIG_LOCKDEP
+	struct lockdep_map lockdep_map;
+#endif
 };
 
 /* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
@@ -250,6 +254,17 @@
 		struct work_struct *work = list_entry(cwq->worklist.next,
 						struct work_struct, entry);
 		work_func_t f = work->func;
+#ifdef CONFIG_LOCKDEP
+		/*
+		 * It is permissible to free the struct work_struct
+		 * from inside the function that is called from it,
+		 * this we need to take into account for lockdep too.
+		 * To avoid bogus "held lock freed" warnings as well
+		 * as problems when looking into work->lockdep_map,
+		 * make a copy and use that here.
+		 */
+		struct lockdep_map lockdep_map = work->lockdep_map;
+#endif
 
 		cwq->current_work = work;
 		list_del_init(cwq->worklist.next);
@@ -257,13 +272,17 @@
 
 		BUG_ON(get_wq_data(work) != cwq);
 		work_clear_pending(work);
+		lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+		lock_acquire(&lockdep_map, 0, 0, 0, 2, _THIS_IP_);
 		f(work);
+		lock_release(&lockdep_map, 1, _THIS_IP_);
+		lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
 
 		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
 			printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
 					"%s/0x%08x/%d\n",
 					current->comm, preempt_count(),
-				       	current->pid);
+				       	task_pid_nr(current));
 			printk(KERN_ERR "    last function: ");
 			print_symbol("%s\n", (unsigned long)f);
 			debug_show_held_locks(current);
@@ -376,6 +395,8 @@
 	int cpu;
 
 	might_sleep();
+	lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_release(&wq->lockdep_map, 1, _THIS_IP_);
 	for_each_cpu_mask(cpu, *cpu_map)
 		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
 }
@@ -446,6 +467,9 @@
 
 	might_sleep();
 
+	lock_acquire(&work->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_release(&work->lockdep_map, 1, _THIS_IP_);
+
 	cwq = get_wq_data(work);
 	if (!cwq)
 		return;
@@ -695,8 +719,10 @@
 	}
 }
 
-struct workqueue_struct *__create_workqueue(const char *name,
-					    int singlethread, int freezeable)
+struct workqueue_struct *__create_workqueue_key(const char *name,
+						int singlethread,
+						int freezeable,
+						struct lock_class_key *key)
 {
 	struct workqueue_struct *wq;
 	struct cpu_workqueue_struct *cwq;
@@ -713,6 +739,7 @@
 	}
 
 	wq->name = name;
+	lockdep_init_map(&wq->lockdep_map, name, key, 0);
 	wq->singlethread = singlethread;
 	wq->freezeable = freezeable;
 	INIT_LIST_HEAD(&wq->list);
@@ -741,7 +768,7 @@
 	}
 	return wq;
 }
-EXPORT_SYMBOL_GPL(__create_workqueue);
+EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
@@ -752,6 +779,9 @@
 	if (cwq->thread == NULL)
 		return;
 
+	lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+
 	flush_cpu_workqueue(cwq);
 	/*
 	 * If the caller is CPU_DEAD and cwq->worklist was not empty,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7d16e64..c567f21 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -498,3 +498,5 @@
 	select FRAME_POINTER
 	help
 	  Provide stacktrace filter for fault-injection capabilities
+
+source "samples/Kconfig"
diff --git a/lib/Makefile b/lib/Makefile
index c5f215d..3a0983b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -6,7 +6,7 @@
 	 rbtree.o radix-tree.o dump_stack.o \
 	 idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
-	 proportions.o
+	 proportions.o prio_heap.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/hweight.c b/lib/hweight.c
index 360556a..389424e 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -1,6 +1,6 @@
 #include <linux/module.h>
+#include <linux/bitops.h>
 #include <asm/types.h>
-#include <asm/bitops.h>
 
 /**
  * hweightN - returns the hamming weight of a N-bit word
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 9659eab..393a0e9 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -124,12 +124,13 @@
 	mutex_lock(&percpu_counters_lock);
 	list_for_each_entry(fbc, &percpu_counters, list) {
 		s32 *pcount;
+		unsigned long flags;
 
-		spin_lock(&fbc->lock);
+		spin_lock_irqsave(&fbc->lock, flags);
 		pcount = per_cpu_ptr(fbc->counters, cpu);
 		fbc->count += *pcount;
 		*pcount = 0;
-		spin_unlock(&fbc->lock);
+		spin_unlock_irqrestore(&fbc->lock, flags);
 	}
 	mutex_unlock(&percpu_counters_lock);
 	return NOTIFY_OK;
diff --git a/lib/prio_heap.c b/lib/prio_heap.c
new file mode 100644
index 0000000..471944a
--- /dev/null
+++ b/lib/prio_heap.c
@@ -0,0 +1,70 @@
+/*
+ * Simple insertion-only static-sized priority heap containing
+ * pointers, based on CLR, chapter 7
+ */
+
+#include <linux/slab.h>
+#include <linux/prio_heap.h>
+
+int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask,
+	      int (*gt)(void *, void *))
+{
+	heap->ptrs = kmalloc(size, gfp_mask);
+	if (!heap->ptrs)
+		return -ENOMEM;
+	heap->size = 0;
+	heap->max = size / sizeof(void *);
+	heap->gt = gt;
+	return 0;
+}
+
+void heap_free(struct ptr_heap *heap)
+{
+	kfree(heap->ptrs);
+}
+
+void *heap_insert(struct ptr_heap *heap, void *p)
+{
+	void *res;
+	void **ptrs = heap->ptrs;
+	int pos;
+
+	if (heap->size < heap->max) {
+		/* Heap insertion */
+		int pos = heap->size++;
+		while (pos > 0 && heap->gt(p, ptrs[(pos-1)/2])) {
+			ptrs[pos] = ptrs[(pos-1)/2];
+			pos = (pos-1)/2;
+		}
+		ptrs[pos] = p;
+		return NULL;
+	}
+
+	/* The heap is full, so something will have to be dropped */
+
+	/* If the new pointer is greater than the current max, drop it */
+	if (heap->gt(p, ptrs[0]))
+		return p;
+
+	/* Replace the current max and heapify */
+	res = ptrs[0];
+	ptrs[0] = p;
+	pos = 0;
+
+	while (1) {
+		int left = 2 * pos + 1;
+		int right = 2 * pos + 2;
+		int largest = pos;
+		if (left < heap->size && heap->gt(ptrs[left], p))
+			largest = left;
+		if (right < heap->size && heap->gt(ptrs[right], ptrs[largest]))
+			largest = right;
+		if (largest == pos)
+			break;
+		/* Push p down the heap one level and bump one up */
+		ptrs[pos] = ptrs[largest];
+		ptrs[largest] = p;
+		pos = largest;
+	}
+	return res;
+}
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 479fd46..9c4b025 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -60,12 +60,12 @@
 		owner = lock->owner;
 	printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
 		msg, raw_smp_processor_id(),
-		current->comm, current->pid);
+		current->comm, task_pid_nr(current));
 	printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, "
 			".owner_cpu: %d\n",
 		lock, lock->magic,
 		owner ? owner->comm : "<none>",
-		owner ? owner->pid : -1,
+		owner ? task_pid_nr(owner) : -1,
 		lock->owner_cpu);
 	dump_stack();
 }
@@ -116,7 +116,7 @@
 			printk(KERN_EMERG "BUG: spinlock lockup on CPU#%d, "
 					"%s/%d, %p\n",
 				raw_smp_processor_id(), current->comm,
-				current->pid, lock);
+				task_pid_nr(current), lock);
 			dump_stack();
 #ifdef CONFIG_SMP
 			trigger_all_cpu_backtrace();
@@ -161,7 +161,7 @@
 
 	printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
 		msg, raw_smp_processor_id(), current->comm,
-		current->pid, lock);
+		task_pid_nr(current), lock);
 	dump_stack();
 }
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 9203663..5209e47 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -841,7 +841,7 @@
 /**
  * do_generic_mapping_read - generic file read routine
  * @mapping:	address_space to be read
- * @_ra:	file's readahead state
+ * @ra:		file's readahead state
  * @filp:	the file to read
  * @ppos:	current file position
  * @desc:	read_descriptor
diff --git a/mm/memory.c b/mm/memory.c
index bd16dca..142683d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -259,9 +259,6 @@
 			continue;
 		free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
-
-	if (!(*tlb)->fullmm)
-		flush_tlb_pgtables((*tlb)->mm, start, end);
 }
 
 void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 568152a..c1592a9 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -78,6 +78,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/module.h>
+#include <linux/nsproxy.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/compat.h>
@@ -940,7 +941,7 @@
 
 	/* Find the mm_struct */
 	read_lock(&tasklist_lock);
-	task = pid ? find_task_by_pid(pid) : current;
+	task = pid ? find_task_by_vpid(pid) : current;
 	if (!task) {
 		read_unlock(&tasklist_lock);
 		return -ESRCH;
@@ -1388,7 +1389,6 @@
  * keeps mempolicies cpuset relative after its cpuset moves.  See
  * further kernel/cpuset.c update_nodemask().
  */
-void *cpuset_being_rebound;
 
 /* Slow path of a mempolicy copy */
 struct mempolicy *__mpol_copy(struct mempolicy *old)
@@ -2019,4 +2019,3 @@
 		m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
 	return 0;
 }
-
diff --git a/mm/migrate.c b/mm/migrate.c
index 06d0877..4d6ee03 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -19,6 +19,7 @@
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/mm_inline.h>
+#include <linux/nsproxy.h>
 #include <linux/pagevec.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
@@ -924,7 +925,7 @@
 
 	/* Find the mm_struct */
 	read_lock(&tasklist_lock);
-	task = pid ? find_task_by_pid(pid) : current;
+	task = pid ? find_task_by_vpid(pid) : current;
 	if (!task) {
 		read_unlock(&tasklist_lock);
 		return -ESRCH;
diff --git a/mm/mmap.c b/mm/mmap.c
index 4275e81..7a30c49 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1048,8 +1048,7 @@
 
 	/* The open routine did something to the protections already? */
 	if (pgprot_val(vma->vm_page_prot) !=
-	    pgprot_val(protection_map[vm_flags &
-		    (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
+	    pgprot_val(vm_get_page_prot(vm_flags)))
 		return 0;
 
 	/* Specialty mapping? */
@@ -1130,8 +1129,7 @@
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 	vma->vm_flags = vm_flags;
-	vma->vm_page_prot = protection_map[vm_flags &
-				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
 
 	if (file) {
@@ -2002,8 +2000,7 @@
 	vma->vm_end = addr + len;
 	vma->vm_pgoff = pgoff;
 	vma->vm_flags = flags;
-	vma->vm_page_prot = protection_map[flags &
-				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	vma->vm_page_prot = vm_get_page_prot(flags);
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 out:
 	mm->total_vm += len >> PAGE_SHIFT;
@@ -2209,7 +2206,7 @@
 	vma->vm_end = addr + len;
 
 	vma->vm_flags = vm_flags | mm->def_flags;
-	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 
 	vma->vm_ops = &special_mapping_vmops;
 	vma->vm_private_data = pages;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 1d4d697..5522784 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -192,11 +192,9 @@
 	 * held in write mode.
 	 */
 	vma->vm_flags = newflags;
-	vma->vm_page_prot = protection_map[newflags &
-		(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	vma->vm_page_prot = vm_get_page_prot(newflags);
 	if (vma_wants_writenotify(vma)) {
-		vma->vm_page_prot = protection_map[newflags &
-			(VM_READ|VM_WRITE|VM_EXEC)];
+		vma->vm_page_prot = vm_get_page_prot(newflags);
 		dirty_accountable = 1;
 	}
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index a64decb..824cade 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -212,7 +212,7 @@
 		if (!p->mm)
 			continue;
 		/* skip the init task */
-		if (is_init(p))
+		if (is_global_init(p))
 			continue;
 
 		/*
@@ -265,7 +265,7 @@
  */
 static void __oom_kill_task(struct task_struct *p, int verbose)
 {
-	if (is_init(p)) {
+	if (is_global_init(p)) {
 		WARN_ON(1);
 		printk(KERN_WARNING "tried to kill init!\n");
 		return;
@@ -278,7 +278,8 @@
 	}
 
 	if (verbose)
-		printk(KERN_ERR "Killed process %d (%s)\n", p->pid, p->comm);
+		printk(KERN_ERR "Killed process %d (%s)\n",
+				task_pid_nr(p), p->comm);
 
 	/*
 	 * We give our sacrificial lamb high priority and access to
@@ -326,7 +327,7 @@
 	 * to memory reserves though, otherwise we might deplete all memory.
 	 */
 	do_each_thread(g, q) {
-		if (q->mm == mm && q->tgid != p->tgid)
+		if (q->mm == mm && !same_thread_group(q, p))
 			force_sig(SIGKILL, q);
 	} while_each_thread(g, q);
 
@@ -337,7 +338,6 @@
 			    unsigned long points, const char *message)
 {
 	struct task_struct *c;
-	struct list_head *tsk;
 
 	if (printk_ratelimit()) {
 		printk(KERN_WARNING "%s invoked oom-killer: "
@@ -357,11 +357,10 @@
 	}
 
 	printk(KERN_ERR "%s: kill process %d (%s) score %li or a child\n",
-					message, p->pid, p->comm, points);
+					message, task_pid_nr(p), p->comm, points);
 
 	/* Try to kill a child first */
-	list_for_each(tsk, &p->children) {
-		c = list_entry(tsk, struct task_struct, sibling);
+	list_for_each_entry(c, &p->children, sibling) {
 		if (c->mm == p->mm)
 			continue;
 		if (!oom_kill_task(c))
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index ff5784b..66c7369 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -656,11 +656,13 @@
 	}
 
 	if (req->subclass & 0x80) {
-		input->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-		input->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-		input->relbit[0] = BIT(REL_X) | BIT(REL_Y);
-		input->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_SIDE) | BIT(BTN_EXTRA);
-		input->relbit[0] |= BIT(REL_WHEEL);
+		input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+		input->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+			BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+		input->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+		input->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_SIDE) |
+			BIT_MASK(BTN_EXTRA);
+		input->relbit[0] |= BIT_MASK(REL_WHEEL);
 	}
 
 	input->dev.parent = hidp_get_device(session);
diff --git a/net/core/filter.c b/net/core/filter.c
index 1f0068e..e0a0694 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -447,7 +447,8 @@
 	rcu_assign_pointer(sk->sk_filter, fp);
 	rcu_read_unlock_bh();
 
-	sk_filter_delayed_uncharge(sk, old_fp);
+	if (old_fp)
+		sk_filter_delayed_uncharge(sk, old_fp);
 	return 0;
 }
 
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 590a767..daadbcc 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -15,7 +15,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8cae60c..7ac7031 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -161,7 +161,7 @@
 #endif
 #include <asm/byteorder.h>
 #include <linux/rcupdate.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 #include <asm/uaccess.h>
@@ -3514,7 +3514,7 @@
 
 	init_waitqueue_head(&t->queue);
 
-	pr_debug("pktgen: starting pktgen/%d:  pid=%d\n", cpu, current->pid);
+	pr_debug("pktgen: starting pktgen/%d:  pid=%d\n", cpu, task_pid_nr(current));
 
 	set_current_state(TASK_INTERRUPTIBLE);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1072d16..4a2640d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -744,10 +744,10 @@
 	rcu_read_lock();
 	tsk = find_task_by_pid(pid);
 	if (tsk) {
-		task_lock(tsk);
-		if (tsk->nsproxy)
-			net = get_net(tsk->nsproxy->net_ns);
-		task_unlock(tsk);
+		struct nsproxy *nsproxy;
+		nsproxy = task_nsproxy(tsk);
+		if (nsproxy)
+			net = get_net(nsproxy->net_ns);
 	}
 	rcu_read_unlock();
 	return net;
diff --git a/net/core/scm.c b/net/core/scm.c
index 530bee8..100ba6d 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -24,6 +24,8 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/security.h>
+#include <linux/pid.h>
+#include <linux/nsproxy.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -42,7 +44,7 @@
 
 static __inline__ int scm_check_creds(struct ucred *creds)
 {
-	if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
+	if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
 	    ((creds->uid == current->uid || creds->uid == current->euid ||
 	      creds->uid == current->suid) || capable(CAP_SETUID)) &&
 	    ((creds->gid == current->gid || creds->gid == current->egid ||
diff --git a/net/core/sock.c b/net/core/sock.c
index d292b41..febbcbc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -232,7 +232,7 @@
 			warned++;
 			printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
 			       "tries to set negative timeout\n",
-				current->comm, current->pid);
+				current->comm, task_pid_nr(current));
 		return 0;
 	}
 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 81a8285..8d8c291 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -54,7 +54,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 3cef128..8fb6ca2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -93,7 +93,7 @@
 		int remaining, rover, low, high;
 
 		inet_get_local_port_range(&low, &high);
-		remaining = high - low;
+		remaining = (high - low) + 1;
 		rover = net_random() % remaining + low;
 
 		do {
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fac6398..16eecc7 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -286,7 +286,7 @@
 		struct inet_timewait_sock *tw = NULL;
 
 		inet_get_local_port_range(&low, &high);
-		remaining = high - low;
+		remaining = (high - low) + 1;
 
 		local_bh_disable();
 		for (i = 1; i <= remaining; i++) {
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 1960747..c99f2a3 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -794,7 +794,7 @@
 
 	add_wait_queue(&sync_wait, &wait);
 
-	set_sync_pid(state, current->pid);
+	set_sync_pid(state, task_pid_nr(current));
 	complete(tinfo->startup);
 
 	/*
@@ -877,7 +877,7 @@
 	if (!tinfo)
 		return -ENOMEM;
 
-	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
+	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current));
 	IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
 		  sizeof(struct ip_vs_sync_conn));
 
@@ -917,7 +917,7 @@
 	    (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
 		return -ESRCH;
 
-	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
+	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current));
 	IP_VS_INFO("stopping sync thread %d ...\n",
 		   (state == IP_VS_STATE_MASTER) ?
 		   sync_master_pid : sync_backup_pid);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c78acc1..ffddd2b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -122,7 +122,7 @@
 	ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
 
 	if (write && ret == 0) {
-		if (range[1] <= range[0])
+		if (range[1] < range[0])
 			ret = -EINVAL;
 		else
 			set_local_port_range(range);
@@ -150,7 +150,7 @@
 
 	ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen);
 	if (ret == 0 && newval && newlen) {
-		if (range[1] <= range[0])
+		if (range[1] < range[0])
 			ret = -EINVAL;
 		else
 			set_local_port_range(range);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4f32200..2e6ad6d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1334,7 +1334,7 @@
 		if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) {
 			if (net_ratelimit())
 				printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
-				       current->comm, current->pid);
+				       current->comm, task_pid_nr(current));
 			peek_seq = tp->copied_seq;
 		}
 		continue;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cb9fc58..35d2b0e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -147,13 +147,14 @@
 	write_lock_bh(&udp_hash_lock);
 
 	if (!snum) {
-		int i, low, high;
+		int i, low, high, remaining;
 		unsigned rover, best, best_size_so_far;
 
 		inet_get_local_port_range(&low, &high);
+		remaining = (high - low) + 1;
 
 		best_size_so_far = UINT_MAX;
-		best = rover = net_random() % (high - low) + low;
+		best = rover = net_random() % remaining + low;
 
 		/* 1st pass: look for empty (or shortest) hash chain */
 		for (i = 0; i < UDP_HTABLE_SIZE; i++) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 1c2c276..d6f1026 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -261,7 +261,7 @@
 		struct inet_timewait_sock *tw = NULL;
 
 		inet_get_local_port_range(&low, &high);
-		remaining = high - low;
+		remaining = (high - low) + 1;
 
 		local_bh_disable();
 		for (i = 1; i <= remaining; i++) {
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 49eacba..46cf962 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -762,7 +762,7 @@
 			if (net_ratelimit())
 				printk(KERN_DEBUG "LLC(%s:%d): Application "
 						  "bug, race in MSG_PEEK.\n",
-				       current->comm, current->pid);
+				       current->comm, task_pid_nr(current));
 			peek_seq = llc->copied_seq;
 		}
 		continue;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d34a9de..4b4ed2a 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -37,8 +37,6 @@
 
 struct ieee80211_local;
 
-#define BIT(x) (1 << (x))
-
 #define IEEE80211_ALIGN32_PAD(a) ((4 - ((a) & 3)) & 3)
 
 /* Maximum number of broadcast/multicast frames to buffer when some of the
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index db81aef..f7ffeec 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -108,14 +108,11 @@
 	u8 wmm_param_len;
 };
 
-enum ParseRes { ParseOK = 0, ParseUnknown = 1, ParseFailed = -1 };
-
-static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len,
-					    struct ieee802_11_elems *elems)
+static void ieee802_11_parse_elems(u8 *start, size_t len,
+				   struct ieee802_11_elems *elems)
 {
 	size_t left = len;
 	u8 *pos = start;
-	int unknown = 0;
 
 	memset(elems, 0, sizeof(*elems));
 
@@ -126,15 +123,8 @@
 		elen = *pos++;
 		left -= 2;
 
-		if (elen > left) {
-#if 0
-			if (net_ratelimit())
-				printk(KERN_DEBUG "IEEE 802.11 element parse "
-				       "failed (id=%d elen=%d left=%d)\n",
-				       id, elen, left);
-#endif
-			return ParseFailed;
-		}
+		if (elen > left)
+			return;
 
 		switch (id) {
 		case WLAN_EID_SSID:
@@ -201,28 +191,15 @@
 			elems->ext_supp_rates_len = elen;
 			break;
 		default:
-#if 0
-			printk(KERN_DEBUG "IEEE 802.11 element parse ignored "
-				      "unknown element (id=%d elen=%d)\n",
-				      id, elen);
-#endif
-			unknown++;
 			break;
 		}
 
 		left -= elen;
 		pos += elen;
 	}
-
-	/* Do not trigger error if left == 1 as Apple Airport base stations
-	 * send AssocResps that are one spurious byte too long. */
-
-	return unknown ? ParseUnknown : ParseOK;
 }
 
 
-
-
 static int ecw2cw(int ecw)
 {
 	int cw = 1;
@@ -931,12 +908,7 @@
 
 	printk(KERN_DEBUG "%s: replying to auth challenge\n", dev->name);
 	pos = mgmt->u.auth.variable;
-	if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems)
-	    == ParseFailed) {
-		printk(KERN_DEBUG "%s: failed to parse Auth(challenge)\n",
-		       dev->name);
-		return;
-	}
+	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
 	if (!elems.challenge) {
 		printk(KERN_DEBUG "%s: no challenge IE in shared key auth "
 		       "frame\n", dev->name);
@@ -1230,12 +1202,7 @@
 	aid &= ~(BIT(15) | BIT(14));
 
 	pos = mgmt->u.assoc_resp.variable;
-	if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems)
-	    == ParseFailed) {
-		printk(KERN_DEBUG "%s: failed to parse AssocResp\n",
-		       dev->name);
-		return;
-	}
+	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
 
 	if (!elems.supp_rates) {
 		printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n",
@@ -1459,7 +1426,7 @@
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee802_11_elems elems;
 	size_t baselen;
-	int channel, invalid = 0, clen;
+	int channel, clen;
 	struct ieee80211_sta_bss *bss;
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1505,9 +1472,7 @@
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
 	}
 
-	if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen,
-				   &elems) == ParseFailed)
-		invalid = 1;
+	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
 	if (sdata->type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates &&
 	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 &&
@@ -1724,9 +1689,7 @@
 	if (baselen > len)
 		return;
 
-	if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen,
-				   &elems) == ParseFailed)
-		return;
+	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
 	if (elems.erp_info && elems.erp_info_len >= 1)
 		ieee80211_handle_erp_ie(dev, elems.erp_info[0]);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index af79423..9ec5013 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -2,13 +2,13 @@
  * GPL (C) 2002 Martin Devera (devik@cdi.cz).
  */
 #include <linux/module.h>
+#include <linux/bitops.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_connbytes.h>
 #include <net/netfilter/nf_conntrack.h>
 
 #include <asm/div64.h>
-#include <asm/bitops.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e11000a..d093650 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1623,11 +1623,6 @@
 	.close =packet_mm_close,
 };
 
-static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
-{
-	return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
-}
-
 static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
 {
 	int i;
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index eaabf08..d1e9d68 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -146,18 +146,18 @@
 static const struct input_device_id rfkill_ids[] = {
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT(EV_KEY) },
-		.keybit = { [LONG(KEY_WLAN)] = BIT(KEY_WLAN) },
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_WLAN)] = BIT_MASK(KEY_WLAN) },
 	},
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT(EV_KEY) },
-		.keybit = { [LONG(KEY_BLUETOOTH)] = BIT(KEY_BLUETOOTH) },
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_BLUETOOTH)] = BIT_MASK(KEY_BLUETOOTH) },
 	},
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT(EV_KEY) },
-		.keybit = { [LONG(KEY_UWB)] = BIT(KEY_UWB) },
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_UWB)] = BIT_MASK(KEY_UWB) },
 	},
 	{ }
 };
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 92435a8..9c15c488 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -2,9 +2,7 @@
 # Traffic control configuration.
 # 
 
-menu "QoS and/or fair queueing"
-
-config NET_SCHED
+menuconfig NET_SCHED
 	bool "QoS and/or fair queueing"
 	select NET_SCH_FIFO
 	---help---
@@ -41,9 +39,6 @@
 	  The available schedulers are listed in the following questions; you
 	  can say Y to as many as you like. If unsure, say N now.
 
-config NET_SCH_FIFO
-	bool
-
 if NET_SCHED
 
 comment "Queueing/Scheduling"
@@ -500,4 +495,5 @@
 
 endif # NET_SCHED
 
-endmenu
+config NET_SCH_FIFO
+	bool
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e01d576..fa1a6f4 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -556,6 +556,7 @@
 {
 	struct Qdisc *qdisc;
 	struct sk_buff *skb;
+	int running;
 
 	spin_lock_bh(&dev->queue_lock);
 	qdisc = dev->qdisc;
@@ -571,12 +572,31 @@
 
 	dev_watchdog_down(dev);
 
-	/* Wait for outstanding dev_queue_xmit calls. */
+	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
 	synchronize_rcu();
 
 	/* Wait for outstanding qdisc_run calls. */
-	while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
-		yield();
+	do {
+		while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
+			yield();
+
+		/*
+		 * Double-check inside queue lock to ensure that all effects
+		 * of the queue run are visible when we return.
+		 */
+		spin_lock_bh(&dev->queue_lock);
+		running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
+		spin_unlock_bh(&dev->queue_lock);
+
+		/*
+		 * The running flag should never be set at this point because
+		 * we've already set dev->qdisc to noop_qdisc *inside* the same
+		 * pair of spin locks.  That is, if any qdisc_run starts after
+		 * our initial test it should see the noop_qdisc and then
+		 * clear the RUNNING bit before dropping the queue lock.  So
+		 * if it is set here then we've found a bug.
+		 */
+	} while (WARN_ON_ONCE(running));
 }
 
 void dev_init_scheduler(struct net_device *dev)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 3c773c5..c98873f 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -847,7 +847,7 @@
 	task->tk_start = jiffies;
 
 	dprintk("RPC:       new task initialized, procpid %u\n",
-				current->pid);
+				task_pid_nr(current));
 }
 
 static struct rpc_task *
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 6996cba..9163ec5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -483,7 +483,7 @@
 	sk->sk_max_ack_backlog	= backlog;
 	sk->sk_state		= TCP_LISTEN;
 	/* set credentials so connect can copy them */
-	sk->sk_peercred.pid	= current->tgid;
+	sk->sk_peercred.pid	= task_tgid_vnr(current);
 	sk->sk_peercred.uid	= current->euid;
 	sk->sk_peercred.gid	= current->egid;
 	err = 0;
@@ -1133,7 +1133,7 @@
 	unix_peer(newsk)	= sk;
 	newsk->sk_state		= TCP_ESTABLISHED;
 	newsk->sk_type		= sk->sk_type;
-	newsk->sk_peercred.pid	= current->tgid;
+	newsk->sk_peercred.pid	= task_tgid_vnr(current);
 	newsk->sk_peercred.uid	= current->euid;
 	newsk->sk_peercred.gid	= current->egid;
 	newu = unix_sk(newsk);
@@ -1194,7 +1194,7 @@
 	sock_hold(skb);
 	unix_peer(ska)=skb;
 	unix_peer(skb)=ska;
-	ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
+	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
 	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
 	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
 
diff --git a/samples/Kconfig b/samples/Kconfig
new file mode 100644
index 0000000..57bb223
--- /dev/null
+++ b/samples/Kconfig
@@ -0,0 +1,16 @@
+# samples/Kconfig
+
+menuconfig SAMPLES
+	bool "Sample kernel code"
+	help
+	  You can build and test sample kernel code here.
+
+if SAMPLES
+
+config SAMPLE_MARKERS
+	tristate "Build markers examples -- loadable modules only"
+	depends on MARKERS && m
+	help
+	  This build markers example modules.
+
+endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
new file mode 100644
index 0000000..5a4f0b6
--- /dev/null
+++ b/samples/Makefile
@@ -0,0 +1,3 @@
+# Makefile for Linux samples code
+
+obj-$(CONFIG_SAMPLES)	+= markers/
diff --git a/samples/markers/Makefile b/samples/markers/Makefile
new file mode 100644
index 0000000..6d72312
--- /dev/null
+++ b/samples/markers/Makefile
@@ -0,0 +1,4 @@
+# builds the kprobes example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+obj-$(CONFIG_SAMPLE_MARKERS) += probe-example.o marker-example.o
diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c
new file mode 100644
index 0000000..e787c6d
--- /dev/null
+++ b/samples/markers/marker-example.c
@@ -0,0 +1,54 @@
+/* marker-example.c
+ *
+ * Executes a marker when /proc/marker-example is opened.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+
+struct proc_dir_entry *pentry_example;
+
+static int my_open(struct inode *inode, struct file *file)
+{
+	int i;
+
+	trace_mark(subsystem_event, "%d %s", 123, "example string");
+	for (i = 0; i < 10; i++)
+		trace_mark(subsystem_eventb, MARK_NOARGS);
+	return -EPERM;
+}
+
+static struct file_operations mark_ops = {
+	.open = my_open,
+};
+
+static int example_init(void)
+{
+	printk(KERN_ALERT "example init\n");
+	pentry_example = create_proc_entry("marker-example", 0444, NULL);
+	if (pentry_example)
+		pentry_example->proc_fops = &mark_ops;
+	else
+		return -EPERM;
+	return 0;
+}
+
+static void example_exit(void)
+{
+	printk(KERN_ALERT "example exit\n");
+	remove_proc_entry("marker-example", NULL);
+}
+
+module_init(example_init)
+module_exit(example_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Marker example");
diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c
new file mode 100644
index 0000000..238b2e3
--- /dev/null
+++ b/samples/markers/probe-example.c
@@ -0,0 +1,98 @@
+/* probe-example.c
+ *
+ * Connects two functions to marker call sites.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <asm/atomic.h>
+
+struct probe_data {
+	const char *name;
+	const char *format;
+	marker_probe_func *probe_func;
+};
+
+void probe_subsystem_event(const struct marker *mdata, void *private,
+	const char *format, ...)
+{
+	va_list ap;
+	/* Declare args */
+	unsigned int value;
+	const char *mystr;
+
+	/* Assign args */
+	va_start(ap, format);
+	value = va_arg(ap, typeof(value));
+	mystr = va_arg(ap, typeof(mystr));
+
+	/* Call printk */
+	printk(KERN_DEBUG "Value %u, string %s\n", value, mystr);
+
+	/* or count, check rights, serialize data in a buffer */
+
+	va_end(ap);
+}
+
+atomic_t eventb_count = ATOMIC_INIT(0);
+
+void probe_subsystem_eventb(const struct marker *mdata, void *private,
+	const char *format, ...)
+{
+	/* Increment counter */
+	atomic_inc(&eventb_count);
+}
+
+static struct probe_data probe_array[] =
+{
+	{	.name = "subsystem_event",
+		.format = "%d %s",
+		.probe_func = probe_subsystem_event },
+	{	.name = "subsystem_eventb",
+		.format = MARK_NOARGS,
+		.probe_func = probe_subsystem_eventb },
+};
+
+static int __init probe_init(void)
+{
+	int result;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
+		result = marker_probe_register(probe_array[i].name,
+				probe_array[i].format,
+				probe_array[i].probe_func, &probe_array[i]);
+		if (result)
+			printk(KERN_INFO "Unable to register probe %s\n",
+				probe_array[i].name);
+		result = marker_arm(probe_array[i].name);
+		if (result)
+			printk(KERN_INFO "Unable to arm probe %s\n",
+				probe_array[i].name);
+	}
+	return 0;
+}
+
+static void __exit probe_fini(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(probe_array); i++)
+		marker_probe_unregister(probe_array[i].name);
+	printk(KERN_INFO "Number of event b : %u\n",
+			atomic_read(&eventb_count));
+}
+
+module_init(probe_init);
+module_exit(probe_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("SUBSYSTEM Probe");
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index de7bb28..b96ea8d 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -56,6 +56,17 @@
 # gcc support functions
 # See documentation in Documentation/kbuild/makefiles.txt
 
+# cc-cross-prefix
+# Usage: CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu- m68k-linux-)
+# Return first prefix where a prefix$(CC) is found in PATH.
+# If no $(CC) found in PATH with listed prefixes return nothing
+cc-cross-prefix =  \
+	$(word 1, $(foreach c,$(1),                                   \
+		$(shell set -e;                                       \
+		if (which $(strip $(c))$(CC)) > /dev/null 2>&1 ; then \
+			echo $(c);                                    \
+		fi)))
+
 # output directory for tests below
 TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/)
 
diff --git a/scripts/basic/docproc.c b/scripts/basic/docproc.c
index e5c6ac7..0e4bd54 100644
--- a/scripts/basic/docproc.c
+++ b/scripts/basic/docproc.c
@@ -66,12 +66,15 @@
 #define FUNCTION      "-function"
 #define NOFUNCTION    "-nofunction"
 
+char *srctree;
+
 void usage (void)
 {
 	fprintf(stderr, "Usage: docproc {doc|depend} file\n");
 	fprintf(stderr, "Input is read from file.tmpl. Output is sent to stdout\n");
 	fprintf(stderr, "doc: frontend when generating kernel documentation\n");
 	fprintf(stderr, "depend: generate list of files referenced within file\n");
+	fprintf(stderr, "Environment variable SRCTREE: absolute path to kernel source tree.\n");
 }
 
 /*
@@ -90,7 +93,7 @@
 			exit(1);
 		case  0:
 			memset(real_filename, 0, sizeof(real_filename));
-			strncat(real_filename, getenv("SRCTREE"), PATH_MAX);
+			strncat(real_filename, srctree, PATH_MAX);
 			strncat(real_filename, KERNELDOCPATH KERNELDOC,
 					PATH_MAX - strlen(real_filename));
 			execvp(real_filename, svec);
@@ -171,7 +174,7 @@
 	if (filename_exist(filename) == NULL) {
 		char real_filename[PATH_MAX + 1];
 		memset(real_filename, 0, sizeof(real_filename));
-		strncat(real_filename, getenv("SRCTREE"), PATH_MAX);
+		strncat(real_filename, srctree, PATH_MAX);
 		strncat(real_filename, filename,
 				PATH_MAX - strlen(real_filename));
 		sym = add_new_file(filename);
@@ -338,6 +341,10 @@
 int main(int argc, char *argv[])
 {
 	FILE * infile;
+
+	srctree = getenv("SRCTREE");
+	if (!srctree)
+		srctree = getcwd(NULL, 0);
 	if (argc != 3) {
 		usage();
 		exit(1);
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index b458e2a..28e480c 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -15,7 +15,7 @@
 #	AVR32 port by Haavard Skinnemoen <hskinnemoen@atmel.com>
 #
 #	Usage:
-#	objdump -d vmlinux | stackcheck.pl [arch]
+#	objdump -d vmlinux | scripts/checkstack.pl [arch]
 #
 #	TODO :	Port to all architectures (one regex per arch)
 
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index bb08069..83c5e76 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -84,7 +84,7 @@
 # lxdialog stuff
 check-lxdialog  := $(srctree)/$(src)/lxdialog/check-lxdialog.sh
 
-# Use reursively expanded variables so we do not call gcc unless
+# Use recursively expanded variables so we do not call gcc unless
 # we really need to do so. (Do not call gcc as part of make mrproper)
 HOST_EXTRACFLAGS = $(shell $(CONFIG_SHELL) $(check-lxdialog) -ccflags)
 HOST_LOADLIBES   = $(shell $(CONFIG_SHELL) $(check-lxdialog) -ldflags $(HOSTCC))
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 2ef9a19..93ac52a 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -268,6 +268,9 @@
 			     "was in %s%s\n", mod->name, name,
 			     s->module->name,
 			     is_vmlinux(s->module->name) ?"":".ko");
+		} else {
+			/* In case Modules.symvers was out of date */
+			s->module = mod;
 		}
 	}
 	s->preloaded = 0;
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 6edb29f..0f657b5 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -83,6 +83,7 @@
 Standards-Version: 3.6.1
 
 Package: $packagename
+Provides: kernel-image-$version, linux-image-$version
 Architecture: any
 Description: User Mode Linux kernel, version $version
  User-mode Linux is a port of the Linux kernel to its own system call
@@ -104,6 +105,7 @@
 Standards-Version: 3.6.1
 
 Package: $packagename
+Provides: kernel-image-$version, linux-image-$version
 Architecture: any
 Description: Linux kernel, version $version
  This package contains the Linux kernel, modules and corresponding other
diff --git a/security/commoncap.c b/security/commoncap.c
index 48ca5b0..43f9027 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -23,6 +23,7 @@
 #include <linux/xattr.h>
 #include <linux/hugetlb.h>
 #include <linux/mount.h>
+#include <linux/sched.h>
 
 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 /*
@@ -334,7 +335,7 @@
 	/* For init, we want to retain the capabilities set
 	 * in the init_task struct. Thus we skip the usual
 	 * capability rules */
-	if (!is_init(current)) {
+	if (!is_global_init(current)) {
 		current->cap_permitted = new_permitted;
 		current->cap_effective = bprm->cap_effective ?
 				new_permitted : 0;
diff --git a/sound/ppc/beep.c b/sound/ppc/beep.c
index a1aa89f..566b5ab 100644
--- a/sound/ppc/beep.c
+++ b/sound/ppc/beep.c
@@ -236,8 +236,8 @@
 	input_dev->id.product = 0x0001;
 	input_dev->id.version = 0x0100;
 
-	input_dev->evbit[0] = BIT(EV_SND);
-	input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+	input_dev->evbit[0] = BIT_MASK(EV_SND);
+	input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
 	input_dev->event = snd_pmac_beep_event;
 	input_dev->dev.parent = &chip->pdev->dev;
 	input_set_drvdata(input_dev, chip);
diff --git a/sound/usb/caiaq/caiaq-input.c b/sound/usb/caiaq/caiaq-input.c
index a1de0c6..cd536ca 100644
--- a/sound/usb/caiaq/caiaq-input.c
+++ b/sound/usb/caiaq/caiaq-input.c
@@ -200,8 +200,9 @@
 
         switch (dev->chip.usb_id) {
 	case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_RIGKONTROL2):
-		input->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-		input->absbit[0] = BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_Z);
+		input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+		input->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
+			BIT_MASK(ABS_Z);
 		input->keycode = keycode_rk2;
 		input->keycodesize = sizeof(char);
 		input->keycodemax = ARRAY_SIZE(keycode_rk2);
@@ -228,8 +229,8 @@
 		snd_usb_caiaq_set_auto_msg(dev, 1, 10, 0);
 		break;
 	case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_AK1):
-		input->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-		input->absbit[0] = BIT(ABS_X);
+		input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+		input->absbit[0] = BIT_MASK(ABS_X);
 		input->keycode = keycode_ak1;
 		input->keycodesize = sizeof(char);
 		input->keycodemax = ARRAY_SIZE(keycode_ak1);