Merge branch 'akpm' (incoming from Andrew)
Merge second patch-bomb from Andrew Morton:
- the rest of MM
- zram updates
- zswap updates
- exit
- procfs
- exec
- wait
- crash dump
- lib/idr
- rapidio
- adfs, affs, bfs, ufs
- cris
- Kconfig things
- initramfs
- small amount of IPC material
- percpu enhancements
- early ioremap support
- various other misc things
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (156 commits)
MAINTAINERS: update Intel C600 SAS driver maintainers
fs/ufs: remove unused ufs_super_block_third pointer
fs/ufs: remove unused ufs_super_block_second pointer
fs/ufs: remove unused ufs_super_block_first pointer
fs/ufs/super.c: add __init to init_inodecache()
doc/kernel-parameters.txt: add early_ioremap_debug
arm64: add early_ioremap support
arm64: initialize pgprot info earlier in boot
x86: use generic early_ioremap
mm: create generic early_ioremap() support
x86/mm: sparse warning fix for early_memremap
lglock: map to spinlock when !CONFIG_SMP
percpu: add preemption checks to __this_cpu ops
vmstat: use raw_cpu_ops to avoid false positives on preemption checks
slub: use raw_cpu_inc for incrementing statistics
net: replace __this_cpu_inc in route.c with raw_cpu_inc
modules: use raw_cpu_write for initialization of per cpu refcount.
mm: use raw_cpu ops for determining current NUMA node
percpu: add raw_cpu_ops
slub: fix leak of 'name' in sysfs_slab_add
...
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
index 3f0b9ae..70ec992 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -43,6 +43,36 @@
The invalid_io file is read-only and specifies the number of
non-page-size-aligned I/O requests issued to this device.
+What: /sys/block/zram<id>/failed_reads
+Date: February 2014
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The failed_reads file is read-only and specifies the number of
+ failed reads happened on this device.
+
+What: /sys/block/zram<id>/failed_writes
+Date: February 2014
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The failed_writes file is read-only and specifies the number of
+ failed writes happened on this device.
+
+What: /sys/block/zram<id>/max_comp_streams
+Date: February 2014
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The max_comp_streams file is read-write and specifies the
+ number of backend's zcomp_strm compression streams (number of
+ concurrent compress operations).
+
+What: /sys/block/zram<id>/comp_algorithm
+Date: February 2014
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The comp_algorithm file is read-write and lets to show
+ available and selected compression algorithms, change
+ compression algorithm selection.
+
What: /sys/block/zram<id>/notify_free
Date: August 2010
Contact: Nitin Gupta <ngupta@vflare.org>
@@ -53,15 +83,6 @@
is freed. This statistic is applicable only when this disk is
being used as a swap disk.
-What: /sys/block/zram<id>/discard
-Date: August 2010
-Contact: Nitin Gupta <ngupta@vflare.org>
-Description:
- The discard file is read-only and specifies the number of
- discard requests received by this device. These requests
- provide information to block device regarding blocks which are
- no longer used by filesystem.
-
What: /sys/block/zram<id>/zero_pages
Date: August 2010
Contact: Nitin Gupta <ngupta@vflare.org>
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index bd9015d..e84f094 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -671,7 +671,7 @@
<sect1 id="routines-local-irqs">
<title><function>local_irq_save()</function>/<function>local_irq_restore()</function>
- <filename class="headerfile">include/asm/system.h</filename>
+ <filename class="headerfile">include/linux/irqflags.h</filename>
</title>
<para>
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index 85e24c4..d50fa61 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -39,7 +39,7 @@
ffffffbffb000000 ffffffbffbbfffff 12MB [guard]
-ffffffbffbc00000 ffffffbffbdfffff 2MB earlyprintk device
+ffffffbffbc00000 ffffffbffbdfffff 2MB fixed mappings
ffffffbffbe00000 ffffffbffbffffff 2MB [guard]
@@ -66,7 +66,7 @@
fffffdfffb000000 fffffdfffbbfffff 12MB [guard]
-fffffdfffbc00000 fffffdfffbdfffff 2MB earlyprintk device
+fffffdfffbc00000 fffffdfffbdfffff 2MB fixed mappings
fffffdfffbe00000 fffffdfffbffffff 2MB [guard]
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index 2eccddf..0595c3f 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -21,7 +21,43 @@
This creates 4 devices: /dev/zram{0,1,2,3}
(num_devices parameter is optional. Default: 1)
-2) Set Disksize
+2) Set max number of compression streams
+ Compression backend may use up to max_comp_streams compression streams,
+ thus allowing up to max_comp_streams concurrent compression operations.
+ By default, compression backend uses single compression stream.
+
+ Examples:
+ #show max compression streams number
+ cat /sys/block/zram0/max_comp_streams
+
+ #set max compression streams number to 3
+ echo 3 > /sys/block/zram0/max_comp_streams
+
+Note:
+In order to enable compression backend's multi stream support max_comp_streams
+must be initially set to desired concurrency level before ZRAM device
+initialisation. Once the device initialised as a single stream compression
+backend (max_comp_streams equals to 1), you will see error if you try to change
+the value of max_comp_streams because single stream compression backend
+implemented as a special case by lock overhead issue and does not support
+dynamic max_comp_streams. Only multi stream backend supports dynamic
+max_comp_streams adjustment.
+
+3) Select compression algorithm
+ Using comp_algorithm device attribute one can see available and
+ currently selected (shown in square brackets) compression algortithms,
+ change selected compression algorithm (once the device is initialised
+ there is no way to change compression algorithm).
+
+ Examples:
+ #show supported compression algorithms
+ cat /sys/block/zram0/comp_algorithm
+ lzo [lz4]
+
+ #select lzo compression algorithm
+ echo lzo > /sys/block/zram0/comp_algorithm
+
+4) Set Disksize
Set disk size by writing the value to sysfs node 'disksize'.
The value can be either in bytes or you can use mem suffixes.
Examples:
@@ -33,32 +69,38 @@
echo 512M > /sys/block/zram0/disksize
echo 1G > /sys/block/zram0/disksize
-3) Activate:
+Note:
+There is little point creating a zram of greater than twice the size of memory
+since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
+size of the disk when not in use so a huge zram is wasteful.
+
+5) Activate:
mkswap /dev/zram0
swapon /dev/zram0
mkfs.ext4 /dev/zram1
mount /dev/zram1 /tmp
-4) Stats:
+6) Stats:
Per-device statistics are exported as various nodes under
/sys/block/zram<id>/
disksize
num_reads
num_writes
+ failed_reads
+ failed_writes
invalid_io
notify_free
- discard
zero_pages
orig_data_size
compr_data_size
mem_used_total
-5) Deactivate:
+7) Deactivate:
swapoff /dev/zram0
umount /dev/zram1
-6) Reset:
+8) Reset:
Write any positive value to 'reset' sysfs node
echo 1 > /sys/block/zram0/reset
echo 1 > /sys/block/zram1/reset
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index ce94a83..80ac454 100644
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -24,7 +24,7 @@
a page/swp_entry may be charged (usage += PAGE_SIZE) at
- mem_cgroup_newpage_charge()
+ mem_cgroup_charge_anon()
Called at new page fault and Copy-On-Write.
mem_cgroup_try_charge_swapin()
@@ -32,7 +32,7 @@
Followed by charge-commit-cancel protocol. (With swap accounting)
At commit, a charge recorded in swap_cgroup is removed.
- mem_cgroup_cache_charge()
+ mem_cgroup_charge_file()
Called at add_to_page_cache()
mem_cgroup_cache_charge_swapin()
diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt
index 5108afb..762ca54 100644
--- a/Documentation/cgroups/resource_counter.txt
+++ b/Documentation/cgroups/resource_counter.txt
@@ -76,15 +76,7 @@
limit_fail_at parameter is set to the particular res_counter element
where the charging failed.
- d. int res_counter_charge_locked
- (struct res_counter *rc, unsigned long val, bool force)
-
- The same as res_counter_charge(), but it must not acquire/release the
- res_counter->lock internally (it must be called with res_counter->lock
- held). The force parameter indicates whether we can bypass the limit.
-
- e. u64 res_counter_uncharge[_locked]
- (struct res_counter *rc, unsigned long val)
+ d. u64 res_counter_uncharge(struct res_counter *rc, unsigned long val)
When a resource is released (freed) it should be de-accounted
from the resource counter it was accounted to. This is called
@@ -93,7 +85,7 @@
The _locked routines imply that the res_counter->lock is taken.
- f. u64 res_counter_uncharge_until
+ e. u64 res_counter_uncharge_until
(struct res_counter *rc, struct res_counter *top,
unsigned long val)
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index f424e0e..efca5c1 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -529,6 +529,7 @@
open: yes
close: yes
fault: yes can return with page locked
+map_pages: yes
page_mkwrite: yes can return with page locked
access: yes
@@ -540,6 +541,15 @@
subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
locked. The VM will unlock the page.
+ ->map_pages() is called when VM asks to map easy accessible pages.
+Filesystem should find and map pages associated with offsets from "pgoff"
+till "max_pgoff". ->map_pages() is called with page table locked and must
+not block. If it's not possible to reach a page without blocking,
+filesystem should skip it. Filesystem should use do_set_pte() to setup
+page table entry. Pointer to entry associated with offset "pgoff" is
+passed in "pte" field in vm_fault structure. Pointers to entries for other
+offsets should be calculated relative to "pte".
+
->page_mkwrite() is called when a previously read-only pte is
about to become writeable. The filesystem again must ensure that there are
no truncate/invalidate races, and then return with the page locked. If
diff --git a/Documentation/filesystems/affs.txt b/Documentation/filesystems/affs.txt
index 81ac488..71b63c2 100644
--- a/Documentation/filesystems/affs.txt
+++ b/Documentation/filesystems/affs.txt
@@ -49,6 +49,10 @@
This is useful since most of the plain AmigaOS files
will map to 600.
+nofilenametruncate
+ The file system will return an error when filename exceeds
+ standard maximum filename length (30 characters).
+
reserved=num Sets the number of reserved blocks at the start of the
partition to num. You should never need this option.
Default is 2.
@@ -181,9 +185,8 @@
this fs. For a most up-to-date list of bugs please consult
fs/affs/Changes.
-Filenames are truncated to 30 characters without warning (this
-can be changed by setting the compile-time option AFFS_NO_TRUNCATE
-in include/linux/amigaffs.h).
+By default, filenames are truncated to 30 characters without warning.
+'nofilenametruncate' mount option can change that behavior.
Case is ignored by the affs in filename matching, but Linux shells
do care about the case. Example (with /wb being an affs mounted fs):
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index f00bee14..8b9cd8e 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1648,18 +1648,21 @@
if precise results are needed.
-3.7 /proc/<pid>/fdinfo/<fd> - Information about opened file
+3.8 /proc/<pid>/fdinfo/<fd> - Information about opened file
---------------------------------------------------------------
This file provides information associated with an opened file. The regular
-files have at least two fields -- 'pos' and 'flags'. The 'pos' represents
-the current offset of the opened file in decimal form [see lseek(2) for
-details] and 'flags' denotes the octal O_xxx mask the file has been
-created with [see open(2) for details].
+files have at least three fields -- 'pos', 'flags' and mnt_id. The 'pos'
+represents the current offset of the opened file in decimal form [see lseek(2)
+for details], 'flags' denotes the octal O_xxx mask the file has been
+created with [see open(2) for details] and 'mnt_id' represents mount ID of
+the file system containing the opened file [see 3.5 /proc/<pid>/mountinfo
+for details].
A typical output is
pos: 0
flags: 0100002
+ mnt_id: 19
The files such as eventfd, fsnotify, signalfd, epoll among the regular pos/flags
pair provide additional information particular to the objects they represent.
@@ -1668,6 +1671,7 @@
~~~~~~~~~~~~~
pos: 0
flags: 04002
+ mnt_id: 9
eventfd-count: 5a
where 'eventfd-count' is hex value of a counter.
@@ -1676,6 +1680,7 @@
~~~~~~~~~~~~~~
pos: 0
flags: 04002
+ mnt_id: 9
sigmask: 0000000000000200
where 'sigmask' is hex value of the signal mask associated
@@ -1685,6 +1690,7 @@
~~~~~~~~~~~
pos: 0
flags: 02
+ mnt_id: 9
tfd: 5 events: 1d data: ffffffffffffffff
where 'tfd' is a target file descriptor number in decimal form,
@@ -1718,6 +1724,7 @@
pos: 0
flags: 02
+ mnt_id: 9
fanotify flags:10 event-flags:0
fanotify mnt_id:12 mflags:40 mask:38 ignored_mask:40000003
fanotify ino:4f969 sdev:800013 mflags:0 mask:3b ignored_mask:40000000 fhandle-bytes:8 fhandle-type:1 f_handle:69f90400c275b5b4
diff --git a/Documentation/irqflags-tracing.txt b/Documentation/irqflags-tracing.txt
index 67aa71e..f6da05670 100644
--- a/Documentation/irqflags-tracing.txt
+++ b/Documentation/irqflags-tracing.txt
@@ -22,13 +22,6 @@
Architectures that want to support this need to do a couple of
code-organizational changes first:
-- move their irq-flags manipulation code from their asm/system.h header
- to asm/irqflags.h
-
-- rename local_irq_disable()/etc to raw_local_irq_disable()/etc. so that
- the linux/irqflags.h code can inject callbacks and can construct the
- real local_irq_disable()/etc APIs.
-
- add and enable TRACE_IRQFLAGS_SUPPORT in their arch level Kconfig file
and then a couple of functional changes are needed as well to implement
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index c420676..350f733 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -157,6 +157,10 @@
to the build environment (if this is desired, it can be done via
another symbol).
+ - "allnoconfig_y"
+ This declares the symbol as one that should have the value y when
+ using "allnoconfig". Used for symbols that hide other symbols.
+
Menu dependencies
-----------------
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index bc34785..b6c67d5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -884,6 +884,11 @@
Enable debug messages at boot time. See
Documentation/dynamic-debug-howto.txt for details.
+ early_ioremap_debug [KNL]
+ Enable debug messages in early_ioremap support. This
+ is useful for tracking down temporary early mappings
+ which are not unmapped.
+
earlycon= [KNL] Output early console device and options.
uart[8250],io,<addr>[,options]
uart[8250],mmio,<addr>[,options]
diff --git a/Documentation/rapidio/sysfs.txt b/Documentation/rapidio/sysfs.txt
index 271438c..47ce9a5 100644
--- a/Documentation/rapidio/sysfs.txt
+++ b/Documentation/rapidio/sysfs.txt
@@ -2,8 +2,8 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-1. Device Subdirectories
-------------------------
+1. RapidIO Device Subdirectories
+--------------------------------
For each RapidIO device, the RapidIO subsystem creates files in an individual
subdirectory with the following name, /sys/bus/rapidio/devices/<device_name>.
@@ -25,8 +25,8 @@
NOTE: An enumerating or discovering endpoint does not create a sysfs entry for
itself, this is why an endpoint with destID=1 is not shown in the list.
-2. Attributes Common for All Devices
-------------------------------------
+2. Attributes Common for All RapidIO Devices
+--------------------------------------------
Each device subdirectory contains the following informational read-only files:
@@ -52,16 +52,16 @@
and provides an access to the RapidIO device registers using standard file read
and write operations.
-3. Endpoint Device Attributes
------------------------------
+3. RapidIO Endpoint Device Attributes
+-------------------------------------
Currently Linux RapidIO subsystem does not create any endpoint specific sysfs
attributes. It is possible that RapidIO master port drivers and endpoint device
drivers will add their device-specific sysfs attributes but such attributes are
outside the scope of this document.
-4. Switch Device Attributes
----------------------------
+4. RapidIO Switch Device Attributes
+-----------------------------------
RapidIO switches have additional attributes in sysfs. RapidIO subsystem supports
common and device-specific sysfs attributes for switches. Because switches are
@@ -106,3 +106,53 @@
for that controller always will be 0.
To initiate RapidIO enumeration/discovery on all available mports
a user must write '-1' (or RIO_MPORT_ANY) into this attribute file.
+
+
+6. RapidIO Bus Controllers/Ports
+--------------------------------
+
+On-chip RapidIO controllers and PCIe-to-RapidIO bridges (referenced as
+"Master Port" or "mport") are presented in sysfs as the special class of
+devices: "rapidio_port".
+
+The /sys/class/rapidio_port subdirectory contains individual subdirectories
+named as "rapidioN" where N = mport ID registered with RapidIO subsystem.
+
+NOTE: An mport ID is not a RapidIO destination ID assigned to a given local
+mport device.
+
+Each mport device subdirectory in addition to standard entries contains the
+following device-specific attributes:
+
+ port_destid - reports RapidIO destination ID assigned to the given RapidIO
+ mport device. If value 0xFFFFFFFF is returned this means that
+ no valid destination ID have been assigned to the mport (yet).
+ Normally, before enumeration/discovery have been executed only
+ fabric enumerating mports have a valid destination ID assigned
+ to them using "hdid=..." rapidio module parameter.
+ sys_size - reports RapidIO common transport system size:
+ 0 = small (8-bit destination ID, max. 256 devices),
+ 1 = large (16-bit destination ID, max. 65536 devices).
+
+After enumeration or discovery was performed for a given mport device,
+the corresponding subdirectory will also contain subdirectories for each
+child RapidIO device connected to the mport. Naming conventions for RapidIO
+devices are described in Section 1 above.
+
+The example below shows mport device subdirectory with several child RapidIO
+devices attached to it.
+
+[rio@rapidio ~]$ ls /sys/class/rapidio_port/rapidio0/ -l
+total 0
+drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0001
+drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0004
+drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0007
+drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0002
+drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0003
+drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0005
+lrwxrwxrwx 1 root root 0 Feb 11 15:11 device -> ../../../0000:01:00.0
+-r--r--r-- 1 root root 4096 Feb 11 15:11 port_destid
+drwxr-xr-x 2 root root 0 Feb 11 15:11 power
+lrwxrwxrwx 1 root root 0 Feb 11 15:04 subsystem -> ../../../../../../class/rapidio_port
+-r--r--r-- 1 root root 4096 Feb 11 15:11 sys_size
+-rw-r--r-- 1 root root 4096 Feb 11 15:04 uevent
diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt
index 9290de7..a2f27bb 100644
--- a/Documentation/scheduler/sched-arch.txt
+++ b/Documentation/scheduler/sched-arch.txt
@@ -8,7 +8,7 @@
By default, the switch_to arch function is called with the runqueue
locked. This is usually not a problem unless switch_to may need to
take the runqueue lock. This is usually due to a wake up operation in
-the context switch. See arch/ia64/include/asm/system.h for an example.
+the context switch. See arch/ia64/include/asm/switch_to.h for an example.
To request the scheduler call switch_to with the runqueue unlocked,
you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 271a09d..9886c3d 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -317,6 +317,7 @@
This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
0: means infinite timeout - no checking done.
+Possible values to set are in range {0..LONG_MAX/HZ}.
==============================================================
diff --git a/MAINTAINERS b/MAINTAINERS
index bfff896..d0b8afe 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4542,8 +4542,7 @@
INTEL C600 SERIES SAS CONTROLLER DRIVER
M: Intel SCU Linux support <intel-linux-scu@intel.com>
-M: Lukasz Dorau <lukasz.dorau@intel.com>
-M: Maciej Patelczyk <maciej.patelczyk@intel.com>
+M: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
M: Dave Jiang <dave.jiang@intel.com>
L: linux-scsi@vger.kernel.org
T: git git://git.code.sf.net/p/intel-sas/isci
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 75de197..9596b0ab 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -57,7 +57,7 @@
config MMU
def_bool y
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool y
config GENERIC_CALIBRATE_DELAY
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d7a71e3..5db05f6a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -126,7 +126,7 @@
config HAVE_PROC_CPU
bool
-config NO_IOPORT
+config NO_IOPORT_MAP
bool
config EISA
@@ -410,7 +410,7 @@
select ISA
select NEED_MACH_IO_H
select NEED_MACH_MEMORY_H
- select NO_IOPORT
+ select NO_IOPORT_MAP
help
This is an evaluation board for the StrongARM processor available
from Digital. It has limited hardware on-board, including an
@@ -428,7 +428,7 @@
select CPU_V7M
select GENERIC_CLOCKEVENTS
select NO_DMA
- select NO_IOPORT
+ select NO_IOPORT_MAP
select SPARSE_IRQ
select USE_OF
help
@@ -677,7 +677,7 @@
select HAVE_SMP
select MIGHT_HAVE_CACHE_L2X0
select MULTI_IRQ_HANDLER
- select NO_IOPORT
+ select NO_IOPORT_MAP
select PINCTRL
select PM_GENERIC_DOMAINS if PM
select SPARSE_IRQ
@@ -699,7 +699,7 @@
select ISA_DMA_API
select NEED_MACH_IO_H
select NEED_MACH_MEMORY_H
- select NO_IOPORT
+ select NO_IOPORT_MAP
select VIRT_TO_BUS
help
On the Acorn Risc-PC, Linux can support the internal IDE disk and
@@ -760,7 +760,7 @@
select HAVE_S3C2410_I2C if I2C
select HAVE_S3C2410_WATCHDOG if WATCHDOG
select HAVE_TCM
- select NO_IOPORT
+ select NO_IOPORT_MAP
select PLAT_SAMSUNG
select PM_GENERIC_DOMAINS if PM
select S3C_DEV_NAND
diff --git a/arch/arm/mach-picoxcell/Kconfig b/arch/arm/mach-picoxcell/Kconfig
index eca9eb1..62240f6 100644
--- a/arch/arm/mach-picoxcell/Kconfig
+++ b/arch/arm/mach-picoxcell/Kconfig
@@ -4,4 +4,4 @@
select ARM_VIC
select DW_APB_TIMER_OF
select HAVE_TCM
- select NO_IOPORT
+ select NO_IOPORT_MAP
diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig
index 3e81891..e4e505f 100644
--- a/arch/arm/mach-prima2/Kconfig
+++ b/arch/arm/mach-prima2/Kconfig
@@ -3,7 +3,7 @@
select ARCH_HAS_RESET_CONTROLLER
select ARCH_REQUIRE_GPIOLIB
select GENERIC_IRQ_CHIP
- select NO_IOPORT
+ select NO_IOPORT_MAP
select PINCTRL
select PINCTRL_SIRF
help
diff --git a/arch/arm/mach-s3c24xx/Kconfig b/arch/arm/mach-s3c24xx/Kconfig
index ba1cc62..40cf50b 100644
--- a/arch/arm/mach-s3c24xx/Kconfig
+++ b/arch/arm/mach-s3c24xx/Kconfig
@@ -12,7 +12,7 @@
config PLAT_S3C24XX
def_bool y
select ARCH_REQUIRE_GPIOLIB
- select NO_IOPORT
+ select NO_IOPORT_MAP
select S3C_DEV_NAND
select IRQ_DOMAIN
help
diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index a182008..0f92ba8 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -10,7 +10,7 @@
select ARM_GIC
select MIGHT_HAVE_PCI
select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
- select NO_IOPORT
+ select NO_IOPORT_MAP
select PINCTRL
select ARCH_REQUIRE_GPIOLIB
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 80b4be3..657d52d 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -10,7 +10,7 @@
select HAVE_ARM_TWD if SMP
select HAVE_PATA_PLATFORM
select ICST
- select NO_IOPORT
+ select NO_IOPORT_MAP
select PLAT_VERSATILE
select PLAT_VERSATILE_CLCD
select POWER_RESET
diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig
index b57e922..243dfcb 100644
--- a/arch/arm/plat-samsung/Kconfig
+++ b/arch/arm/plat-samsung/Kconfig
@@ -9,7 +9,7 @@
depends on PLAT_S3C24XX || ARCH_S3C64XX || PLAT_S5P || ARCH_EXYNOS
default y
select GENERIC_IRQ_CHIP
- select NO_IOPORT
+ select NO_IOPORT_MAP
help
Base platform code for all Samsung SoC based systems
@@ -19,7 +19,7 @@
default y
select ARCH_REQUIRE_GPIOLIB
select ARM_VIC
- select NO_IOPORT
+ select NO_IOPORT_MAP
select PLAT_SAMSUNG
select S3C_GPIO_TRACK
select S5P_GPIO_DRVSTR
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9711a5f..e6e4d37 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -17,6 +17,7 @@
select GENERIC_CLOCKEVENTS
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_EARLY_IOREMAP
select GENERIC_IOMAP
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
@@ -66,7 +67,7 @@
config MMU
def_bool y
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool y
config STACKTRACE_SUPPORT
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 4bca4923..83f71b3 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -10,6 +10,7 @@
generic-y += div64.h
generic-y += dma.h
generic-y += emergency-restart.h
+generic-y += early_ioremap.h
generic-y += errno.h
generic-y += ftrace.h
generic-y += hash.h
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
new file mode 100644
index 0000000..5f7bfe6
--- /dev/null
+++ b/arch/arm64/include/asm/fixmap.h
@@ -0,0 +1,67 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ * Copyright (C) 2013 Mark Salter <msalter@redhat.com>
+ *
+ * Adapted from arch/x86_64 version.
+ *
+ */
+
+#ifndef _ASM_ARM64_FIXMAP_H
+#define _ASM_ARM64_FIXMAP_H
+
+#ifndef __ASSEMBLY__
+#include <linux/kernel.h>
+#include <asm/page.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * page-sized. Use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ */
+enum fixed_addresses {
+ FIX_EARLYCON_MEM_BASE,
+ __end_of_permanent_fixed_addresses,
+
+ /*
+ * Temporary boot-time mappings, used by early_ioremap(),
+ * before ioremap() is functional.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define NR_FIX_BTMAPS 4
+#else
+#define NR_FIX_BTMAPS 64
+#endif
+#define FIX_BTMAPS_SLOTS 7
+#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+ __end_of_fixed_addresses
+};
+
+#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
+#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE)
+
+extern void __early_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags);
+
+#define __set_fixmap __early_set_fixmap
+
+#include <asm-generic/fixmap.h>
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_ARM64_FIXMAP_H */
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 7846a6b..a1bef78 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -27,6 +27,7 @@
#include <asm/byteorder.h>
#include <asm/barrier.h>
#include <asm/pgtable.h>
+#include <asm/early_ioremap.h>
#include <xen/xen.h>
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 9dc5dc3..e94f945 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -49,7 +49,7 @@
#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1))
#define MODULES_END (PAGE_OFFSET)
#define MODULES_VADDR (MODULES_END - SZ_64M)
-#define EARLYCON_IOBASE (MODULES_VADDR - SZ_4M)
+#define FIXADDR_TOP (MODULES_VADDR - SZ_2M - PAGE_SIZE)
#define TASK_SIZE_64 (UL(1) << VA_BITS)
#ifdef CONFIG_COMPAT
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 2494fc0..f600d40 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -27,5 +27,6 @@
extern void paging_init(void);
extern void setup_mm_for_reboot(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
+extern void init_mem_pgprot(void);
#endif
diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c
index fbb6e18..ffbbdde 100644
--- a/arch/arm64/kernel/early_printk.c
+++ b/arch/arm64/kernel/early_printk.c
@@ -26,6 +26,8 @@
#include <linux/amba/serial.h>
#include <linux/serial_reg.h>
+#include <asm/fixmap.h>
+
static void __iomem *early_base;
static void (*printch)(char ch);
@@ -141,8 +143,10 @@
}
/* no options parsing yet */
- if (paddr)
- early_base = early_io_map(paddr, EARLYCON_IOBASE);
+ if (paddr) {
+ set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr);
+ early_base = (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE);
+ }
printch = match->printch;
early_console = &early_console_dev;
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 61035d6..1fe5d8d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -404,7 +404,7 @@
* - identity mapping to enable the MMU (low address, TTBR0)
* - first few MB of the kernel linear mapping to jump to once the MMU has
* been enabled, including the FDT blob (TTBR1)
- * - UART mapping if CONFIG_EARLY_PRINTK is enabled (TTBR1)
+ * - pgd entry for fixed mappings (TTBR1)
*/
__create_page_tables:
pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses
@@ -461,15 +461,12 @@
sub x6, x6, #1 // inclusive range
create_block_map x0, x7, x3, x5, x6
1:
-#ifdef CONFIG_EARLY_PRINTK
/*
- * Create the pgd entry for the UART mapping. The full mapping is done
- * later based earlyprintk kernel parameter.
+ * Create the pgd entry for the fixed mappings.
*/
- ldr x5, =EARLYCON_IOBASE // UART virtual address
+ ldr x5, =FIXADDR_TOP // Fixed mapping virtual address
add x0, x26, #2 * PAGE_SIZE // section table address
create_pgd_entry x26, x0, x5, x6, x7
-#endif
ret
ENDPROC(__create_page_tables)
.ltorg
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 67da307..720853f 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -42,6 +42,7 @@
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
+#include <asm/fixmap.h>
#include <asm/cputype.h>
#include <asm/elf.h>
#include <asm/cputable.h>
@@ -360,6 +361,9 @@
*cmdline_p = boot_command_line;
+ init_mem_pgprot();
+ early_ioremap_init();
+
parse_early_param();
arm64_memblock_init();
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 2bb1d58..7ec3283 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -25,6 +25,10 @@
#include <linux/vmalloc.h>
#include <linux/io.h>
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
pgprot_t prot, void *caller)
{
@@ -98,3 +102,84 @@
__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);
+
+#ifndef CONFIG_ARM64_64K_PAGES
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+#endif
+
+static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+
+ pgd = pgd_offset_k(addr);
+ BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+
+ pud = pud_offset(pgd, addr);
+ BUG_ON(pud_none(*pud) || pud_bad(*pud));
+
+ return pmd_offset(pud, addr);
+}
+
+static inline pte_t * __init early_ioremap_pte(unsigned long addr)
+{
+ pmd_t *pmd = early_ioremap_pmd(addr);
+
+ BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
+
+ return pte_offset_kernel(pmd, addr);
+}
+
+void __init early_ioremap_init(void)
+{
+ pmd_t *pmd;
+
+ pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+#ifndef CONFIG_ARM64_64K_PAGES
+ /* need to populate pmd for 4k pagesize only */
+ pmd_populate_kernel(&init_mm, pmd, bm_pte);
+#endif
+ /*
+ * The boot-ioremap range spans multiple pmds, for which
+ * we are not prepared:
+ */
+ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+ if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+ WARN_ON(1);
+ pr_warn("pmd %p != %p\n",
+ pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
+ pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+ fix_to_virt(FIX_BTMAP_BEGIN));
+ pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
+ fix_to_virt(FIX_BTMAP_END));
+
+ pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
+ pr_warn("FIX_BTMAP_BEGIN: %d\n",
+ FIX_BTMAP_BEGIN);
+ }
+
+ early_ioremap_setup();
+}
+
+void __init __early_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags)
+{
+ unsigned long addr = __fix_to_virt(idx);
+ pte_t *pte;
+
+ if (idx >= __end_of_fixed_addresses) {
+ BUG();
+ return;
+ }
+
+ pte = early_ioremap_pte(addr);
+
+ if (pgprot_val(flags))
+ set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+ else {
+ pte_clear(&init_mm, addr, pte);
+ flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
+ }
+}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f8dc7e8..6b7e895 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -125,7 +125,7 @@
/*
* Adjust the PMD section entries according to the CPU in use.
*/
-static void __init init_mem_pgprot(void)
+void __init init_mem_pgprot(void)
{
pteval_t default_pgprot;
int i;
@@ -260,47 +260,6 @@
} while (pgd++, addr = next, addr != end);
}
-#ifdef CONFIG_EARLY_PRINTK
-/*
- * Create an early I/O mapping using the pgd/pmd entries already populated
- * in head.S as this function is called too early to allocated any memory. The
- * mapping size is 2MB with 4KB pages or 64KB or 64KB pages.
- */
-void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt)
-{
- unsigned long size, mask;
- bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES);
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- /*
- * No early pte entries with !ARM64_64K_PAGES configuration, so using
- * sections (pmd).
- */
- size = page64k ? PAGE_SIZE : SECTION_SIZE;
- mask = ~(size - 1);
-
- pgd = pgd_offset_k(virt);
- pud = pud_offset(pgd, virt);
- if (pud_none(*pud))
- return NULL;
- pmd = pmd_offset(pud, virt);
-
- if (page64k) {
- if (pmd_none(*pmd))
- return NULL;
- pte = pte_offset_kernel(pmd, virt);
- set_pte(pte, __pte((phys & mask) | PROT_DEVICE_nGnRE));
- } else {
- set_pmd(pmd, __pmd((phys & mask) | PROT_SECT_DEVICE_nGnRE));
- }
-
- return (void __iomem *)((virt & mask) + (phys & ~mask));
-}
-#endif
-
static void __init map_mem(void)
{
struct memblock_region *reg;
@@ -357,7 +316,6 @@
{
void *zero_page;
- init_mem_pgprot();
map_mem();
/*
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index ed0fcdf..52731e2 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -29,7 +29,7 @@
bool
default y
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool y
config FORCE_MAX_ZONEORDER
@@ -138,6 +138,7 @@
bool
default y if ETRAX100LX || ETRAX100LX_V2
default n if !(ETRAX100LX || ETRAX100LX_V2)
+ select TTY
config ETRAX_ARCH_V32
bool
diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c
index 32c3d24..905b70e 100644
--- a/arch/cris/kernel/setup.c
+++ b/arch/cris/kernel/setup.c
@@ -165,6 +165,7 @@
strcpy(init_utsname()->machine, cris_machine_name);
}
+#ifdef CONFIG_PROC_FS
static void *c_start(struct seq_file *m, loff_t *pos)
{
return *pos < nr_cpu_ids ? (void *)(int)(*pos + 1) : NULL;
@@ -188,6 +189,7 @@
.stop = c_stop,
.show = show_cpuinfo,
};
+#endif /* CONFIG_PROC_FS */
static int __init topology_init(void)
{
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index fbc5c78..0fd6138 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -19,7 +19,7 @@
select GENERIC_IRQ_SHOW
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
- select NO_IOPORT
+ select NO_IOPORT_MAP
select GENERIC_IOMAP
select GENERIC_SMP_IDLE_THREAD
select STACKTRACE_SUPPORT
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 0c8e553..1325c3b 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -21,6 +21,7 @@
select HAVE_FUNCTION_TRACER
select HAVE_DMA_ATTRS
select HAVE_KVM
+ select TTY
select HAVE_ARCH_TRACEHOOK
select HAVE_DMA_API_DEBUG
select HAVE_MEMBLOCK
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index ca45044..9e44bbd 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -28,7 +28,7 @@
bool
default y
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool y
config NO_DMA
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index b2e3229..87b7c75 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -52,7 +52,7 @@
bool
default y
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool y
config NO_DMA
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
index b1d3c9c..499b761 100644
--- a/arch/metag/Kconfig
+++ b/arch/metag/Kconfig
@@ -52,7 +52,7 @@
config GENERIC_CALIBRATE_DELAY
def_bool y
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool y
source "init/Kconfig"
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 16d5ab1..5cd695f 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -175,7 +175,7 @@
select CPU_R4000_WORKAROUNDS if 64BIT
select CPU_R4400_WORKAROUNDS if 64BIT
select DMA_NONCOHERENT
- select NO_IOPORT
+ select NO_IOPORT_MAP
select IRQ_CPU
select SYS_HAS_CPU_R3000
select SYS_HAS_CPU_R4X00
@@ -947,7 +947,7 @@
config MIPS_MACHINE
def_bool n
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool n
config GENERIC_ISA_DMA
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 9488209..e71d712 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -41,7 +41,7 @@
config GENERIC_HWEIGHT
def_bool y
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool y
config TRACE_IRQFLAGS_SUPPORT
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 88dbf96..a677456 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -210,7 +210,6 @@
extern void crash_fadump(struct pt_regs *, const char *);
extern void fadump_cleanup(void);
-extern void vmcore_cleanup(void);
#else /* CONFIG_FA_DUMP */
static inline int is_fadump_active(void) { return 0; }
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 434fda3..d9e2b19 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -73,6 +73,7 @@
select SYS_SUPPORTS_HUGETLBFS
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
select ARCH_SUPPORTS_NUMA_BALANCING
+ select IRQ_WORK
config PPC_BOOK3E_64
bool "Embedded processors"
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 95dd892..cf2b084 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -531,6 +531,7 @@
sprintf(port->name, "RIO mport %d", i);
priv->dev = &dev->dev;
+ port->dev.parent = &dev->dev;
port->ops = ops;
port->priv = priv;
port->phys_efptr = 0x100;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 953f17c8..346d216 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -52,7 +52,7 @@
config AUDIT_ARCH
def_bool y
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool y
config PCI_QUIRKS
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 796c932..5d8324c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -505,6 +505,9 @@
if (!pmd_present(*pmd) &&
__pte_alloc(mm, vma, pmd, vmaddr))
return -ENOMEM;
+ /* large pmds cannot yet be handled */
+ if (pmd_large(*pmd))
+ return -EFAULT;
/* pmd now points to a valid segment table entry. */
rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
if (!rmap)
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 1399383..ba55e93 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -3,7 +3,7 @@
select ARCH_MIGHT_HAVE_PC_PARPORT
select EXPERT
select CLKDEV_LOOKUP
- select HAVE_IDE if HAS_IOPORT
+ select HAVE_IDE if HAS_IOPORT_MAP
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select ARCH_DISCARD_MEMBLOCK
@@ -138,7 +138,7 @@
config ARCH_HAS_ILOG2_U64
def_bool n
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool !PCI
depends on !SH_CAYMAN && !SH_SH4202_MICRODEV && !SH_SHMIN && \
!SH_HP6XX && !SH_SOLUTION_ENGINE
diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
index eb1cf84..e331e53 100644
--- a/arch/sh/boards/Kconfig
+++ b/arch/sh/boards/Kconfig
@@ -158,7 +158,7 @@
bool "SDK7786"
depends on CPU_SUBTYPE_SH7786
select SYS_SUPPORTS_PCI
- select NO_IOPORT if !PCI
+ select NO_IOPORT_MAP if !PCI
select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_SRAM_POOL
select REGULATOR_FIXED_VOLTAGE if REGULATOR
@@ -204,7 +204,7 @@
depends on CPU_SUBTYPE_SH7786
select ARCH_REQUIRE_GPIOLIB
select SYS_SUPPORTS_PCI
- select NO_IOPORT if !PCI
+ select NO_IOPORT_MAP if !PCI
config SH_MIGOR
bool "Migo-R"
@@ -306,7 +306,7 @@
config SH_X3PROTO
bool "SH-X3 Prototype board"
depends on CPU_SUBTYPE_SHX3
- select NO_IOPORT if !PCI
+ select NO_IOPORT_MAP if !PCI
select IRQ_DOMAIN
config SH_MAGIC_PANEL_R2
@@ -333,7 +333,7 @@
config SH_SH2007
bool "SH-2007 board"
- select NO_IOPORT
+ select NO_IOPORT_MAP
select REGULATOR_FIXED_VOLTAGE if REGULATOR
depends on CPU_SUBTYPE_SH7780
help
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index 629db2a..728c4c5 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -122,7 +122,7 @@
__BUILD_MEMORY_STRING(__raw_, q, u64)
-#ifdef CONFIG_HAS_IOPORT
+#ifdef CONFIG_HAS_IOPORT_MAP
/*
* Slowdown I/O port space accesses for antique hardware.
@@ -218,7 +218,7 @@
__BUILD_IOPORT_STRING(l, u32)
__BUILD_IOPORT_STRING(q, u64)
-#else /* !CONFIG_HAS_IOPORT */
+#else /* !CONFIG_HAS_IOPORT_MAP */
#include <asm/io_noioport.h>
diff --git a/arch/sh/include/asm/io_trapped.h b/arch/sh/include/asm/io_trapped.h
index f1251d4..4ab94ef 100644
--- a/arch/sh/include/asm/io_trapped.h
+++ b/arch/sh/include/asm/io_trapped.h
@@ -36,7 +36,7 @@
#define __ioremap_trapped(offset, size) NULL
#endif
-#ifdef CONFIG_HAS_IOPORT
+#ifdef CONFIG_HAS_IOPORT_MAP
extern struct list_head trapped_io;
static inline void __iomem *
diff --git a/arch/sh/include/asm/machvec.h b/arch/sh/include/asm/machvec.h
index eb9c20d..d3324e4 100644
--- a/arch/sh/include/asm/machvec.h
+++ b/arch/sh/include/asm/machvec.h
@@ -21,7 +21,7 @@
int (*mv_irq_demux)(int irq);
void (*mv_init_irq)(void);
-#ifdef CONFIG_HAS_IOPORT
+#ifdef CONFIG_HAS_IOPORT_MAP
void __iomem *(*mv_ioport_map)(unsigned long port, unsigned int size);
void (*mv_ioport_unmap)(void __iomem *);
#endif
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index 261c8bf..2ccf36c 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -22,7 +22,7 @@
ifndef CONFIG_GENERIC_IOMAP
obj-y += iomap.o
-obj-$(CONFIG_HAS_IOPORT) += ioport.o
+obj-$(CONFIG_HAS_IOPORT_MAP) += ioport.o
endif
obj-$(CONFIG_SUPERH32) += sys_sh32.o
diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c
index c0a9761..f8ce362 100644
--- a/arch/sh/kernel/io_trapped.c
+++ b/arch/sh/kernel/io_trapped.c
@@ -22,7 +22,7 @@
#define TRAPPED_PAGES_MAX 16
-#ifdef CONFIG_HAS_IOPORT
+#ifdef CONFIG_HAS_IOPORT_MAP
LIST_HEAD(trapped_io);
EXPORT_SYMBOL_GPL(trapped_io);
#endif
@@ -90,7 +90,7 @@
tiop->magic = IO_TRAPPED_MAGIC;
INIT_LIST_HEAD(&tiop->list);
spin_lock_irq(&trapped_lock);
-#ifdef CONFIG_HAS_IOPORT
+#ifdef CONFIG_HAS_IOPORT_MAP
if (flags & IORESOURCE_IO)
list_add(&tiop->list, &trapped_io);
#endif
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 31c8c62..85258ca 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -411,7 +411,7 @@
config NO_IOMEM
def_bool !PCI
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool !PCI
config TILE_PCI_IO
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index eecc414..f17bca8 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -359,7 +359,7 @@
/*
* Only x86 and x86_64 have an arch_align_stack().
* All other arches have "#define arch_align_stack(x) (x)"
- * in their asm/system.h
+ * in their asm/exec.h
* As this is included in UML from asm-um/system-generic.h,
* we can use it to behave as the subarch does.
*/
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 25c0dba..aafad6f 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -27,7 +27,7 @@
config GENERIC_CSUM
def_bool y
-config NO_IOPORT
+config NO_IOPORT_MAP
bool
config STACKTRACE_SUPPORT
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index fb5e4c6..ef470a7 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -14,6 +14,8 @@
#include <linux/compiler.h>
#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
#include <linux/io.h>
#include <asm/cacheflush.h>
@@ -73,7 +75,7 @@
else \
mm->mmap = NULL; \
rb_erase(&high_vma->vm_rb, &mm->mm_rb); \
- mm->mmap_cache = NULL; \
+ vmacache_invalidate(mm); \
mm->map_count--; \
remove_vma(high_vma); \
} \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f730717..5b8ec0f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -43,6 +43,7 @@
select HAVE_DMA_ATTRS
select HAVE_DMA_CONTIGUOUS if !SWIOTLB
select HAVE_KRETPROBES
+ select GENERIC_EARLY_IOREMAP
select HAVE_OPTPROBES
select HAVE_KPROBES_ON_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4acddc4..3ca9762 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,5 +5,6 @@
genhdr-y += unistd_x32.h
generic-y += clkdev.h
+generic-y += early_ioremap.h
generic-y += cputime.h
generic-y += mcs_spinlock.h
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 2f03ff0..ba38ebb 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -1,7 +1,6 @@
#ifndef _ASM_X86_BUG_H
#define _ASM_X86_BUG_H
-#ifdef CONFIG_BUG
#define HAVE_ARCH_BUG
#ifdef CONFIG_DEBUG_BUGVERBOSE
@@ -33,8 +32,6 @@
} while (0)
#endif
-#endif /* !CONFIG_BUG */
-
#include <asm-generic/bug.h>
#endif /* _ASM_X86_BUG_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 8dcd35c..43f482a 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -163,5 +163,11 @@
#include <asm-generic/fixmap.h>
+#define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags)
+#define __late_clear_fixmap(idx) __set_fixmap(idx, 0, __pgprot(0))
+
+void __early_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags);
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_FIXMAP_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 91d9c69..b8237d8 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -39,6 +39,7 @@
#include <linux/string.h>
#include <linux/compiler.h>
#include <asm/page.h>
+#include <asm/early_ioremap.h>
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
@@ -316,19 +317,6 @@
unsigned long prot_val);
extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
-/*
- * early_ioremap() and early_iounmap() are for temporary early boot-time
- * mappings, before the real ioremap() is functional.
- * A boot-time mapping is currently limited to at most 16 pages.
- */
-extern void early_ioremap_init(void);
-extern void early_ioremap_reset(void);
-extern void __iomem *early_ioremap(resource_size_t phys_addr,
- unsigned long size);
-extern void __iomem *early_memremap(resource_size_t phys_addr,
- unsigned long size);
-extern void early_iounmap(void __iomem *addr, unsigned long size);
-extern void fixup_early_ioremap(void);
extern bool is_early_ioremap_ptep(pte_t *ptep);
#ifdef CONFIG_XEN
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 94220d1..851bcdc 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -52,7 +52,7 @@
* Compared to the generic __my_cpu_offset version, the following
* saves one instruction and avoids clobbering a temp register.
*/
-#define __this_cpu_ptr(ptr) \
+#define raw_cpu_ptr(ptr) \
({ \
unsigned long tcp_ptr__; \
__verify_pcpu_ptr(ptr); \
@@ -362,25 +362,25 @@
*/
#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
-#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define __this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
-#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
-#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
-#define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
-#define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
-#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
-#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
-#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
-#define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
-#define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
-#define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
-#define __this_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val)
-#define __this_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
-#define __this_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
+#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
+#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
+#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
+#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
+#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
+#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
+#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
+#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
+#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
+#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
+#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
+#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
+#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -401,16 +401,16 @@
#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
-#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
-#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
-#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
+#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
+#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
@@ -427,7 +427,7 @@
__ret; \
})
-#define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
+#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
#endif /* CONFIG_X86_CMPXCHG64 */
@@ -436,22 +436,22 @@
* 32 bit must fall back to generic operations.
*/
#ifdef CONFIG_X86_64
-#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
-#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
-#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
+#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
+#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
+#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
+#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
+#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
+#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
+#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
+#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
+#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
/*
@@ -474,7 +474,7 @@
__ret; \
})
-#define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
+#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
#endif
@@ -495,9 +495,9 @@
unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
#ifdef CONFIG_X86_64
- return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_8(*a)) != 0;
+ return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
#else
- return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_4(*a)) != 0;
+ return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0;
#endif
}
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index c8b0519..7024c12 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -19,12 +19,12 @@
*/
static __always_inline int preempt_count(void)
{
- return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
+ return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
}
static __always_inline void preempt_count_set(int pc)
{
- __this_cpu_write_4(__preempt_count, pc);
+ raw_cpu_write_4(__preempt_count, pc);
}
/*
@@ -53,17 +53,17 @@
static __always_inline void set_preempt_need_resched(void)
{
- __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
+ raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
}
static __always_inline void clear_preempt_need_resched(void)
{
- __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
+ raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
}
static __always_inline bool test_preempt_need_resched(void)
{
- return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
+ return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
}
/*
@@ -72,12 +72,12 @@
static __always_inline void __preempt_count_add(int val)
{
- __this_cpu_add_4(__preempt_count, val);
+ raw_cpu_add_4(__preempt_count, val);
}
static __always_inline void __preempt_count_sub(int val)
{
- __this_cpu_add_4(__preempt_count, -val);
+ raw_cpu_add_4(__preempt_count, -val);
}
/*
@@ -95,7 +95,7 @@
*/
static __always_inline bool should_resched(void)
{
- return unlikely(!__this_cpu_read_4(__preempt_count));
+ return unlikely(!raw_cpu_read_4(__preempt_count));
}
#ifdef CONFIG_PREEMPT
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 799580c..597ac15 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -328,17 +328,6 @@
return;
}
-static int __initdata early_ioremap_debug;
-
-static int __init early_ioremap_debug_setup(char *str)
-{
- early_ioremap_debug = 1;
-
- return 0;
-}
-early_param("early_ioremap_debug", early_ioremap_debug_setup);
-
-static __initdata int after_paging_init;
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
@@ -362,18 +351,11 @@
return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
}
-static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
-
void __init early_ioremap_init(void)
{
pmd_t *pmd;
- int i;
- if (early_ioremap_debug)
- printk(KERN_INFO "early_ioremap_init()\n");
-
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
- slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
+ early_ioremap_setup();
pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
memset(bm_pte, 0, sizeof(bm_pte));
@@ -402,13 +384,8 @@
}
}
-void __init early_ioremap_reset(void)
-{
- after_paging_init = 1;
-}
-
-static void __init __early_set_fixmap(enum fixed_addresses idx,
- phys_addr_t phys, pgprot_t flags)
+void __init __early_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags)
{
unsigned long addr = __fix_to_virt(idx);
pte_t *pte;
@@ -425,198 +402,3 @@
pte_clear(&init_mm, addr, pte);
__flush_tlb_one(addr);
}
-
-static inline void __init early_set_fixmap(enum fixed_addresses idx,
- phys_addr_t phys, pgprot_t prot)
-{
- if (after_paging_init)
- __set_fixmap(idx, phys, prot);
- else
- __early_set_fixmap(idx, phys, prot);
-}
-
-static inline void __init early_clear_fixmap(enum fixed_addresses idx)
-{
- if (after_paging_init)
- clear_fixmap(idx);
- else
- __early_set_fixmap(idx, 0, __pgprot(0));
-}
-
-static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
-static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
-
-void __init fixup_early_ioremap(void)
-{
- int i;
-
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
- if (prev_map[i]) {
- WARN_ON(1);
- break;
- }
- }
-
- early_ioremap_init();
-}
-
-static int __init check_early_ioremap_leak(void)
-{
- int count = 0;
- int i;
-
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
- if (prev_map[i])
- count++;
-
- if (!count)
- return 0;
- WARN(1, KERN_WARNING
- "Debug warning: early ioremap leak of %d areas detected.\n",
- count);
- printk(KERN_WARNING
- "please boot with early_ioremap_debug and report the dmesg.\n");
-
- return 1;
-}
-late_initcall(check_early_ioremap_leak);
-
-static void __init __iomem *
-__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
-{
- unsigned long offset;
- resource_size_t last_addr;
- unsigned int nrpages;
- enum fixed_addresses idx;
- int i, slot;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- slot = -1;
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
- if (!prev_map[i]) {
- slot = i;
- break;
- }
- }
-
- if (slot < 0) {
- printk(KERN_INFO "%s(%08llx, %08lx) not found slot\n",
- __func__, (u64)phys_addr, size);
- WARN_ON(1);
- return NULL;
- }
-
- if (early_ioremap_debug) {
- printk(KERN_INFO "%s(%08llx, %08lx) [%d] => ",
- __func__, (u64)phys_addr, size, slot);
- dump_stack();
- }
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr) {
- WARN_ON(1);
- return NULL;
- }
-
- prev_size[slot] = size;
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr + 1) - phys_addr;
-
- /*
- * Mappings have to fit in the FIX_BTMAP area.
- */
- nrpages = size >> PAGE_SHIFT;
- if (nrpages > NR_FIX_BTMAPS) {
- WARN_ON(1);
- return NULL;
- }
-
- /*
- * Ok, go for it..
- */
- idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
- while (nrpages > 0) {
- early_set_fixmap(idx, phys_addr, prot);
- phys_addr += PAGE_SIZE;
- --idx;
- --nrpages;
- }
- if (early_ioremap_debug)
- printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]);
-
- prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]);
- return prev_map[slot];
-}
-
-/* Remap an IO device */
-void __init __iomem *
-early_ioremap(resource_size_t phys_addr, unsigned long size)
-{
- return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
-}
-
-/* Remap memory */
-void __init __iomem *
-early_memremap(resource_size_t phys_addr, unsigned long size)
-{
- return __early_ioremap(phys_addr, size, PAGE_KERNEL);
-}
-
-void __init early_iounmap(void __iomem *addr, unsigned long size)
-{
- unsigned long virt_addr;
- unsigned long offset;
- unsigned int nrpages;
- enum fixed_addresses idx;
- int i, slot;
-
- slot = -1;
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
- if (prev_map[i] == addr) {
- slot = i;
- break;
- }
- }
-
- if (slot < 0) {
- printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n",
- addr, size);
- WARN_ON(1);
- return;
- }
-
- if (prev_size[slot] != size) {
- printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n",
- addr, size, slot, prev_size[slot]);
- WARN_ON(1);
- return;
- }
-
- if (early_ioremap_debug) {
- printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
- size, slot);
- dump_stack();
- }
-
- virt_addr = (unsigned long)addr;
- if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
- WARN_ON(1);
- return;
- }
- offset = virt_addr & ~PAGE_MASK;
- nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;
-
- idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
- while (nrpages > 0) {
- early_clear_fixmap(idx);
- --idx;
- --nrpages;
- }
- prev_map[slot] = NULL;
-}
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index a69bcb8..4dd8cf65 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -127,7 +127,7 @@
address = memparse(arg, &arg);
reserve_top_address(address);
- fixup_early_ioremap();
+ early_ioremap_init();
return 0;
}
early_param("reservetop", parse_reservetop);
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index c87ae7c..02d6d29 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -41,7 +41,7 @@
config ARCH_HAS_ILOG2_U64
def_bool n
-config NO_IOPORT
+config NO_IOPORT_MAP
def_bool n
config HZ
@@ -239,7 +239,7 @@
config XTENSA_PLATFORM_S6105
bool "S6105"
select SERIAL_CONSOLE
- select NO_IOPORT
+ select NO_IOPORT_MAP
config XTENSA_PLATFORM_XTFPGA
bool "XTFPGA"
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig
index 4f23320..d57d917 100644
--- a/arch/xtensa/configs/iss_defconfig
+++ b/arch/xtensa/configs/iss_defconfig
@@ -11,7 +11,7 @@
CONFIG_GENERIC_HWEIGHT=y
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
-CONFIG_NO_IOPORT=y
+CONFIG_NO_IOPORT_MAP=y
CONFIG_HZ=100
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
CONFIG_CONSTRUCTORS=y
diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig
index d929f77..583c2b0 100644
--- a/arch/xtensa/configs/s6105_defconfig
+++ b/arch/xtensa/configs/s6105_defconfig
@@ -11,7 +11,7 @@
CONFIG_GENERIC_HWEIGHT=y
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
-CONFIG_NO_IOPORT=y
+CONFIG_NO_IOPORT_MAP=y
CONFIG_HZ=100
CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index 3450be8..6489c0f 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -15,6 +15,16 @@
See zram.txt for more information.
+config ZRAM_LZ4_COMPRESS
+ bool "Enable LZ4 algorithm support"
+ depends on ZRAM
+ select LZ4_COMPRESS
+ select LZ4_DECOMPRESS
+ default n
+ help
+ This option enables LZ4 compression algorithm support. Compression
+ algorithm can be changed using `comp_algorithm' device attribute.
+
config ZRAM_DEBUG
bool "Compressed RAM block device debug support"
depends on ZRAM
diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile
index cb0f9ce..be0763f 100644
--- a/drivers/block/zram/Makefile
+++ b/drivers/block/zram/Makefile
@@ -1,3 +1,5 @@
-zram-y := zram_drv.o
+zram-y := zcomp_lzo.o zcomp.o zram_drv.o
+
+zram-$(CONFIG_ZRAM_LZ4_COMPRESS) += zcomp_lz4.o
obj-$(CONFIG_ZRAM) += zram.o
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
new file mode 100644
index 0000000..f1ff39a
--- /dev/null
+++ b/drivers/block/zram/zcomp.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright (C) 2014 Sergey Senozhatsky.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include "zcomp.h"
+#include "zcomp_lzo.h"
+#ifdef CONFIG_ZRAM_LZ4_COMPRESS
+#include "zcomp_lz4.h"
+#endif
+
+/*
+ * single zcomp_strm backend
+ */
+struct zcomp_strm_single {
+ struct mutex strm_lock;
+ struct zcomp_strm *zstrm;
+};
+
+/*
+ * multi zcomp_strm backend
+ */
+struct zcomp_strm_multi {
+ /* protect strm list */
+ spinlock_t strm_lock;
+ /* max possible number of zstrm streams */
+ int max_strm;
+ /* number of available zstrm streams */
+ int avail_strm;
+ /* list of available strms */
+ struct list_head idle_strm;
+ wait_queue_head_t strm_wait;
+};
+
+static struct zcomp_backend *backends[] = {
+ &zcomp_lzo,
+#ifdef CONFIG_ZRAM_LZ4_COMPRESS
+ &zcomp_lz4,
+#endif
+ NULL
+};
+
+static struct zcomp_backend *find_backend(const char *compress)
+{
+ int i = 0;
+ while (backends[i]) {
+ if (sysfs_streq(compress, backends[i]->name))
+ break;
+ i++;
+ }
+ return backends[i];
+}
+
+static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm)
+{
+ if (zstrm->private)
+ comp->backend->destroy(zstrm->private);
+ free_pages((unsigned long)zstrm->buffer, 1);
+ kfree(zstrm);
+}
+
+/*
+ * allocate new zcomp_strm structure with ->private initialized by
+ * backend, return NULL on error
+ */
+static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp)
+{
+ struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL);
+ if (!zstrm)
+ return NULL;
+
+ zstrm->private = comp->backend->create();
+ /*
+ * allocate 2 pages. 1 for compressed data, plus 1 extra for the
+ * case when compressed size is larger than the original one
+ */
+ zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
+ if (!zstrm->private || !zstrm->buffer) {
+ zcomp_strm_free(comp, zstrm);
+ zstrm = NULL;
+ }
+ return zstrm;
+}
+
+/*
+ * get idle zcomp_strm or wait until other process release
+ * (zcomp_strm_release()) one for us
+ */
+static struct zcomp_strm *zcomp_strm_multi_find(struct zcomp *comp)
+{
+ struct zcomp_strm_multi *zs = comp->stream;
+ struct zcomp_strm *zstrm;
+
+ while (1) {
+ spin_lock(&zs->strm_lock);
+ if (!list_empty(&zs->idle_strm)) {
+ zstrm = list_entry(zs->idle_strm.next,
+ struct zcomp_strm, list);
+ list_del(&zstrm->list);
+ spin_unlock(&zs->strm_lock);
+ return zstrm;
+ }
+ /* zstrm streams limit reached, wait for idle stream */
+ if (zs->avail_strm >= zs->max_strm) {
+ spin_unlock(&zs->strm_lock);
+ wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
+ continue;
+ }
+ /* allocate new zstrm stream */
+ zs->avail_strm++;
+ spin_unlock(&zs->strm_lock);
+
+ zstrm = zcomp_strm_alloc(comp);
+ if (!zstrm) {
+ spin_lock(&zs->strm_lock);
+ zs->avail_strm--;
+ spin_unlock(&zs->strm_lock);
+ wait_event(zs->strm_wait, !list_empty(&zs->idle_strm));
+ continue;
+ }
+ break;
+ }
+ return zstrm;
+}
+
+/* add stream back to idle list and wake up waiter or free the stream */
+static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstrm)
+{
+ struct zcomp_strm_multi *zs = comp->stream;
+
+ spin_lock(&zs->strm_lock);
+ if (zs->avail_strm <= zs->max_strm) {
+ list_add(&zstrm->list, &zs->idle_strm);
+ spin_unlock(&zs->strm_lock);
+ wake_up(&zs->strm_wait);
+ return;
+ }
+
+ zs->avail_strm--;
+ spin_unlock(&zs->strm_lock);
+ zcomp_strm_free(comp, zstrm);
+}
+
+/* change max_strm limit */
+static bool zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm)
+{
+ struct zcomp_strm_multi *zs = comp->stream;
+ struct zcomp_strm *zstrm;
+
+ spin_lock(&zs->strm_lock);
+ zs->max_strm = num_strm;
+ /*
+ * if user has lowered the limit and there are idle streams,
+ * immediately free as much streams (and memory) as we can.
+ */
+ while (zs->avail_strm > num_strm && !list_empty(&zs->idle_strm)) {
+ zstrm = list_entry(zs->idle_strm.next,
+ struct zcomp_strm, list);
+ list_del(&zstrm->list);
+ zcomp_strm_free(comp, zstrm);
+ zs->avail_strm--;
+ }
+ spin_unlock(&zs->strm_lock);
+ return true;
+}
+
+static void zcomp_strm_multi_destroy(struct zcomp *comp)
+{
+ struct zcomp_strm_multi *zs = comp->stream;
+ struct zcomp_strm *zstrm;
+
+ while (!list_empty(&zs->idle_strm)) {
+ zstrm = list_entry(zs->idle_strm.next,
+ struct zcomp_strm, list);
+ list_del(&zstrm->list);
+ zcomp_strm_free(comp, zstrm);
+ }
+ kfree(zs);
+}
+
+static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm)
+{
+ struct zcomp_strm *zstrm;
+ struct zcomp_strm_multi *zs;
+
+ comp->destroy = zcomp_strm_multi_destroy;
+ comp->strm_find = zcomp_strm_multi_find;
+ comp->strm_release = zcomp_strm_multi_release;
+ comp->set_max_streams = zcomp_strm_multi_set_max_streams;
+ zs = kmalloc(sizeof(struct zcomp_strm_multi), GFP_KERNEL);
+ if (!zs)
+ return -ENOMEM;
+
+ comp->stream = zs;
+ spin_lock_init(&zs->strm_lock);
+ INIT_LIST_HEAD(&zs->idle_strm);
+ init_waitqueue_head(&zs->strm_wait);
+ zs->max_strm = max_strm;
+ zs->avail_strm = 1;
+
+ zstrm = zcomp_strm_alloc(comp);
+ if (!zstrm) {
+ kfree(zs);
+ return -ENOMEM;
+ }
+ list_add(&zstrm->list, &zs->idle_strm);
+ return 0;
+}
+
+static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp)
+{
+ struct zcomp_strm_single *zs = comp->stream;
+ mutex_lock(&zs->strm_lock);
+ return zs->zstrm;
+}
+
+static void zcomp_strm_single_release(struct zcomp *comp,
+ struct zcomp_strm *zstrm)
+{
+ struct zcomp_strm_single *zs = comp->stream;
+ mutex_unlock(&zs->strm_lock);
+}
+
+static bool zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm)
+{
+ /* zcomp_strm_single support only max_comp_streams == 1 */
+ return false;
+}
+
+static void zcomp_strm_single_destroy(struct zcomp *comp)
+{
+ struct zcomp_strm_single *zs = comp->stream;
+ zcomp_strm_free(comp, zs->zstrm);
+ kfree(zs);
+}
+
+static int zcomp_strm_single_create(struct zcomp *comp)
+{
+ struct zcomp_strm_single *zs;
+
+ comp->destroy = zcomp_strm_single_destroy;
+ comp->strm_find = zcomp_strm_single_find;
+ comp->strm_release = zcomp_strm_single_release;
+ comp->set_max_streams = zcomp_strm_single_set_max_streams;
+ zs = kmalloc(sizeof(struct zcomp_strm_single), GFP_KERNEL);
+ if (!zs)
+ return -ENOMEM;
+
+ comp->stream = zs;
+ mutex_init(&zs->strm_lock);
+ zs->zstrm = zcomp_strm_alloc(comp);
+ if (!zs->zstrm) {
+ kfree(zs);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/* show available compressors */
+ssize_t zcomp_available_show(const char *comp, char *buf)
+{
+ ssize_t sz = 0;
+ int i = 0;
+
+ while (backends[i]) {
+ if (sysfs_streq(comp, backends[i]->name))
+ sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
+ "[%s] ", backends[i]->name);
+ else
+ sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
+ "%s ", backends[i]->name);
+ i++;
+ }
+ sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n");
+ return sz;
+}
+
+bool zcomp_set_max_streams(struct zcomp *comp, int num_strm)
+{
+ return comp->set_max_streams(comp, num_strm);
+}
+
+struct zcomp_strm *zcomp_strm_find(struct zcomp *comp)
+{
+ return comp->strm_find(comp);
+}
+
+void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm)
+{
+ comp->strm_release(comp, zstrm);
+}
+
+int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm,
+ const unsigned char *src, size_t *dst_len)
+{
+ return comp->backend->compress(src, zstrm->buffer, dst_len,
+ zstrm->private);
+}
+
+int zcomp_decompress(struct zcomp *comp, const unsigned char *src,
+ size_t src_len, unsigned char *dst)
+{
+ return comp->backend->decompress(src, src_len, dst);
+}
+
+void zcomp_destroy(struct zcomp *comp)
+{
+ comp->destroy(comp);
+ kfree(comp);
+}
+
+/*
+ * search available compressors for requested algorithm.
+ * allocate new zcomp and initialize it. return compressing
+ * backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL)
+ * if requested algorithm is not supported, ERR_PTR(-ENOMEM) in
+ * case of allocation error.
+ */
+struct zcomp *zcomp_create(const char *compress, int max_strm)
+{
+ struct zcomp *comp;
+ struct zcomp_backend *backend;
+
+ backend = find_backend(compress);
+ if (!backend)
+ return ERR_PTR(-EINVAL);
+
+ comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL);
+ if (!comp)
+ return ERR_PTR(-ENOMEM);
+
+ comp->backend = backend;
+ if (max_strm > 1)
+ zcomp_strm_multi_create(comp, max_strm);
+ else
+ zcomp_strm_single_create(comp);
+ if (!comp->stream) {
+ kfree(comp);
+ return ERR_PTR(-ENOMEM);
+ }
+ return comp;
+}
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
new file mode 100644
index 0000000..c59d1fc
--- /dev/null
+++ b/drivers/block/zram/zcomp.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2014 Sergey Senozhatsky.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ZCOMP_H_
+#define _ZCOMP_H_
+
+#include <linux/mutex.h>
+
+struct zcomp_strm {
+ /* compression/decompression buffer */
+ void *buffer;
+ /*
+ * The private data of the compression stream, only compression
+ * stream backend can touch this (e.g. compression algorithm
+ * working memory)
+ */
+ void *private;
+ /* used in multi stream backend, protected by backend strm_lock */
+ struct list_head list;
+};
+
+/* static compression backend */
+struct zcomp_backend {
+ int (*compress)(const unsigned char *src, unsigned char *dst,
+ size_t *dst_len, void *private);
+
+ int (*decompress)(const unsigned char *src, size_t src_len,
+ unsigned char *dst);
+
+ void *(*create)(void);
+ void (*destroy)(void *private);
+
+ const char *name;
+};
+
+/* dynamic per-device compression frontend */
+struct zcomp {
+ void *stream;
+ struct zcomp_backend *backend;
+
+ struct zcomp_strm *(*strm_find)(struct zcomp *comp);
+ void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm);
+ bool (*set_max_streams)(struct zcomp *comp, int num_strm);
+ void (*destroy)(struct zcomp *comp);
+};
+
+ssize_t zcomp_available_show(const char *comp, char *buf);
+
+struct zcomp *zcomp_create(const char *comp, int max_strm);
+void zcomp_destroy(struct zcomp *comp);
+
+struct zcomp_strm *zcomp_strm_find(struct zcomp *comp);
+void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm);
+
+int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm,
+ const unsigned char *src, size_t *dst_len);
+
+int zcomp_decompress(struct zcomp *comp, const unsigned char *src,
+ size_t src_len, unsigned char *dst);
+
+bool zcomp_set_max_streams(struct zcomp *comp, int num_strm);
+#endif /* _ZCOMP_H_ */
diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c
new file mode 100644
index 0000000..f2afb7e
--- /dev/null
+++ b/drivers/block/zram/zcomp_lz4.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2014 Sergey Senozhatsky.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/lz4.h>
+
+#include "zcomp_lz4.h"
+
+static void *zcomp_lz4_create(void)
+{
+ return kzalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
+}
+
+static void zcomp_lz4_destroy(void *private)
+{
+ kfree(private);
+}
+
+static int zcomp_lz4_compress(const unsigned char *src, unsigned char *dst,
+ size_t *dst_len, void *private)
+{
+ /* return : Success if return 0 */
+ return lz4_compress(src, PAGE_SIZE, dst, dst_len, private);
+}
+
+static int zcomp_lz4_decompress(const unsigned char *src, size_t src_len,
+ unsigned char *dst)
+{
+ size_t dst_len = PAGE_SIZE;
+ /* return : Success if return 0 */
+ return lz4_decompress_unknownoutputsize(src, src_len, dst, &dst_len);
+}
+
+struct zcomp_backend zcomp_lz4 = {
+ .compress = zcomp_lz4_compress,
+ .decompress = zcomp_lz4_decompress,
+ .create = zcomp_lz4_create,
+ .destroy = zcomp_lz4_destroy,
+ .name = "lz4",
+};
diff --git a/drivers/block/zram/zcomp_lz4.h b/drivers/block/zram/zcomp_lz4.h
new file mode 100644
index 0000000..60613fb
--- /dev/null
+++ b/drivers/block/zram/zcomp_lz4.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2014 Sergey Senozhatsky.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ZCOMP_LZ4_H_
+#define _ZCOMP_LZ4_H_
+
+#include "zcomp.h"
+
+extern struct zcomp_backend zcomp_lz4;
+
+#endif /* _ZCOMP_LZ4_H_ */
diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c
new file mode 100644
index 0000000..da1bc47
--- /dev/null
+++ b/drivers/block/zram/zcomp_lzo.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2014 Sergey Senozhatsky.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/lzo.h>
+
+#include "zcomp_lzo.h"
+
+static void *lzo_create(void)
+{
+ return kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
+}
+
+static void lzo_destroy(void *private)
+{
+ kfree(private);
+}
+
+static int lzo_compress(const unsigned char *src, unsigned char *dst,
+ size_t *dst_len, void *private)
+{
+ int ret = lzo1x_1_compress(src, PAGE_SIZE, dst, dst_len, private);
+ return ret == LZO_E_OK ? 0 : ret;
+}
+
+static int lzo_decompress(const unsigned char *src, size_t src_len,
+ unsigned char *dst)
+{
+ size_t dst_len = PAGE_SIZE;
+ int ret = lzo1x_decompress_safe(src, src_len, dst, &dst_len);
+ return ret == LZO_E_OK ? 0 : ret;
+}
+
+struct zcomp_backend zcomp_lzo = {
+ .compress = lzo_compress,
+ .decompress = lzo_decompress,
+ .create = lzo_create,
+ .destroy = lzo_destroy,
+ .name = "lzo",
+};
diff --git a/drivers/block/zram/zcomp_lzo.h b/drivers/block/zram/zcomp_lzo.h
new file mode 100644
index 0000000..128c580
--- /dev/null
+++ b/drivers/block/zram/zcomp_lzo.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2014 Sergey Senozhatsky.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ZCOMP_LZO_H_
+#define _ZCOMP_LZO_H_
+
+#include "zcomp.h"
+
+extern struct zcomp_backend zcomp_lzo;
+
+#endif /* _ZCOMP_LZO_H_ */
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 51c557c..9849b52 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -29,19 +29,36 @@
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
-#include <linux/lzo.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
+#include <linux/err.h>
#include "zram_drv.h"
/* Globals */
static int zram_major;
static struct zram *zram_devices;
+static const char *default_compressor = "lzo";
/* Module params (documentation at end) */
static unsigned int num_devices = 1;
+#define ZRAM_ATTR_RO(name) \
+static ssize_t zram_attr_##name##_show(struct device *d, \
+ struct device_attribute *attr, char *b) \
+{ \
+ struct zram *zram = dev_to_zram(d); \
+ return scnprintf(b, PAGE_SIZE, "%llu\n", \
+ (u64)atomic64_read(&zram->stats.name)); \
+} \
+static struct device_attribute dev_attr_##name = \
+ __ATTR(name, S_IRUGO, zram_attr_##name##_show, NULL);
+
+static inline int init_done(struct zram *zram)
+{
+ return zram->meta != NULL;
+}
+
static inline struct zram *dev_to_zram(struct device *dev)
{
return (struct zram *)dev_to_disk(dev)->private_data;
@@ -52,59 +69,20 @@
{
struct zram *zram = dev_to_zram(dev);
- return sprintf(buf, "%llu\n", zram->disksize);
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}
static ssize_t initstate_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ u32 val;
struct zram *zram = dev_to_zram(dev);
- return sprintf(buf, "%u\n", zram->init_done);
-}
+ down_read(&zram->init_lock);
+ val = init_done(zram);
+ up_read(&zram->init_lock);
-static ssize_t num_reads_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- (u64)atomic64_read(&zram->stats.num_reads));
-}
-
-static ssize_t num_writes_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- (u64)atomic64_read(&zram->stats.num_writes));
-}
-
-static ssize_t invalid_io_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- (u64)atomic64_read(&zram->stats.invalid_io));
-}
-
-static ssize_t notify_free_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- (u64)atomic64_read(&zram->stats.notify_free));
-}
-
-static ssize_t zero_pages_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero));
+ return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}
static ssize_t orig_data_size_show(struct device *dev,
@@ -112,17 +90,8 @@
{
struct zram *zram = dev_to_zram(dev);
- return sprintf(buf, "%llu\n",
- (u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
-}
-
-static ssize_t compr_data_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%llu\n",
- (u64)atomic64_read(&zram->stats.compr_size));
+ return scnprintf(buf, PAGE_SIZE, "%llu\n",
+ (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
}
static ssize_t mem_used_total_show(struct device *dev,
@@ -133,11 +102,81 @@
struct zram_meta *meta = zram->meta;
down_read(&zram->init_lock);
- if (zram->init_done)
+ if (init_done(zram))
val = zs_get_total_size_bytes(meta->mem_pool);
up_read(&zram->init_lock);
- return sprintf(buf, "%llu\n", val);
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+}
+
+static ssize_t max_comp_streams_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int val;
+ struct zram *zram = dev_to_zram(dev);
+
+ down_read(&zram->init_lock);
+ val = zram->max_comp_streams;
+ up_read(&zram->init_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
+static ssize_t max_comp_streams_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ int num;
+ struct zram *zram = dev_to_zram(dev);
+ int ret;
+
+ ret = kstrtoint(buf, 0, &num);
+ if (ret < 0)
+ return ret;
+ if (num < 1)
+ return -EINVAL;
+
+ down_write(&zram->init_lock);
+ if (init_done(zram)) {
+ if (!zcomp_set_max_streams(zram->comp, num)) {
+ pr_info("Cannot change max compression streams\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ zram->max_comp_streams = num;
+ ret = len;
+out:
+ up_write(&zram->init_lock);
+ return ret;
+}
+
+static ssize_t comp_algorithm_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ size_t sz;
+ struct zram *zram = dev_to_zram(dev);
+
+ down_read(&zram->init_lock);
+ sz = zcomp_available_show(zram->compressor, buf);
+ up_read(&zram->init_lock);
+
+ return sz;
+}
+
+static ssize_t comp_algorithm_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ down_write(&zram->init_lock);
+ if (init_done(zram)) {
+ up_write(&zram->init_lock);
+ pr_info("Can't change algorithm for initialized device\n");
+ return -EBUSY;
+ }
+ strlcpy(zram->compressor, buf, sizeof(zram->compressor));
+ up_write(&zram->init_lock);
+ return len;
}
/* flag operations needs meta->tb_lock */
@@ -192,8 +231,6 @@
static void zram_meta_free(struct zram_meta *meta)
{
zs_destroy_pool(meta->mem_pool);
- kfree(meta->compress_workmem);
- free_pages((unsigned long)meta->compress_buffer, 1);
vfree(meta->table);
kfree(meta);
}
@@ -205,22 +242,11 @@
if (!meta)
goto out;
- meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
- if (!meta->compress_workmem)
- goto free_meta;
-
- meta->compress_buffer =
- (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
- if (!meta->compress_buffer) {
- pr_err("Error allocating compressor buffer space\n");
- goto free_workmem;
- }
-
num_pages = disksize >> PAGE_SHIFT;
meta->table = vzalloc(num_pages * sizeof(*meta->table));
if (!meta->table) {
pr_err("Error allocating zram address table\n");
- goto free_buffer;
+ goto free_meta;
}
meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
@@ -230,15 +256,10 @@
}
rwlock_init(&meta->tb_lock);
- mutex_init(&meta->buffer_lock);
return meta;
free_table:
vfree(meta->table);
-free_buffer:
- free_pages((unsigned long)meta->compress_buffer, 1);
-free_workmem:
- kfree(meta->compress_workmem);
free_meta:
kfree(meta);
meta = NULL;
@@ -288,7 +309,6 @@
{
struct zram_meta *meta = zram->meta;
unsigned long handle = meta->table[index].handle;
- u16 size = meta->table[index].size;
if (unlikely(!handle)) {
/*
@@ -297,21 +317,15 @@
*/
if (zram_test_flag(meta, index, ZRAM_ZERO)) {
zram_clear_flag(meta, index, ZRAM_ZERO);
- atomic_dec(&zram->stats.pages_zero);
+ atomic64_dec(&zram->stats.zero_pages);
}
return;
}
- if (unlikely(size > max_zpage_size))
- atomic_dec(&zram->stats.bad_compress);
-
zs_free(meta->mem_pool, handle);
- if (size <= PAGE_SIZE / 2)
- atomic_dec(&zram->stats.good_compress);
-
- atomic64_sub(meta->table[index].size, &zram->stats.compr_size);
- atomic_dec(&zram->stats.pages_stored);
+ atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size);
+ atomic64_dec(&zram->stats.pages_stored);
meta->table[index].handle = 0;
meta->table[index].size = 0;
@@ -319,8 +333,7 @@
static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
{
- int ret = LZO_E_OK;
- size_t clen = PAGE_SIZE;
+ int ret = 0;
unsigned char *cmem;
struct zram_meta *meta = zram->meta;
unsigned long handle;
@@ -340,12 +353,12 @@
if (size == PAGE_SIZE)
copy_page(mem, cmem);
else
- ret = lzo1x_decompress_safe(cmem, size, mem, &clen);
+ ret = zcomp_decompress(zram->comp, cmem, size, mem);
zs_unmap_object(meta->mem_pool, handle);
read_unlock(&meta->tb_lock);
/* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret != LZO_E_OK)) {
+ if (unlikely(ret)) {
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
atomic64_inc(&zram->stats.failed_reads);
return ret;
@@ -388,7 +401,7 @@
ret = zram_decompress_page(zram, uncmem, index);
/* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret != LZO_E_OK))
+ if (unlikely(ret))
goto out_cleanup;
if (is_partial_io(bvec))
@@ -413,11 +426,10 @@
struct page *page;
unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
struct zram_meta *meta = zram->meta;
+ struct zcomp_strm *zstrm;
bool locked = false;
page = bvec->bv_page;
- src = meta->compress_buffer;
-
if (is_partial_io(bvec)) {
/*
* This is a partial IO. We need to read the full page
@@ -433,7 +445,7 @@
goto out;
}
- mutex_lock(&meta->buffer_lock);
+ zstrm = zcomp_strm_find(zram->comp);
locked = true;
user_mem = kmap_atomic(page);
@@ -454,28 +466,25 @@
zram_set_flag(meta, index, ZRAM_ZERO);
write_unlock(&zram->meta->tb_lock);
- atomic_inc(&zram->stats.pages_zero);
+ atomic64_inc(&zram->stats.zero_pages);
ret = 0;
goto out;
}
- ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
- meta->compress_workmem);
+ ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
if (!is_partial_io(bvec)) {
kunmap_atomic(user_mem);
user_mem = NULL;
uncmem = NULL;
}
- if (unlikely(ret != LZO_E_OK)) {
+ if (unlikely(ret)) {
pr_err("Compression failed! err=%d\n", ret);
goto out;
}
-
+ src = zstrm->buffer;
if (unlikely(clen > max_zpage_size)) {
- atomic_inc(&zram->stats.bad_compress);
clen = PAGE_SIZE;
- src = NULL;
if (is_partial_io(bvec))
src = uncmem;
}
@@ -497,6 +506,8 @@
memcpy(cmem, src, clen);
}
+ zcomp_strm_release(zram->comp, zstrm);
+ locked = false;
zs_unmap_object(meta->mem_pool, handle);
/*
@@ -511,49 +522,88 @@
write_unlock(&zram->meta->tb_lock);
/* Update stats */
- atomic64_add(clen, &zram->stats.compr_size);
- atomic_inc(&zram->stats.pages_stored);
- if (clen <= PAGE_SIZE / 2)
- atomic_inc(&zram->stats.good_compress);
-
+ atomic64_add(clen, &zram->stats.compr_data_size);
+ atomic64_inc(&zram->stats.pages_stored);
out:
if (locked)
- mutex_unlock(&meta->buffer_lock);
+ zcomp_strm_release(zram->comp, zstrm);
if (is_partial_io(bvec))
kfree(uncmem);
-
if (ret)
atomic64_inc(&zram->stats.failed_writes);
return ret;
}
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset, struct bio *bio, int rw)
+ int offset, struct bio *bio)
{
int ret;
+ int rw = bio_data_dir(bio);
- if (rw == READ)
+ if (rw == READ) {
+ atomic64_inc(&zram->stats.num_reads);
ret = zram_bvec_read(zram, bvec, index, offset, bio);
- else
+ } else {
+ atomic64_inc(&zram->stats.num_writes);
ret = zram_bvec_write(zram, bvec, index, offset);
+ }
return ret;
}
+/*
+ * zram_bio_discard - handler on discard request
+ * @index: physical block index in PAGE_SIZE units
+ * @offset: byte offset within physical block
+ */
+static void zram_bio_discard(struct zram *zram, u32 index,
+ int offset, struct bio *bio)
+{
+ size_t n = bio->bi_iter.bi_size;
+
+ /*
+ * zram manages data in physical block size units. Because logical block
+ * size isn't identical with physical block size on some arch, we
+ * could get a discard request pointing to a specific offset within a
+ * certain physical block. Although we can handle this request by
+ * reading that physiclal block and decompressing and partially zeroing
+ * and re-compressing and then re-storing it, this isn't reasonable
+ * because our intent with a discard request is to save memory. So
+ * skipping this logical block is appropriate here.
+ */
+ if (offset) {
+ if (n < offset)
+ return;
+
+ n -= offset;
+ index++;
+ }
+
+ while (n >= PAGE_SIZE) {
+ /*
+ * Discard request can be large so the lock hold times could be
+ * lengthy. So take the lock once per page.
+ */
+ write_lock(&zram->meta->tb_lock);
+ zram_free_page(zram, index);
+ write_unlock(&zram->meta->tb_lock);
+ index++;
+ n -= PAGE_SIZE;
+ }
+}
+
static void zram_reset_device(struct zram *zram, bool reset_capacity)
{
size_t index;
struct zram_meta *meta;
down_write(&zram->init_lock);
- if (!zram->init_done) {
+ if (!init_done(zram)) {
up_write(&zram->init_lock);
return;
}
meta = zram->meta;
- zram->init_done = 0;
-
/* Free all pages that are still in this zram device */
for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
unsigned long handle = meta->table[index].handle;
@@ -563,6 +613,9 @@
zs_free(meta->mem_pool, handle);
}
+ zcomp_destroy(zram->comp);
+ zram->max_comp_streams = 1;
+
zram_meta_free(zram->meta);
zram->meta = NULL;
/* Reset stats */
@@ -574,37 +627,14 @@
up_write(&zram->init_lock);
}
-static void zram_init_device(struct zram *zram, struct zram_meta *meta)
-{
- if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) {
- pr_info(
- "There is little point creating a zram of greater than "
- "twice the size of memory since we expect a 2:1 compression "
- "ratio. Note that zram uses about 0.1%% of the size of "
- "the disk when not in use so a huge zram is "
- "wasteful.\n"
- "\tMemory Size: %lu kB\n"
- "\tSize you selected: %llu kB\n"
- "Continuing anyway ...\n",
- (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10
- );
- }
-
- /* zram devices sort of resembles non-rotational disks */
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
-
- zram->meta = meta;
- zram->init_done = 1;
-
- pr_debug("Initialization done!\n");
-}
-
static ssize_t disksize_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
u64 disksize;
+ struct zcomp *comp;
struct zram_meta *meta;
struct zram *zram = dev_to_zram(dev);
+ int err;
disksize = memparse(buf, NULL);
if (!disksize)
@@ -614,20 +644,35 @@
meta = zram_meta_alloc(disksize);
if (!meta)
return -ENOMEM;
- down_write(&zram->init_lock);
- if (zram->init_done) {
- up_write(&zram->init_lock);
- zram_meta_free(meta);
- pr_info("Cannot change disksize for initialized device\n");
- return -EBUSY;
+
+ comp = zcomp_create(zram->compressor, zram->max_comp_streams);
+ if (IS_ERR(comp)) {
+ pr_info("Cannot initialise %s compressing backend\n",
+ zram->compressor);
+ err = PTR_ERR(comp);
+ goto out_free_meta;
}
+ down_write(&zram->init_lock);
+ if (init_done(zram)) {
+ pr_info("Cannot change disksize for initialized device\n");
+ err = -EBUSY;
+ goto out_destroy_comp;
+ }
+
+ zram->meta = meta;
+ zram->comp = comp;
zram->disksize = disksize;
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
- zram_init_device(zram, meta);
up_write(&zram->init_lock);
-
return len;
+
+out_destroy_comp:
+ up_write(&zram->init_lock);
+ zcomp_destroy(comp);
+out_free_meta:
+ zram_meta_free(meta);
+ return err;
}
static ssize_t reset_store(struct device *dev,
@@ -671,26 +716,23 @@
return ret;
}
-static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
+static void __zram_make_request(struct zram *zram, struct bio *bio)
{
int offset;
u32 index;
struct bio_vec bvec;
struct bvec_iter iter;
- switch (rw) {
- case READ:
- atomic64_inc(&zram->stats.num_reads);
- break;
- case WRITE:
- atomic64_inc(&zram->stats.num_writes);
- break;
- }
-
index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
offset = (bio->bi_iter.bi_sector &
(SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
+ if (unlikely(bio->bi_rw & REQ_DISCARD)) {
+ zram_bio_discard(zram, index, offset, bio);
+ bio_endio(bio, 0);
+ return;
+ }
+
bio_for_each_segment(bvec, bio, iter) {
int max_transfer_size = PAGE_SIZE - offset;
@@ -705,16 +747,15 @@
bv.bv_len = max_transfer_size;
bv.bv_offset = bvec.bv_offset;
- if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0)
+ if (zram_bvec_rw(zram, &bv, index, offset, bio) < 0)
goto out;
bv.bv_len = bvec.bv_len - max_transfer_size;
bv.bv_offset += max_transfer_size;
- if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0)
+ if (zram_bvec_rw(zram, &bv, index + 1, 0, bio) < 0)
goto out;
} else
- if (zram_bvec_rw(zram, &bvec, index, offset, bio, rw)
- < 0)
+ if (zram_bvec_rw(zram, &bvec, index, offset, bio) < 0)
goto out;
update_position(&index, &offset, &bvec);
@@ -736,7 +777,7 @@
struct zram *zram = queue->queuedata;
down_read(&zram->init_lock);
- if (unlikely(!zram->init_done))
+ if (unlikely(!init_done(zram)))
goto error;
if (!valid_io_request(zram, bio)) {
@@ -744,7 +785,7 @@
goto error;
}
- __zram_make_request(zram, bio, bio_data_dir(bio));
+ __zram_make_request(zram, bio);
up_read(&zram->init_lock);
return;
@@ -778,14 +819,21 @@
disksize_show, disksize_store);
static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
-static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
-static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
-static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
-static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
-static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
-static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
+static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR,
+ max_comp_streams_show, max_comp_streams_store);
+static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR,
+ comp_algorithm_show, comp_algorithm_store);
+
+ZRAM_ATTR_RO(num_reads);
+ZRAM_ATTR_RO(num_writes);
+ZRAM_ATTR_RO(failed_reads);
+ZRAM_ATTR_RO(failed_writes);
+ZRAM_ATTR_RO(invalid_io);
+ZRAM_ATTR_RO(notify_free);
+ZRAM_ATTR_RO(zero_pages);
+ZRAM_ATTR_RO(compr_data_size);
static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
@@ -793,12 +841,16 @@
&dev_attr_reset.attr,
&dev_attr_num_reads.attr,
&dev_attr_num_writes.attr,
+ &dev_attr_failed_reads.attr,
+ &dev_attr_failed_writes.attr,
&dev_attr_invalid_io.attr,
&dev_attr_notify_free.attr,
&dev_attr_zero_pages.attr,
&dev_attr_orig_data_size.attr,
&dev_attr_compr_data_size.attr,
&dev_attr_mem_used_total.attr,
+ &dev_attr_max_comp_streams.attr,
+ &dev_attr_comp_algorithm.attr,
NULL,
};
@@ -839,7 +891,8 @@
/* Actual capacity set using syfs (/sys/block/zram<id>/disksize */
set_capacity(zram->disk, 0);
-
+ /* zram devices sort of resembles non-rotational disks */
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
/*
* To ensure that we always get PAGE_SIZE aligned
* and n*PAGE_SIZED sized I/O requests.
@@ -849,6 +902,21 @@
ZRAM_LOGICAL_BLOCK_SIZE);
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
+ zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
+ zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
+ /*
+ * zram_bio_discard() will clear all logical blocks if logical block
+ * size is identical with physical block size(PAGE_SIZE). But if it is
+ * different, we will skip discarding some parts of logical blocks in
+ * the part of the request range which isn't aligned to physical block
+ * size. So we can't ensure that all discarded logical blocks are
+ * zeroed.
+ */
+ if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
+ zram->disk->queue->limits.discard_zeroes_data = 1;
+ else
+ zram->disk->queue->limits.discard_zeroes_data = 0;
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
add_disk(zram->disk);
@@ -858,8 +926,9 @@
pr_warn("Error creating sysfs group");
goto out_free_disk;
}
-
- zram->init_done = 0;
+ strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
+ zram->meta = NULL;
+ zram->max_comp_streams = 1;
return 0;
out_free_disk:
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index ad8aa35..7f21c14 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -16,9 +16,10 @@
#define _ZRAM_DRV_H_
#include <linux/spinlock.h>
-#include <linux/mutex.h>
#include <linux/zsmalloc.h>
+#include "zcomp.h"
+
/*
* Some arbitrary value. This is just to catch
* invalid value for num_devices module parameter.
@@ -64,38 +65,33 @@
struct table {
unsigned long handle;
u16 size; /* object size (excluding header) */
- u8 count; /* object ref count (not yet used) */
u8 flags;
} __aligned(4);
struct zram_stats {
- atomic64_t compr_size; /* compressed size of pages stored */
+ atomic64_t compr_data_size; /* compressed size of pages stored */
atomic64_t num_reads; /* failed + successful */
atomic64_t num_writes; /* --do-- */
atomic64_t failed_reads; /* should NEVER! happen */
atomic64_t failed_writes; /* can happen when memory is too low */
atomic64_t invalid_io; /* non-page-aligned I/O requests */
atomic64_t notify_free; /* no. of swap slot free notifications */
- atomic_t pages_zero; /* no. of zero filled pages */
- atomic_t pages_stored; /* no. of pages currently stored */
- atomic_t good_compress; /* % of pages with compression ratio<=50% */
- atomic_t bad_compress; /* % of pages with compression ratio>=75% */
+ atomic64_t zero_pages; /* no. of zero filled pages */
+ atomic64_t pages_stored; /* no. of pages currently stored */
};
struct zram_meta {
rwlock_t tb_lock; /* protect table */
- void *compress_workmem;
- void *compress_buffer;
struct table *table;
struct zs_pool *mem_pool;
- struct mutex buffer_lock; /* protect compress buffers */
};
struct zram {
struct zram_meta *meta;
struct request_queue *queue;
struct gendisk *disk;
- int init_done;
+ struct zcomp *comp;
+
/* Prevent concurrent execution of device init, reset and R/W request */
struct rw_semaphore init_lock;
/*
@@ -103,7 +99,8 @@
* we can store in a disk.
*/
u64 disksize; /* bytes */
-
+ int max_comp_streams;
struct zram_stats stats;
+ char compressor[10];
};
#endif
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index 1a65838..c54cac3 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -74,7 +74,7 @@
config TCG_ATMEL
tristate "Atmel TPM Interface"
- depends on PPC64 || HAS_IOPORT
+ depends on PPC64 || HAS_IOPORT_MAP
---help---
If you have a TPM security chip from Atmel say Yes and it
will be accessible from within Linux. To compile this driver
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index de17c55..014afab 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -936,7 +936,7 @@
config I2C_ELEKTOR
tristate "Elektor ISA card"
- depends on ISA && HAS_IOPORT && BROKEN_ON_SMP
+ depends on ISA && HAS_IOPORT_MAP && BROKEN_ON_SMP
select I2C_ALGOPCF
help
This supports the PCF8584 ISA bus I2C adapter. Say Y if you own
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index bfb39bb..e8b55c3 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -887,7 +887,7 @@
* _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if
* they set _PAGE_DIRTY then we can put a writable PTE entry in immediately.
*/
-static void do_set_pte(struct lg_cpu *cpu, int idx,
+static void __guest_set_pte(struct lg_cpu *cpu, int idx,
unsigned long vaddr, pte_t gpte)
{
/* Look up the matching shadow page directory entry. */
@@ -960,13 +960,13 @@
unsigned int i;
for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++)
if (cpu->lg->pgdirs[i].pgdir)
- do_set_pte(cpu, i, vaddr, gpte);
+ __guest_set_pte(cpu, i, vaddr, gpte);
} else {
/* Is this page table one we have a shadow for? */
int pgdir = find_pgdir(cpu->lg, gpgdir);
if (pgdir != ARRAY_SIZE(cpu->lg->pgdirs))
/* If so, do the update. */
- do_set_pte(cpu, pgdir, vaddr, gpte);
+ __guest_set_pte(cpu, pgdir, vaddr, gpte);
}
}
diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c
index 2bef3f7..a3700a5 100644
--- a/drivers/misc/sgi-gru/grukdump.c
+++ b/drivers/misc/sgi-gru/grukdump.c
@@ -178,10 +178,10 @@
hdr.cbrcnt = cbrcnt;
hdr.dsrcnt = dsrcnt;
hdr.cch_locked = cch_locked;
- if (!ret && copy_to_user((void __user *)uhdr, &hdr, sizeof(hdr)))
- ret = -EFAULT;
+ if (copy_to_user(uhdr, &hdr, sizeof(hdr)))
+ return -EFAULT;
- return ret ? ret : bytes;
+ return bytes;
}
int gru_dump_chiplet_request(unsigned long arg)
diff --git a/drivers/net/can/sja1000/Kconfig b/drivers/net/can/sja1000/Kconfig
index 4b18b87..1e65cb6 100644
--- a/drivers/net/can/sja1000/Kconfig
+++ b/drivers/net/can/sja1000/Kconfig
@@ -39,7 +39,7 @@
config CAN_PEAK_PCMCIA
tristate "PEAK PCAN-PC Card"
depends on PCMCIA
- depends on HAS_IOPORT
+ depends on HAS_IOPORT_MAP
---help---
This driver is for the PCAN-PC Card PCMCIA adapter (1 or 2 channels)
from PEAK-System (http://www.peak-system.com). To compile this
diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig
index 65b735d..afaab4b 100644
--- a/drivers/net/ethernet/3com/Kconfig
+++ b/drivers/net/ethernet/3com/Kconfig
@@ -66,7 +66,7 @@
config VORTEX
tristate "3c590/3c900 series (592/595/597) \"Vortex/Boomerang\" support"
- depends on (PCI || EISA) && HAS_IOPORT
+ depends on (PCI || EISA) && HAS_IOPORT_MAP
select MII
---help---
This option enables driver support for a large number of 10Mbps and
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 6d1f6ed..a849718 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -493,6 +493,7 @@
ndev->netdev_ops = &rionet_netdev_ops;
ndev->mtu = RIO_MAX_MSG_SIZE - 14;
ndev->features = NETIF_F_LLTX;
+ SET_NETDEV_DEV(ndev, &mport->dev);
SET_ETHTOOL_OPS(ndev, &rionet_ethtool_ops);
spin_lock_init(&rnet->lock);
diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
index ff7cbf2..1753dc6 100644
--- a/drivers/rapidio/devices/tsi721.c
+++ b/drivers/rapidio/devices/tsi721.c
@@ -2256,6 +2256,7 @@
mport->phy_type = RIO_PHY_SERIAL;
mport->priv = (void *)priv;
mport->phys_efptr = 0x100;
+ mport->dev.parent = &pdev->dev;
priv->mport = mport;
INIT_LIST_HEAD(&mport->dbells);
diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h
index 7061ac0..0305675 100644
--- a/drivers/rapidio/devices/tsi721.h
+++ b/drivers/rapidio/devices/tsi721.h
@@ -644,6 +644,9 @@
#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+#define TSI721_BDMA_BD_RING_SZ 128
+#define TSI721_BDMA_MAX_BCOUNT (TSI721_DMAD_BCOUNT1 + 1)
+
struct tsi721_tx_desc {
struct dma_async_tx_descriptor txd;
struct tsi721_dma_desc *hw_desc;
@@ -652,6 +655,7 @@
u64 rio_addr;
/* upper 2-bits of 66-bit RIO address */
u8 rio_addr_u;
+ u32 bcount;
bool interrupt;
struct list_head desc_node;
struct list_head tx_list;
diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c
index 91245f5..9b60b1f 100644
--- a/drivers/rapidio/devices/tsi721_dma.c
+++ b/drivers/rapidio/devices/tsi721_dma.c
@@ -304,35 +304,17 @@
}
static int
-tsi721_fill_desc(struct tsi721_bdma_chan *bdma_chan,
- struct tsi721_tx_desc *desc, struct scatterlist *sg,
+tsi721_desc_fill_init(struct tsi721_tx_desc *desc, struct scatterlist *sg,
enum dma_rtype rtype, u32 sys_size)
{
struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
u64 rio_addr;
- if (sg_dma_len(sg) > TSI721_DMAD_BCOUNT1 + 1) {
- dev_err(bdma_chan->dchan.device->dev,
- "SG element is too large\n");
- return -EINVAL;
- }
-
- dev_dbg(bdma_chan->dchan.device->dev,
- "desc: 0x%llx, addr: 0x%llx len: 0x%x\n",
- (u64)desc->txd.phys, (unsigned long long)sg_dma_address(sg),
- sg_dma_len(sg));
-
- dev_dbg(bdma_chan->dchan.device->dev,
- "bd_ptr = %p did=%d raddr=0x%llx\n",
- bd_ptr, desc->destid, desc->rio_addr);
-
/* Initialize DMA descriptor */
bd_ptr->type_id = cpu_to_le32((DTYPE1 << 29) |
(rtype << 19) | desc->destid);
- if (desc->interrupt)
- bd_ptr->type_id |= cpu_to_le32(TSI721_DMAD_IOF);
bd_ptr->bcount = cpu_to_le32(((desc->rio_addr & 0x3) << 30) |
- (sys_size << 26) | sg_dma_len(sg));
+ (sys_size << 26));
rio_addr = (desc->rio_addr >> 2) |
((u64)(desc->rio_addr_u & 0x3) << 62);
bd_ptr->raddr_lo = cpu_to_le32(rio_addr & 0xffffffff);
@@ -346,6 +328,20 @@
return 0;
}
+static int
+tsi721_desc_fill_end(struct tsi721_tx_desc *desc)
+{
+ struct tsi721_dma_desc *bd_ptr = desc->hw_desc;
+
+ /* Update DMA descriptor */
+ if (desc->interrupt)
+ bd_ptr->type_id |= cpu_to_le32(TSI721_DMAD_IOF);
+ bd_ptr->bcount |= cpu_to_le32(desc->bcount & TSI721_DMAD_BCOUNT1);
+
+ return 0;
+}
+
+
static void tsi721_dma_chain_complete(struct tsi721_bdma_chan *bdma_chan,
struct tsi721_tx_desc *desc)
{
@@ -674,6 +670,7 @@
unsigned int i;
u32 sys_size = dma_to_mport(dchan->device)->sys_size;
enum dma_rtype rtype;
+ dma_addr_t next_addr = -1;
if (!sgl || !sg_len) {
dev_err(dchan->device->dev, "%s: No SG list\n", __func__);
@@ -704,36 +701,84 @@
for_each_sg(sgl, sg, sg_len, i) {
int err;
- dev_dbg(dchan->device->dev, "%s: sg #%d\n", __func__, i);
+ if (sg_dma_len(sg) > TSI721_BDMA_MAX_BCOUNT) {
+ dev_err(dchan->device->dev,
+ "%s: SG entry %d is too large\n", __func__, i);
+ goto err_desc_put;
+ }
+
+ /*
+ * If this sg entry forms contiguous block with previous one,
+ * try to merge it into existing DMA descriptor
+ */
+ if (desc) {
+ if (next_addr == sg_dma_address(sg) &&
+ desc->bcount + sg_dma_len(sg) <=
+ TSI721_BDMA_MAX_BCOUNT) {
+ /* Adjust byte count of the descriptor */
+ desc->bcount += sg_dma_len(sg);
+ goto entry_done;
+ }
+
+ /*
+ * Finalize this descriptor using total
+ * byte count value.
+ */
+ tsi721_desc_fill_end(desc);
+ dev_dbg(dchan->device->dev, "%s: desc final len: %d\n",
+ __func__, desc->bcount);
+ }
+
+ /*
+ * Obtain and initialize a new descriptor
+ */
desc = tsi721_desc_get(bdma_chan);
if (!desc) {
dev_err(dchan->device->dev,
- "Not enough descriptors available\n");
- goto err_desc_get;
+ "%s: Failed to get new descriptor for SG %d\n",
+ __func__, i);
+ goto err_desc_put;
}
- if (sg_is_last(sg))
- desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0;
- else
- desc->interrupt = false;
-
desc->destid = rext->destid;
desc->rio_addr = rio_addr;
desc->rio_addr_u = 0;
+ desc->bcount = sg_dma_len(sg);
- err = tsi721_fill_desc(bdma_chan, desc, sg, rtype, sys_size);
+ dev_dbg(dchan->device->dev,
+ "sg%d desc: 0x%llx, addr: 0x%llx len: %d\n",
+ i, (u64)desc->txd.phys,
+ (unsigned long long)sg_dma_address(sg),
+ sg_dma_len(sg));
+
+ dev_dbg(dchan->device->dev,
+ "bd_ptr = %p did=%d raddr=0x%llx\n",
+ desc->hw_desc, desc->destid, desc->rio_addr);
+
+ err = tsi721_desc_fill_init(desc, sg, rtype, sys_size);
if (err) {
dev_err(dchan->device->dev,
"Failed to build desc: %d\n", err);
- goto err_desc_get;
+ goto err_desc_put;
}
- rio_addr += sg_dma_len(sg);
+ next_addr = sg_dma_address(sg);
if (!first)
first = desc;
else
list_add_tail(&desc->desc_node, &first->tx_list);
+
+entry_done:
+ if (sg_is_last(sg)) {
+ desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0;
+ tsi721_desc_fill_end(desc);
+ dev_dbg(dchan->device->dev, "%s: desc final len: %d\n",
+ __func__, desc->bcount);
+ } else {
+ rio_addr += sg_dma_len(sg);
+ next_addr += sg_dma_len(sg);
+ }
}
first->txd.cookie = -EBUSY;
@@ -741,7 +786,7 @@
return &first->txd;
-err_desc_get:
+err_desc_put:
tsi721_desc_put(bdma_chan, first);
return NULL;
}
@@ -792,7 +837,7 @@
if (i == TSI721_DMACH_MAINT)
continue;
- bdma_chan->bd_num = 64;
+ bdma_chan->bd_num = TSI721_BDMA_BD_RING_SZ;
bdma_chan->regs = priv->regs + TSI721_DMAC_BASE(i);
bdma_chan->dchan.device = &mport->dma;
diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index c9ae692..f301f05 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c
@@ -167,7 +167,6 @@
void rio_attach_device(struct rio_dev *rdev)
{
rdev->dev.bus = &rio_bus_type;
- rdev->dev.parent = &rio_bus;
}
EXPORT_SYMBOL_GPL(rio_attach_device);
@@ -216,9 +215,12 @@
return 0;
}
-struct device rio_bus = {
- .init_name = "rapidio",
+struct class rio_mport_class = {
+ .name = "rapidio_port",
+ .owner = THIS_MODULE,
+ .dev_groups = rio_mport_groups,
};
+EXPORT_SYMBOL_GPL(rio_mport_class);
struct bus_type rio_bus_type = {
.name = "rapidio",
@@ -233,14 +235,20 @@
/**
* rio_bus_init - Register the RapidIO bus with the device model
*
- * Registers the RIO bus device and RIO bus type with the Linux
+ * Registers the RIO mport device class and RIO bus type with the Linux
* device model.
*/
static int __init rio_bus_init(void)
{
- if (device_register(&rio_bus) < 0)
- printk("RIO: failed to register RIO bus device\n");
- return bus_register(&rio_bus_type);
+ int ret;
+
+ ret = class_register(&rio_mport_class);
+ if (!ret) {
+ ret = bus_register(&rio_bus_type);
+ if (ret)
+ class_unregister(&rio_mport_class);
+ }
+ return ret;
}
postcore_initcall(rio_bus_init);
diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
index d3a6539..47a1b2e 100644
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c
@@ -461,6 +461,7 @@
rdev->comp_tag & RIO_CTAG_UDEVID);
}
+ rdev->dev.parent = &port->dev;
rio_attach_device(rdev);
device_initialize(&rdev->dev);
diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c
index e0221c6..cdb005c 100644
--- a/drivers/rapidio/rio-sysfs.c
+++ b/drivers/rapidio/rio-sysfs.c
@@ -341,3 +341,43 @@
&rio_bus_group,
NULL,
};
+
+static ssize_t
+port_destid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct rio_mport *mport = to_rio_mport(dev);
+
+ if (mport)
+ return sprintf(buf, "0x%04x\n", mport->host_deviceid);
+ else
+ return -ENODEV;
+}
+static DEVICE_ATTR_RO(port_destid);
+
+static ssize_t sys_size_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct rio_mport *mport = to_rio_mport(dev);
+
+ if (mport)
+ return sprintf(buf, "%u\n", mport->sys_size);
+ else
+ return -ENODEV;
+}
+static DEVICE_ATTR_RO(sys_size);
+
+static struct attribute *rio_mport_attrs[] = {
+ &dev_attr_port_destid.attr,
+ &dev_attr_sys_size.attr,
+ NULL,
+};
+
+static const struct attribute_group rio_mport_group = {
+ .attrs = rio_mport_attrs,
+};
+
+const struct attribute_group *rio_mport_groups[] = {
+ &rio_mport_group,
+ NULL,
+};
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index 2e8a20c..a54ba04 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -1884,6 +1884,7 @@
int rio_register_mport(struct rio_mport *port)
{
struct rio_scan_node *scan = NULL;
+ int res = 0;
if (next_portid >= RIO_MAX_MPORTS) {
pr_err("RIO: reached specified max number of mports\n");
@@ -1894,6 +1895,16 @@
port->host_deviceid = rio_get_hdid(port->id);
port->nscan = NULL;
+ dev_set_name(&port->dev, "rapidio%d", port->id);
+ port->dev.class = &rio_mport_class;
+
+ res = device_register(&port->dev);
+ if (res)
+ dev_err(&port->dev, "RIO: mport%d registration failed ERR=%d\n",
+ port->id, res);
+ else
+ dev_dbg(&port->dev, "RIO: mport%d registered\n", port->id);
+
mutex_lock(&rio_mport_list_lock);
list_add_tail(&port->node, &rio_mports);
diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h
index 5f99d22..2d0550e 100644
--- a/drivers/rapidio/rio.h
+++ b/drivers/rapidio/rio.h
@@ -50,6 +50,7 @@
/* Structures internal to the RIO core code */
extern const struct attribute_group *rio_dev_groups[];
extern const struct attribute_group *rio_bus_groups[];
+extern const struct attribute_group *rio_mport_groups[];
#define RIO_GET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x00ff0000) >> 16))
#define RIO_SET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x000000ff) << 16))
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index a16b0ff4..d822320 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -832,6 +832,7 @@
static const struct vm_operations_struct v9fs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = v9fs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
@@ -839,6 +840,7 @@
static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
.close = v9fs_mmap_vm_close,
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = v9fs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 952aeb0..9852bdf 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -266,7 +266,7 @@
inode_init_once(&ei->vfs_inode);
}
-static int init_inodecache(void)
+static int __init init_inodecache(void)
{
adfs_inode_cachep = kmem_cache_create("adfs_inode_cache",
sizeof(struct adfs_inode_info),
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 3952121..25b23b1 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -5,14 +5,6 @@
#include <linux/mutex.h>
#include <linux/workqueue.h>
-/* AmigaOS allows file names with up to 30 characters length.
- * Names longer than that will be silently truncated. If you
- * want to disallow this, comment out the following #define.
- * Creating filesystem objects with longer names will then
- * result in an error (ENAMETOOLONG).
- */
-/*#define AFFS_NO_TRUNCATE */
-
/* Ugly macros make the code more pretty. */
#define GET_END_PTR(st,p,sz) ((st *)((char *)(p)+((sz)-sizeof(st))))
@@ -28,7 +20,6 @@
#define AFFS_CACHE_SIZE PAGE_SIZE
-#define AFFS_MAX_PREALLOC 32
#define AFFS_LC_SIZE (AFFS_CACHE_SIZE/sizeof(u32)/2)
#define AFFS_AC_SIZE (AFFS_CACHE_SIZE/sizeof(struct affs_ext_key)/2)
#define AFFS_AC_MASK (AFFS_AC_SIZE-1)
@@ -118,6 +109,7 @@
#define SF_OFS 0x0200 /* Old filesystem */
#define SF_PREFIX 0x0400 /* Buffer for prefix is allocated */
#define SF_VERBOSE 0x0800 /* Talk about fs when mounting */
+#define SF_NO_TRUNCATE 0x1000 /* Don't truncate filenames */
/* short cut to get to the affs specific sb data */
static inline struct affs_sb_info *AFFS_SB(struct super_block *sb)
@@ -137,9 +129,13 @@
extern void secs_to_datestamp(time_t secs, struct affs_date *ds);
extern umode_t prot_to_mode(u32 prot);
extern void mode_to_prot(struct inode *inode);
-extern void affs_error(struct super_block *sb, const char *function, const char *fmt, ...);
-extern void affs_warning(struct super_block *sb, const char *function, const char *fmt, ...);
-extern int affs_check_name(const unsigned char *name, int len);
+extern void affs_error(struct super_block *sb, const char *function,
+ const char *fmt, ...);
+extern void affs_warning(struct super_block *sb, const char *function,
+ const char *fmt, ...);
+extern bool affs_nofilenametruncate(const struct dentry *dentry);
+extern int affs_check_name(const unsigned char *name, int len,
+ bool notruncate);
extern int affs_copy_name(unsigned char *bstr, struct dentry *dentry);
/* bitmap. c */
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index d9a4367..533a322 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -471,20 +471,27 @@
function,ErrorBuffer);
}
+bool
+affs_nofilenametruncate(const struct dentry *dentry)
+{
+ struct inode *inode = dentry->d_inode;
+ return AFFS_SB(inode->i_sb)->s_flags & SF_NO_TRUNCATE;
+
+}
+
/* Check if the name is valid for a affs object. */
int
-affs_check_name(const unsigned char *name, int len)
+affs_check_name(const unsigned char *name, int len, bool notruncate)
{
int i;
- if (len > 30)
-#ifdef AFFS_NO_TRUNCATE
- return -ENAMETOOLONG;
-#else
- len = 30;
-#endif
-
+ if (len > 30) {
+ if (notruncate)
+ return -ENAMETOOLONG;
+ else
+ len = 30;
+ }
for (i = 0; i < len; i++) {
if (name[i] < ' ' || name[i] == ':'
|| (name[i] > 0x7e && name[i] < 0xa0))
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index f1eba8c..cbbda47 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -52,8 +52,10 @@
int hash_pos;
int chain_pos;
u32 ino;
+ int error = 0;
- pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",inode->i_ino,(unsigned long)ctx->pos);
+ pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",
+ inode->i_ino, (unsigned long)ctx->pos);
if (ctx->pos < 2) {
file->private_data = (void *)0;
@@ -72,7 +74,7 @@
}
dir_bh = affs_bread(sb, inode->i_ino);
if (!dir_bh)
- goto readdir_out;
+ goto out_unlock_dir;
/* If the directory hasn't changed since the last call to readdir(),
* we can jump directly to where we left off.
@@ -88,7 +90,8 @@
fh_bh = affs_bread(sb, ino);
if (!fh_bh) {
affs_error(sb, "readdir","Cannot read block %d", i);
- return -EIO;
+ error = -EIO;
+ goto out_brelse_dir;
}
ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain);
affs_brelse(fh_bh);
@@ -107,29 +110,34 @@
do {
fh_bh = affs_bread(sb, ino);
if (!fh_bh) {
- affs_error(sb, "readdir","Cannot read block %d", ino);
+ affs_error(sb, "readdir",
+ "Cannot read block %d", ino);
break;
}
namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30);
name = AFFS_TAIL(sb, fh_bh)->name + 1;
- pr_debug("AFFS: readdir(): filldir(\"%.*s\", ino=%u), hash=%d, f_pos=%x\n",
+ pr_debug("AFFS: readdir(): dir_emit(\"%.*s\", "
+ "ino=%u), hash=%d, f_pos=%x\n",
namelen, name, ino, hash_pos, (u32)ctx->pos);
+
if (!dir_emit(ctx, name, namelen, ino, DT_UNKNOWN))
- goto readdir_done;
+ goto done;
ctx->pos++;
ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain);
affs_brelse(fh_bh);
fh_bh = NULL;
} while (ino);
}
-readdir_done:
+done:
file->f_version = inode->i_version;
file->private_data = (void *)(long)ino;
-
-readdir_out:
- affs_brelse(dir_bh);
affs_brelse(fh_bh);
+
+out_brelse_dir:
+ affs_brelse(dir_bh);
+
+out_unlock_dir:
affs_unlock_dir(inode);
- return 0;
+ return error;
}
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index c36cbb4..6dae1cc 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -60,13 +60,13 @@
* Note: the dentry argument is the parent dentry.
*/
static inline int
-__affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
+__affs_hash_dentry(struct qstr *qstr, toupper_t toupper, bool notruncate)
{
const u8 *name = qstr->name;
unsigned long hash;
int i;
- i = affs_check_name(qstr->name, qstr->len);
+ i = affs_check_name(qstr->name, qstr->len, notruncate);
if (i)
return i;
@@ -82,16 +82,22 @@
static int
affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
{
- return __affs_hash_dentry(qstr, affs_toupper);
+ return __affs_hash_dentry(qstr, affs_toupper,
+ affs_nofilenametruncate(dentry));
+
}
+
static int
affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
{
- return __affs_hash_dentry(qstr, affs_intl_toupper);
+ return __affs_hash_dentry(qstr, affs_intl_toupper,
+ affs_nofilenametruncate(dentry));
+
}
static inline int __affs_compare_dentry(unsigned int len,
- const char *str, const struct qstr *name, toupper_t toupper)
+ const char *str, const struct qstr *name, toupper_t toupper,
+ bool notruncate)
{
const u8 *aname = str;
const u8 *bname = name->name;
@@ -101,7 +107,7 @@
* must be valid. 'name' must be validated first.
*/
- if (affs_check_name(name->name, name->len))
+ if (affs_check_name(name->name, name->len, notruncate))
return 1;
/*
@@ -126,13 +132,18 @@
affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
- return __affs_compare_dentry(len, str, name, affs_toupper);
+
+ return __affs_compare_dentry(len, str, name, affs_toupper,
+ affs_nofilenametruncate(parent));
}
+
static int
affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
- return __affs_compare_dentry(len, str, name, affs_intl_toupper);
+ return __affs_compare_dentry(len, str, name, affs_intl_toupper,
+ affs_nofilenametruncate(parent));
+
}
/*
@@ -411,7 +422,10 @@
(u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name,
(u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name);
- retval = affs_check_name(new_dentry->d_name.name,new_dentry->d_name.len);
+ retval = affs_check_name(new_dentry->d_name.name,
+ new_dentry->d_name.len,
+ affs_nofilenametruncate(old_dentry));
+
if (retval)
return retval;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 3074530..6d589f2 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -128,7 +128,7 @@
inode_init_once(&ei->vfs_inode);
}
-static int init_inodecache(void)
+static int __init init_inodecache(void)
{
affs_inode_cachep = kmem_cache_create("affs_inode_cache",
sizeof(struct affs_inode_info),
@@ -163,7 +163,7 @@
};
enum {
- Opt_bs, Opt_mode, Opt_mufs, Opt_prefix, Opt_protect,
+ Opt_bs, Opt_mode, Opt_mufs, Opt_notruncate, Opt_prefix, Opt_protect,
Opt_reserved, Opt_root, Opt_setgid, Opt_setuid,
Opt_verbose, Opt_volume, Opt_ignore, Opt_err,
};
@@ -172,6 +172,7 @@
{Opt_bs, "bs=%u"},
{Opt_mode, "mode=%o"},
{Opt_mufs, "mufs"},
+ {Opt_notruncate, "nofilenametruncate"},
{Opt_prefix, "prefix=%s"},
{Opt_protect, "protect"},
{Opt_reserved, "reserved=%u"},
@@ -233,6 +234,9 @@
case Opt_mufs:
*mount_opts |= SF_MUFS;
break;
+ case Opt_notruncate:
+ *mount_opts |= SF_NO_TRUNCATE;
+ break;
case Opt_prefix:
*prefix = match_strdup(&args[0]);
if (!*prefix)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 29aa5cf..7041ac3 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -266,7 +266,7 @@
inode_init_once(&bi->vfs_inode);
}
-static int init_inodecache(void)
+static int __init init_inodecache(void)
{
bfs_inode_cachep = kmem_cache_create("bfs_inode_cache",
sizeof(struct bfs_inode_info),
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0f59799..aa3cb62 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -584,7 +584,6 @@
unsigned long start_code, end_code, start_data, end_data;
unsigned long reloc_func_desc __maybe_unused = 0;
int executable_stack = EXSTACK_DEFAULT;
- unsigned long def_flags = 0;
struct pt_regs *regs = current_pt_regs();
struct {
struct elfhdr elf_ex;
@@ -724,9 +723,6 @@
if (retval)
goto out_free_dentry;
- /* OK, This is the point of no return */
- current->mm->def_flags = def_flags;
-
/* Do this immediately, since STACK_TOP as used in setup_arg_pages
may depend on the personality. */
SET_PERSONALITY(loc->elf_ex);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e1ffb1e..c660527 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2025,6 +2025,7 @@
static const struct vm_operations_struct btrfs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = btrfs_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 834fce7..216d7e9 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3113,6 +3113,7 @@
static struct vm_operations_struct cifs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = cifs_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/fs/exec.c b/fs/exec.c
index 25dfeba..9e81c63 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -26,6 +26,7 @@
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/mm.h>
+#include <linux/vmacache.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/swap.h>
@@ -822,7 +823,7 @@
static int exec_mmap(struct mm_struct *mm)
{
struct task_struct *tsk;
- struct mm_struct * old_mm, *active_mm;
+ struct mm_struct *old_mm, *active_mm;
/* Notify parent that we're no longer interested in the old VM */
tsk = current;
@@ -848,6 +849,8 @@
tsk->mm = mm;
tsk->active_mm = mm;
activate_mm(active_mm, mm);
+ tsk->mm->vmacache_seqnum = 0;
+ vmacache_flush(tsk);
task_unlock(tsk);
if (old_mm) {
up_read(&old_mm->mmap_sem);
@@ -1043,7 +1046,7 @@
* so that a new one can be started
*/
-void set_task_comm(struct task_struct *tsk, char *buf)
+void set_task_comm(struct task_struct *tsk, const char *buf)
{
task_lock(tsk);
trace_task_rename(tsk, buf);
@@ -1052,21 +1055,6 @@
perf_event_comm(tsk);
}
-static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
-{
- int i, ch;
-
- /* Copies the binary name from after last slash */
- for (i = 0; (ch = *(fn++)) != '\0';) {
- if (ch == '/')
- i = 0; /* overwrite what we wrote */
- else
- if (i < len - 1)
- tcomm[i++] = ch;
- }
- tcomm[i] = '\0';
-}
-
int flush_old_exec(struct linux_binprm * bprm)
{
int retval;
@@ -1080,8 +1068,6 @@
goto out;
set_mm_exe_file(bprm->mm, bprm->file);
-
- filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm));
/*
* Release all of the old mmap stuff
*/
@@ -1124,7 +1110,7 @@
else
set_dumpable(current->mm, suid_dumpable);
- set_task_comm(current, bprm->tcomm);
+ set_task_comm(current, kbasename(bprm->filename));
/* Set the new mm task size. We have to do that late because it may
* depend on TIF_32BIT which is only updated in flush_thread() on
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 6db7f7d..4e508fc 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -200,6 +200,7 @@
static const struct vm_operations_struct ext4_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = ext4_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 302d552..60e7d54 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -84,6 +84,7 @@
static const struct vm_operations_struct f2fs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = f2fs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 65df7d8..48992ca 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2117,6 +2117,7 @@
static const struct vm_operations_struct fuse_file_vm_ops = {
.close = fuse_vma_close,
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = fuse_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6c79408..80d6725 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -494,6 +494,7 @@
static const struct vm_operations_struct gfs2_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = gfs2_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5bb790a..284ca90 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -617,6 +617,7 @@
static const struct vm_operations_struct nfs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = nfs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 08fdb77..f3a82fb 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -134,6 +134,7 @@
static const struct vm_operations_struct nilfs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = nilfs_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c
index 807150e..dd6103c 100644
--- a/fs/ntfs/debug.c
+++ b/fs/ntfs/debug.c
@@ -18,16 +18,9 @@
* distribution in the file COPYING); if not, write to the Free Software
* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include "debug.h"
-/*
- * A static buffer to hold the error string being displayed and a spinlock
- * to protect concurrent accesses to it.
- */
-static char err_buf[1024];
-static DEFINE_SPINLOCK(err_buf_lock);
-
/**
* __ntfs_warning - output a warning to the syslog
* @function: name of function outputting the warning
@@ -50,6 +43,7 @@
void __ntfs_warning(const char *function, const struct super_block *sb,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
int flen = 0;
@@ -59,17 +53,15 @@
#endif
if (function)
flen = strlen(function);
- spin_lock(&err_buf_lock);
va_start(args, fmt);
- vsnprintf(err_buf, sizeof(err_buf), fmt, args);
- va_end(args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
if (sb)
- printk(KERN_ERR "NTFS-fs warning (device %s): %s(): %s\n",
- sb->s_id, flen ? function : "", err_buf);
+ pr_warn("(device %s): %s(): %pV\n",
+ sb->s_id, flen ? function : "", &vaf);
else
- printk(KERN_ERR "NTFS-fs warning: %s(): %s\n",
- flen ? function : "", err_buf);
- spin_unlock(&err_buf_lock);
+ pr_warn("%s(): %pV\n", flen ? function : "", &vaf);
+ va_end(args);
}
/**
@@ -94,6 +86,7 @@
void __ntfs_error(const char *function, const struct super_block *sb,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
int flen = 0;
@@ -103,17 +96,15 @@
#endif
if (function)
flen = strlen(function);
- spin_lock(&err_buf_lock);
va_start(args, fmt);
- vsnprintf(err_buf, sizeof(err_buf), fmt, args);
- va_end(args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
if (sb)
- printk(KERN_ERR "NTFS-fs error (device %s): %s(): %s\n",
- sb->s_id, flen ? function : "", err_buf);
+ pr_err("(device %s): %s(): %pV\n",
+ sb->s_id, flen ? function : "", &vaf);
else
- printk(KERN_ERR "NTFS-fs error: %s(): %s\n",
- flen ? function : "", err_buf);
- spin_unlock(&err_buf_lock);
+ pr_err("%s(): %pV\n", flen ? function : "", &vaf);
+ va_end(args);
}
#ifdef DEBUG
@@ -124,6 +115,7 @@
void __ntfs_debug (const char *file, int line, const char *function,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
int flen = 0;
@@ -131,13 +123,11 @@
return;
if (function)
flen = strlen(function);
- spin_lock(&err_buf_lock);
va_start(args, fmt);
- vsnprintf(err_buf, sizeof(err_buf), fmt, args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ pr_debug("(%s, %d): %s(): %pV", file, line, flen ? function : "", &vaf);
va_end(args);
- printk(KERN_DEBUG "NTFS-fs DEBUG (%s, %d): %s(): %s\n", file, line,
- flen ? function : "", err_buf);
- spin_unlock(&err_buf_lock);
}
/* Dump a runlist. Caller has to provide synchronisation for @rl. */
@@ -149,12 +139,12 @@
if (!debug_msgs)
return;
- printk(KERN_DEBUG "NTFS-fs DEBUG: Dumping runlist (values in hex):\n");
+ pr_debug("Dumping runlist (values in hex):\n");
if (!rl) {
- printk(KERN_DEBUG "Run list not present.\n");
+ pr_debug("Run list not present.\n");
return;
}
- printk(KERN_DEBUG "VCN LCN Run length\n");
+ pr_debug("VCN LCN Run length\n");
for (i = 0; ; i++) {
LCN lcn = (rl + i)->lcn;
@@ -163,13 +153,13 @@
if (index > -LCN_ENOENT - 1)
index = 3;
- printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n",
+ pr_debug("%-16Lx %s %-16Lx%s\n",
(long long)(rl + i)->vcn, lcn_str[index],
(long long)(rl + i)->length,
(rl + i)->length ? "" :
" (runlist end)");
} else
- printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n",
+ pr_debug("%-16Lx %-16Lx %-16Lx%s\n",
(long long)(rl + i)->vcn,
(long long)(rl + i)->lcn,
(long long)(rl + i)->length,
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 53c27ea..61bf091 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -48,7 +48,12 @@
#else /* !DEBUG */
-#define ntfs_debug(f, a...) do {} while (0)
+#define ntfs_debug(fmt, ...) \
+do { \
+ if (0) \
+ no_printk(fmt, ##__VA_ARGS__); \
+} while (0)
+
#define ntfs_debug_dump_runlist(rl) do {} while (0)
#endif /* !DEBUG */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index bd5610d..9de2491 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -19,6 +19,7 @@
* distribution in the file COPYING); if not, write to the Free Software
* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/stddef.h>
#include <linux/init.h>
@@ -1896,7 +1897,7 @@
vol->minor_ver = vi->minor_ver;
ntfs_attr_put_search_ctx(ctx);
unmap_mft_record(NTFS_I(vol->vol_ino));
- printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver,
+ pr_info("volume version %i.%i.\n", vol->major_ver,
vol->minor_ver);
if (vol->major_ver < 3 && NVolSparseEnabled(vol)) {
ntfs_warning(vol->sb, "Disabling sparse support due to NTFS "
@@ -3095,7 +3096,7 @@
int err = 0;
/* This may be ugly but it results in pretty output so who cares. (-8 */
- printk(KERN_INFO "NTFS driver " NTFS_VERSION " [Flags: R/"
+ pr_info("driver " NTFS_VERSION " [Flags: R/"
#ifdef NTFS_RW
"W"
#else
@@ -3115,16 +3116,15 @@
sizeof(ntfs_index_context), 0 /* offset */,
SLAB_HWCACHE_ALIGN, NULL /* ctor */);
if (!ntfs_index_ctx_cache) {
- printk(KERN_CRIT "NTFS: Failed to create %s!\n",
- ntfs_index_ctx_cache_name);
+ pr_crit("Failed to create %s!\n", ntfs_index_ctx_cache_name);
goto ictx_err_out;
}
ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name,
sizeof(ntfs_attr_search_ctx), 0 /* offset */,
SLAB_HWCACHE_ALIGN, NULL /* ctor */);
if (!ntfs_attr_ctx_cache) {
- printk(KERN_CRIT "NTFS: Failed to create %s!\n",
- ntfs_attr_ctx_cache_name);
+ pr_crit("NTFS: Failed to create %s!\n",
+ ntfs_attr_ctx_cache_name);
goto actx_err_out;
}
@@ -3132,8 +3132,7 @@
(NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!ntfs_name_cache) {
- printk(KERN_CRIT "NTFS: Failed to create %s!\n",
- ntfs_name_cache_name);
+ pr_crit("Failed to create %s!\n", ntfs_name_cache_name);
goto name_err_out;
}
@@ -3141,8 +3140,7 @@
sizeof(ntfs_inode), 0,
SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
if (!ntfs_inode_cache) {
- printk(KERN_CRIT "NTFS: Failed to create %s!\n",
- ntfs_inode_cache_name);
+ pr_crit("Failed to create %s!\n", ntfs_inode_cache_name);
goto inode_err_out;
}
@@ -3151,15 +3149,14 @@
SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
ntfs_big_inode_init_once);
if (!ntfs_big_inode_cache) {
- printk(KERN_CRIT "NTFS: Failed to create %s!\n",
- ntfs_big_inode_cache_name);
+ pr_crit("Failed to create %s!\n", ntfs_big_inode_cache_name);
goto big_inode_err_out;
}
/* Register the ntfs sysctls. */
err = ntfs_sysctl(1);
if (err) {
- printk(KERN_CRIT "NTFS: Failed to register NTFS sysctls!\n");
+ pr_crit("Failed to register NTFS sysctls!\n");
goto sysctl_err_out;
}
@@ -3168,7 +3165,7 @@
ntfs_debug("NTFS driver registered successfully.");
return 0; /* Success! */
}
- printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n");
+ pr_crit("Failed to register NTFS filesystem driver!\n");
/* Unregister the ntfs sysctls. */
ntfs_sysctl(0);
@@ -3184,8 +3181,7 @@
kmem_cache_destroy(ntfs_index_ctx_cache);
ictx_err_out:
if (!err) {
- printk(KERN_CRIT "NTFS: Aborting NTFS filesystem driver "
- "registration...\n");
+ pr_crit("Aborting NTFS filesystem driver registration...\n");
err = -ENOMEM;
}
return err;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 656e401..64db2bc 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -138,8 +138,8 @@
"D (disk sleep)", /* 2 */
"T (stopped)", /* 4 */
"t (tracing stop)", /* 8 */
- "Z (zombie)", /* 16 */
- "X (dead)", /* 32 */
+ "X (dead)", /* 16 */
+ "Z (zombie)", /* 32 */
};
static inline const char *get_task_state(struct task_struct *tsk)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b976062..6b7087e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1236,6 +1236,9 @@
make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
if (*end)
return -EINVAL;
+ if (make_it_fail < 0 || make_it_fail > 1)
+ return -EINVAL;
+
task = get_proc_task(file_inode(file));
if (!task)
return -ESRCH;
@@ -2588,7 +2591,7 @@
REG("environ", S_IRUSR, proc_environ_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
- ONE("personality", S_IRUGO, proc_pid_personality),
+ ONE("personality", S_IRUSR, proc_pid_personality),
INF("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
@@ -2598,7 +2601,7 @@
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
- INF("syscall", S_IRUGO, proc_pid_syscall),
+ INF("syscall", S_IRUSR, proc_pid_syscall),
#endif
INF("cmdline", S_IRUGO, proc_pid_cmdline),
ONE("stat", S_IRUGO, proc_tgid_stat),
@@ -2617,7 +2620,7 @@
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
- REG("pagemap", S_IRUGO, proc_pagemap_operations),
+ REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2626,7 +2629,7 @@
INF("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
- ONE("stack", S_IRUGO, proc_pid_stack),
+ ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -2927,14 +2930,14 @@
REG("environ", S_IRUSR, proc_environ_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
- ONE("personality", S_IRUGO, proc_pid_personality),
+ ONE("personality", S_IRUSR, proc_pid_personality),
INF("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
- INF("syscall", S_IRUGO, proc_pid_syscall),
+ INF("syscall", S_IRUSR, proc_pid_syscall),
#endif
INF("cmdline", S_IRUGO, proc_pid_cmdline),
ONE("stat", S_IRUGO, proc_tid_stat),
@@ -2955,7 +2958,7 @@
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_tid_smaps_operations),
- REG("pagemap", S_IRUGO, proc_pagemap_operations),
+ REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2964,7 +2967,7 @@
INF("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
- ONE("stack", S_IRUGO, proc_pid_stack),
+ ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, proc_pid_schedstat),
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 985ea88..0788d09 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -11,6 +11,7 @@
#include <linux/proc_fs.h>
+#include "../mount.h"
#include "internal.h"
#include "fd.h"
@@ -48,8 +49,9 @@
}
if (!ret) {
- seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
- (long long)file->f_pos, f_flags);
+ seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
+ (long long)file->f_pos, f_flags,
+ real_mount(file->f_path.mnt)->mnt_id);
if (file->f_op->show_fdinfo)
ret = file->f_op->show_fdinfo(m, file);
fput(file);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 8f20e34..0adbc02 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -47,7 +47,7 @@
pde_put(de);
head = PROC_I(inode)->sysctl;
if (head) {
- rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
+ RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
sysctl_head_put(head);
}
/* Release any associated namespace */
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 136e548..7445af0 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -73,7 +73,7 @@
available += pagecache;
/*
- * Part of the reclaimable swap consists of items that are in use,
+ * Part of the reclaimable slab consists of items that are in use,
* and cannot be freed. Cap this estimate at the low watermark.
*/
available += global_page_state(NR_SLAB_RECLAIMABLE) -
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fb52b54..442177b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,4 +1,5 @@
#include <linux/mm.h>
+#include <linux/vmacache.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/mount.h>
@@ -152,7 +153,7 @@
/*
* We remember last_addr rather than next_addr to hit with
- * mmap_cache most of the time. We have zero last_addr at
+ * vmacache most of the time. We have zero last_addr at
* the beginning and also after lseek. We will have -1 last_addr
* after the end of the vmas.
*/
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 88d4585..6a8e785 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -484,7 +484,6 @@
phdr_ptr->p_memsz = real_sz;
if (real_sz == 0) {
pr_warn("Warning: Zero PT_NOTE entries found\n");
- return -EINVAL;
}
}
@@ -671,7 +670,6 @@
phdr_ptr->p_memsz = real_sz;
if (real_sz == 0) {
pr_warn("Warning: Zero PT_NOTE entries found\n");
- return -EINVAL;
}
}
@@ -1118,4 +1116,3 @@
}
free_elfcorebuf();
}
-EXPORT_SYMBOL_GPL(vmcore_cleanup);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 123c79b..4f34dba 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1538,6 +1538,7 @@
static const struct vm_operations_struct ubifs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = ubifs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index a7ea492..0ab1de4 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -38,7 +38,6 @@
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
- struct ufs_super_block_first * usb1;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
unsigned cgno, bit, end_bit, bbase, blkmap, i;
@@ -46,7 +45,6 @@
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
- usb1 = ubh_get_usb_first(uspi);
UFSD("ENTER, fragment %llu, count %u\n",
(unsigned long long)fragment, count);
@@ -135,7 +133,6 @@
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
- struct ufs_super_block_first * usb1;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
unsigned overflow, cgno, bit, end_bit, i;
@@ -143,7 +140,6 @@
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
- usb1 = ubh_get_usb_first(uspi);
UFSD("ENTER, fragment %llu, count %u\n",
(unsigned long long)fragment, count);
@@ -499,7 +495,6 @@
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
- struct ufs_super_block_first * usb1;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
unsigned cgno, fragno, fragoff, count, fragsize, i;
@@ -509,7 +504,6 @@
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
- usb1 = ubh_get_usb_first (uspi);
count = newcount - oldcount;
cgno = ufs_dtog(uspi, fragment);
@@ -577,7 +571,6 @@
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
- struct ufs_super_block_first * usb1;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
unsigned oldcg, i, j, k, allocsize;
@@ -588,7 +581,6 @@
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
- usb1 = ubh_get_usb_first(uspi);
oldcg = cgno;
/*
@@ -690,7 +682,6 @@
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
- struct ufs_super_block_first * usb1;
struct ufs_cylinder_group * ucg;
u64 result, blkno;
@@ -698,7 +689,6 @@
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
- usb1 = ubh_get_usb_first(uspi);
ucg = ubh_get_ucg(UCPI_UBH(ucpi));
if (goal == 0) {
@@ -794,7 +784,6 @@
0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe
};
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
- struct ufs_super_block_first *usb1;
struct ufs_cylinder_group *ucg;
unsigned start, length, loc;
unsigned pos, want, blockmap, mask, end;
@@ -803,7 +792,6 @@
UFSD("ENTER, cg %u, goal %llu, count %u\n", ucpi->c_cgx,
(unsigned long long)goal, count);
- usb1 = ubh_get_usb_first (uspi);
ucg = ubh_get_ucg(UCPI_UBH(ucpi));
if (goal)
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index d0426d7..98f7211 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -57,7 +57,6 @@
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
- struct ufs_super_block_first * usb1;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
int is_directory;
@@ -67,7 +66,6 @@
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
- usb1 = ubh_get_usb_first(uspi);
ino = inode->i_ino;
@@ -175,7 +173,6 @@
struct super_block * sb;
struct ufs_sb_info * sbi;
struct ufs_sb_private_info * uspi;
- struct ufs_super_block_first * usb1;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
struct inode * inode;
@@ -195,7 +192,6 @@
ufsi = UFS_I(inode);
sbi = UFS_SB(sb);
uspi = sbi->s_uspi;
- usb1 = ubh_get_usb_first(uspi);
mutex_lock(&sbi->s_lock);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b8c6791..c1183f9 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -524,11 +524,9 @@
struct ufs_buffer_head * ubh;
unsigned char * base, * space;
unsigned size, blks, i;
- struct ufs_super_block_third *usb3;
UFSD("ENTER\n");
- usb3 = ubh_get_usb_third(uspi);
/*
* Read cs structures from (usually) first data block
* on the device.
@@ -1390,15 +1388,11 @@
struct super_block *sb = dentry->d_sb;
struct ufs_sb_private_info *uspi= UFS_SB(sb)->s_uspi;
unsigned flags = UFS_SB(sb)->s_flags;
- struct ufs_super_block_first *usb1;
- struct ufs_super_block_second *usb2;
struct ufs_super_block_third *usb3;
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
lock_ufs(sb);
- usb1 = ubh_get_usb_first(uspi);
- usb2 = ubh_get_usb_second(uspi);
usb3 = ubh_get_usb_third(uspi);
if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
@@ -1454,7 +1448,7 @@
inode_init_once(&ei->vfs_inode);
}
-static int init_inodecache(void)
+static int __init init_inodecache(void)
{
ufs_inode_cachep = kmem_cache_create("ufs_inode_cache",
sizeof(struct ufs_inode_info),
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f7abff8..003c005 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1483,6 +1483,7 @@
static const struct vm_operations_struct xfs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = xfs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 7d10f96..630dd23 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -52,7 +52,7 @@
#endif
#ifndef HAVE_ARCH_BUG_ON
-#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while(0)
+#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
#endif
/*
@@ -106,33 +106,6 @@
unlikely(__ret_warn_on); \
})
-#else /* !CONFIG_BUG */
-#ifndef HAVE_ARCH_BUG
-#define BUG() do {} while(0)
-#endif
-
-#ifndef HAVE_ARCH_BUG_ON
-#define BUG_ON(condition) do { if (condition) ; } while(0)
-#endif
-
-#ifndef HAVE_ARCH_WARN_ON
-#define WARN_ON(condition) ({ \
- int __ret_warn_on = !!(condition); \
- unlikely(__ret_warn_on); \
-})
-#endif
-
-#ifndef WARN
-#define WARN(condition, format...) ({ \
- int __ret_warn_on = !!(condition); \
- unlikely(__ret_warn_on); \
-})
-#endif
-
-#define WARN_TAINT(condition, taint, format...) WARN_ON(condition)
-
-#endif
-
#define WARN_ON_ONCE(condition) ({ \
static bool __section(.data.unlikely) __warned; \
int __ret_warn_once = !!(condition); \
@@ -163,6 +136,37 @@
unlikely(__ret_warn_once); \
})
+#else /* !CONFIG_BUG */
+#ifndef HAVE_ARCH_BUG
+#define BUG() do {} while (1)
+#endif
+
+#ifndef HAVE_ARCH_BUG_ON
+#define BUG_ON(condition) do { if (condition) ; } while (0)
+#endif
+
+#ifndef HAVE_ARCH_WARN_ON
+#define WARN_ON(condition) ({ \
+ int __ret_warn_on = !!(condition); \
+ unlikely(__ret_warn_on); \
+})
+#endif
+
+#ifndef WARN
+#define WARN(condition, format...) ({ \
+ int __ret_warn_on = !!(condition); \
+ no_printk(format); \
+ unlikely(__ret_warn_on); \
+})
+#endif
+
+#define WARN_ON_ONCE(condition) WARN_ON(condition)
+#define WARN_ONCE(condition, format...) WARN(condition, format)
+#define WARN_TAINT(condition, taint, format...) WARN(condition, format)
+#define WARN_TAINT_ONCE(condition, taint, format...) WARN(condition, format)
+
+#endif
+
/*
* WARN_ON_SMP() is for cases that the warning is either
* meaningless for !SMP or may even cause failures.
diff --git a/include/asm-generic/early_ioremap.h b/include/asm-generic/early_ioremap.h
new file mode 100644
index 0000000..a5de55c
--- /dev/null
+++ b/include/asm-generic/early_ioremap.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_EARLY_IOREMAP_H_
+#define _ASM_EARLY_IOREMAP_H_
+
+#include <linux/types.h>
+
+/*
+ * early_ioremap() and early_iounmap() are for temporary early boot-time
+ * mappings, before the real ioremap() is functional.
+ */
+extern void __iomem *early_ioremap(resource_size_t phys_addr,
+ unsigned long size);
+extern void *early_memremap(resource_size_t phys_addr,
+ unsigned long size);
+extern void early_iounmap(void __iomem *addr, unsigned long size);
+extern void early_memunmap(void *addr, unsigned long size);
+
+/*
+ * Weak function called by early_ioremap_reset(). It does nothing, but
+ * architectures may provide their own version to do any needed cleanups.
+ */
+extern void early_ioremap_shutdown(void);
+
+#if defined(CONFIG_GENERIC_EARLY_IOREMAP) && defined(CONFIG_MMU)
+/* Arch-specific initialization */
+extern void early_ioremap_init(void);
+
+/* Generic initialization called by architecture code */
+extern void early_ioremap_setup(void);
+
+/*
+ * Called as last step in paging_init() so library can act
+ * accordingly for subsequent map/unmap requests.
+ */
+extern void early_ioremap_reset(void);
+
+#else
+static inline void early_ioremap_init(void) { }
+static inline void early_ioremap_setup(void) { }
+static inline void early_ioremap_reset(void) { }
+#endif
+
+#endif /* _ASM_EARLY_IOREMAP_H_ */
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index d5afe96..975e1cc 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -327,7 +327,7 @@
}
#endif /* CONFIG_MMU */
-#ifdef CONFIG_HAS_IOPORT
+#ifdef CONFIG_HAS_IOPORT_MAP
#ifndef CONFIG_GENERIC_IOMAP
static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
@@ -341,7 +341,7 @@
extern void __iomem *ioport_map(unsigned long port, unsigned int nr);
extern void ioport_unmap(void __iomem *p);
#endif /* CONFIG_GENERIC_IOMAP */
-#endif /* CONFIG_HAS_IOPORT */
+#endif /* CONFIG_HAS_IOPORT_MAP */
#ifndef xlate_dev_kmem_ptr
#define xlate_dev_kmem_ptr(p) p
diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h
index 6afd7d6..1b41011 100644
--- a/include/asm-generic/iomap.h
+++ b/include/asm-generic/iomap.h
@@ -56,7 +56,7 @@
extern void iowrite16_rep(void __iomem *port, const void *buf, unsigned long count);
extern void iowrite32_rep(void __iomem *port, const void *buf, unsigned long count);
-#ifdef CONFIG_HAS_IOPORT
+#ifdef CONFIG_HAS_IOPORT_MAP
/* Create a virtual mapping cookie for an IO port range */
extern void __iomem *ioport_map(unsigned long port, unsigned int nr);
extern void ioport_unmap(void __iomem *);
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index d17784e..0703aa7 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -56,17 +56,17 @@
#define per_cpu(var, cpu) \
(*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu)))
-#ifndef __this_cpu_ptr
-#define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
+#ifndef raw_cpu_ptr
+#define raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
#endif
#ifdef CONFIG_DEBUG_PREEMPT
#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
#else
-#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr)
+#define this_cpu_ptr(ptr) raw_cpu_ptr(ptr)
#endif
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
-#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var)))
+#define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var)))
#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
extern void setup_per_cpu_areas(void);
@@ -83,7 +83,7 @@
#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var)))
#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var)))
#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0)
-#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr)
+#define raw_cpu_ptr(ptr) this_cpu_ptr(ptr)
#endif /* SMP */
@@ -122,4 +122,7 @@
#define PER_CPU_DEF_ATTRIBUTES
#endif
+/* Keep until we have removed all uses of __this_cpu_ptr */
+#define __this_cpu_ptr raw_cpu_ptr
+
#endif /* _ASM_GENERIC_PERCPU_H_ */
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index b4a745d..61f29e5 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -44,7 +44,6 @@
unsigned interp_flags;
unsigned interp_data;
unsigned long loader, exec;
- char tcomm[TASK_COMM_LEN];
};
#define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 7032518..72ab536 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -25,6 +25,7 @@
extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
unsigned long, int);
+void vmcore_cleanup(void);
/* Architecture code defines this if there are other possible ELF
* machine types, e.g. on bi-arch capable hardware. */
diff --git a/include/linux/idr.h b/include/linux/idr.h
index f669585..6af3400 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -133,69 +133,6 @@
for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
/*
- * Don't use the following functions. These exist only to suppress
- * deprecated warnings on EXPORT_SYMBOL()s.
- */
-int __idr_pre_get(struct idr *idp, gfp_t gfp_mask);
-int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id);
-void __idr_remove_all(struct idr *idp);
-
-/**
- * idr_pre_get - reserve resources for idr allocation
- * @idp: idr handle
- * @gfp_mask: memory allocation flags
- *
- * Part of old alloc interface. This is going away. Use
- * idr_preload[_end]() and idr_alloc() instead.
- */
-static inline int __deprecated idr_pre_get(struct idr *idp, gfp_t gfp_mask)
-{
- return __idr_pre_get(idp, gfp_mask);
-}
-
-/**
- * idr_get_new_above - allocate new idr entry above or equal to a start id
- * @idp: idr handle
- * @ptr: pointer you want associated with the id
- * @starting_id: id to start search at
- * @id: pointer to the allocated handle
- *
- * Part of old alloc interface. This is going away. Use
- * idr_preload[_end]() and idr_alloc() instead.
- */
-static inline int __deprecated idr_get_new_above(struct idr *idp, void *ptr,
- int starting_id, int *id)
-{
- return __idr_get_new_above(idp, ptr, starting_id, id);
-}
-
-/**
- * idr_get_new - allocate new idr entry
- * @idp: idr handle
- * @ptr: pointer you want associated with the id
- * @id: pointer to the allocated handle
- *
- * Part of old alloc interface. This is going away. Use
- * idr_preload[_end]() and idr_alloc() instead.
- */
-static inline int __deprecated idr_get_new(struct idr *idp, void *ptr, int *id)
-{
- return __idr_get_new_above(idp, ptr, 0, id);
-}
-
-/**
- * idr_remove_all - remove all ids from the given idr tree
- * @idp: idr handle
- *
- * If you're trying to destroy @idp, calling idr_destroy() is enough.
- * This is going away. Don't use.
- */
-static inline void __deprecated idr_remove_all(struct idr *idp)
-{
- __idr_remove_all(idp);
-}
-
-/*
* IDA - IDR based id allocator, use when translation from id to
* pointer isn't necessary.
*
diff --git a/include/linux/io.h b/include/linux/io.h
index 8a18e75..b76e6e5 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -41,7 +41,7 @@
/*
* Managed iomap interface
*/
-#ifdef CONFIG_HAS_IOPORT
+#ifdef CONFIG_HAS_IOPORT_MAP
void __iomem * devm_ioport_map(struct device *dev, unsigned long port,
unsigned int nr);
void devm_ioport_unmap(struct device *dev, void __iomem *addr);
diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 96549ab..0081f00 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -25,6 +25,8 @@
#include <linux/cpu.h>
#include <linux/notifier.h>
+#ifdef CONFIG_SMP
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#define LOCKDEP_INIT_MAP lockdep_init_map
#else
@@ -57,4 +59,18 @@
void lg_global_lock(struct lglock *lg);
void lg_global_unlock(struct lglock *lg);
+#else
+/* When !CONFIG_SMP, map lglock to spinlock */
+#define lglock spinlock
+#define DEFINE_LGLOCK(name) DEFINE_SPINLOCK(name)
+#define DEFINE_STATIC_LGLOCK(name) static DEFINE_SPINLOCK(name)
+#define lg_lock_init(lg, name) spin_lock_init(lg)
+#define lg_local_lock spin_lock
+#define lg_local_unlock spin_unlock
+#define lg_local_lock_cpu(lg, cpu) spin_lock(lg)
+#define lg_local_unlock_cpu(lg, cpu) spin_unlock(lg)
+#define lg_global_lock spin_lock
+#define lg_global_unlock spin_unlock
+#endif
+
#endif
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index eccfb4a..b569b8b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -65,7 +65,7 @@
* (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
*/
-extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
+extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
/* for swap handling */
extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
@@ -74,7 +74,7 @@
struct mem_cgroup *memcg);
extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
-extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
+extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
@@ -94,7 +94,6 @@
extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
-extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css);
@@ -234,13 +233,13 @@
#else /* CONFIG_MEMCG */
struct mem_cgroup;
-static inline int mem_cgroup_newpage_charge(struct page *page,
+static inline int mem_cgroup_charge_anon(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask)
{
return 0;
}
-static inline int mem_cgroup_cache_charge(struct page *page,
+static inline int mem_cgroup_charge_file(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask)
{
return 0;
@@ -294,11 +293,6 @@
return NULL;
}
-static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
-{
- return NULL;
-}
-
static inline bool mm_match_cgroup(struct mm_struct *mm,
struct mem_cgroup *memcg)
{
@@ -497,6 +491,9 @@
void __memcg_kmem_uncharge_pages(struct page *page, int order);
int memcg_cache_id(struct mem_cgroup *memcg);
+
+char *memcg_create_cache_name(struct mem_cgroup *memcg,
+ struct kmem_cache *root_cache);
int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
struct kmem_cache *root_cache);
void memcg_free_cache_params(struct kmem_cache *s);
@@ -510,7 +507,7 @@
__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
-void kmem_cache_destroy_memcg_children(struct kmem_cache *s);
+int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
/**
* memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
@@ -664,10 +661,6 @@
{
return cachep;
}
-
-static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
-{
-}
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5f1ea75..3c1b968 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -143,7 +143,6 @@
extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
enum mpol_rebind_step step);
extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
-extern void mpol_fix_fork_child_flag(struct task_struct *p);
extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
unsigned long addr, gfp_t gfp_flags,
@@ -151,7 +150,7 @@
extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
extern bool mempolicy_nodemask_intersects(struct task_struct *tsk,
const nodemask_t *mask);
-extern unsigned slab_node(void);
+extern unsigned int mempolicy_slab_node(void);
extern enum zone_type policy_zone;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 35300f3..abc8484 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -177,6 +177,9 @@
*/
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+/* This mask defines which mm->def_flags a process can inherit its parent */
+#define VM_INIT_DEF_MASK VM_NOHUGEPAGE
+
/*
* mapping from the currently active vm_flags protection bits (the
* low four bits) to a page protection mask..
@@ -210,6 +213,10 @@
* is set (which is also implied by
* VM_FAULT_ERROR).
*/
+ /* for ->map_pages() only */
+ pgoff_t max_pgoff; /* map pages for offset from pgoff till
+ * max_pgoff inclusive */
+ pte_t *pte; /* pte entry associated with ->pgoff */
};
/*
@@ -221,6 +228,7 @@
void (*open)(struct vm_area_struct * area);
void (*close)(struct vm_area_struct * area);
int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
+ void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf);
/* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */
@@ -581,6 +589,9 @@
pte = pte_mkwrite(pte);
return pte;
}
+
+void do_set_pte(struct vm_area_struct *vma, unsigned long address,
+ struct page *page, pte_t *pte, bool write, bool anon);
#endif
/*
@@ -684,7 +695,7 @@
#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1)
#define NODES_MASK ((1UL << NODES_WIDTH) - 1)
#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
-#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_WIDTH) - 1)
+#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1)
#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1)
static inline enum zone_type page_zonenum(const struct page *page)
@@ -1836,6 +1847,7 @@
/* generic vm_area_ops exported for stackable file systems */
extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
+extern void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf);
extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
/* mm/page-writeback.c */
@@ -1863,9 +1875,6 @@
unsigned long size);
unsigned long max_sane_readahead(unsigned long nr);
-unsigned long ra_submit(struct file_ra_state *ra,
- struct address_space *mapping,
- struct file *filp);
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 290901a..2b58d19 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -342,9 +342,9 @@
struct kioctx_table;
struct mm_struct {
- struct vm_area_struct * mmap; /* list of VMAs */
+ struct vm_area_struct *mmap; /* list of VMAs */
struct rb_root mm_rb;
- struct vm_area_struct * mmap_cache; /* last find_vma result */
+ u32 vmacache_seqnum; /* per-thread vmacache */
#ifdef CONFIG_MMU
unsigned long (*get_unmapped_area) (struct file *filp,
unsigned long addr, unsigned long len,
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 5042c03..2d57efa 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -3,8 +3,8 @@
struct page;
-extern void dump_page(struct page *page, char *reason);
-extern void dump_page_badflags(struct page *page, char *reason,
+extern void dump_page(struct page *page, const char *reason);
+extern void dump_page_badflags(struct page *page, const char *reason,
unsigned long badflags);
#ifdef CONFIG_DEBUG_VM
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index e3817d2..e7a0b95 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -173,6 +173,12 @@
extern void __bad_size_call_parameter(void);
+#ifdef CONFIG_DEBUG_PREEMPT
+extern void __this_cpu_preempt_check(const char *op);
+#else
+static inline void __this_cpu_preempt_check(const char *op) { }
+#endif
+
#define __pcpu_size_call_return(stem, variable) \
({ typeof(variable) pscr_ret__; \
__verify_pcpu_ptr(&(variable)); \
@@ -243,6 +249,8 @@
} while (0)
/*
+ * this_cpu operations (C) 2008-2013 Christoph Lameter <cl@linux.com>
+ *
* Optimized manipulation for memory allocated through the per cpu
* allocator or for addresses of per cpu variables.
*
@@ -296,7 +304,7 @@
do { \
unsigned long flags; \
raw_local_irq_save(flags); \
- *__this_cpu_ptr(&(pcp)) op val; \
+ *raw_cpu_ptr(&(pcp)) op val; \
raw_local_irq_restore(flags); \
} while (0)
@@ -381,8 +389,8 @@
typeof(pcp) ret__; \
unsigned long flags; \
raw_local_irq_save(flags); \
- __this_cpu_add(pcp, val); \
- ret__ = __this_cpu_read(pcp); \
+ raw_cpu_add(pcp, val); \
+ ret__ = raw_cpu_read(pcp); \
raw_local_irq_restore(flags); \
ret__; \
})
@@ -411,8 +419,8 @@
({ typeof(pcp) ret__; \
unsigned long flags; \
raw_local_irq_save(flags); \
- ret__ = __this_cpu_read(pcp); \
- __this_cpu_write(pcp, nval); \
+ ret__ = raw_cpu_read(pcp); \
+ raw_cpu_write(pcp, nval); \
raw_local_irq_restore(flags); \
ret__; \
})
@@ -439,9 +447,9 @@
typeof(pcp) ret__; \
unsigned long flags; \
raw_local_irq_save(flags); \
- ret__ = __this_cpu_read(pcp); \
+ ret__ = raw_cpu_read(pcp); \
if (ret__ == (oval)) \
- __this_cpu_write(pcp, nval); \
+ raw_cpu_write(pcp, nval); \
raw_local_irq_restore(flags); \
ret__; \
})
@@ -476,7 +484,7 @@
int ret__; \
unsigned long flags; \
raw_local_irq_save(flags); \
- ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \
+ ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \
oval1, oval2, nval1, nval2); \
raw_local_irq_restore(flags); \
ret__; \
@@ -504,12 +512,8 @@
#endif
/*
- * Generic percpu operations for context that are safe from preemption/interrupts.
- * Either we do not care about races or the caller has the
- * responsibility of handling preemption/interrupt issues. Arch code can still
- * override these instructions since the arch per cpu code may be more
- * efficient and may actually get race freeness for free (that is the
- * case for x86 for example).
+ * Generic percpu operations for contexts where we do not want to do
+ * any checks for preemptiosn.
*
* If there is no other protection through preempt disable and/or
* disabling interupts then one of these RMW operations can show unexpected
@@ -517,57 +521,234 @@
* or an interrupt occurred and the same percpu variable was modified from
* the interrupt context.
*/
-#ifndef __this_cpu_read
-# ifndef __this_cpu_read_1
-# define __this_cpu_read_1(pcp) (*__this_cpu_ptr(&(pcp)))
+#ifndef raw_cpu_read
+# ifndef raw_cpu_read_1
+# define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp)))
# endif
-# ifndef __this_cpu_read_2
-# define __this_cpu_read_2(pcp) (*__this_cpu_ptr(&(pcp)))
+# ifndef raw_cpu_read_2
+# define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp)))
# endif
-# ifndef __this_cpu_read_4
-# define __this_cpu_read_4(pcp) (*__this_cpu_ptr(&(pcp)))
+# ifndef raw_cpu_read_4
+# define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp)))
# endif
-# ifndef __this_cpu_read_8
-# define __this_cpu_read_8(pcp) (*__this_cpu_ptr(&(pcp)))
+# ifndef raw_cpu_read_8
+# define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp)))
# endif
-# define __this_cpu_read(pcp) __pcpu_size_call_return(__this_cpu_read_, (pcp))
+# define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp))
#endif
-#define __this_cpu_generic_to_op(pcp, val, op) \
+#define raw_cpu_generic_to_op(pcp, val, op) \
do { \
- *__this_cpu_ptr(&(pcp)) op val; \
+ *raw_cpu_ptr(&(pcp)) op val; \
} while (0)
+
+#ifndef raw_cpu_write
+# ifndef raw_cpu_write_1
+# define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =)
+# endif
+# ifndef raw_cpu_write_2
+# define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), =)
+# endif
+# ifndef raw_cpu_write_4
+# define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), =)
+# endif
+# ifndef raw_cpu_write_8
+# define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =)
+# endif
+# define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val))
+#endif
+
+#ifndef raw_cpu_add
+# ifndef raw_cpu_add_1
+# define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# ifndef raw_cpu_add_2
+# define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# ifndef raw_cpu_add_4
+# define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# ifndef raw_cpu_add_8
+# define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=)
+# endif
+# define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val))
+#endif
+
+#ifndef raw_cpu_sub
+# define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val))
+#endif
+
+#ifndef raw_cpu_inc
+# define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1)
+#endif
+
+#ifndef raw_cpu_dec
+# define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1)
+#endif
+
+#ifndef raw_cpu_and
+# ifndef raw_cpu_and_1
+# define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# ifndef raw_cpu_and_2
+# define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# ifndef raw_cpu_and_4
+# define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# ifndef raw_cpu_and_8
+# define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=)
+# endif
+# define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val))
+#endif
+
+#ifndef raw_cpu_or
+# ifndef raw_cpu_or_1
+# define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# ifndef raw_cpu_or_2
+# define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# ifndef raw_cpu_or_4
+# define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# ifndef raw_cpu_or_8
+# define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=)
+# endif
+# define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val))
+#endif
+
+#define raw_cpu_generic_add_return(pcp, val) \
+({ \
+ raw_cpu_add(pcp, val); \
+ raw_cpu_read(pcp); \
+})
+
+#ifndef raw_cpu_add_return
+# ifndef raw_cpu_add_return_1
+# define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef raw_cpu_add_return_2
+# define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef raw_cpu_add_return_4
+# define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val)
+# endif
+# ifndef raw_cpu_add_return_8
+# define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val)
+# endif
+# define raw_cpu_add_return(pcp, val) \
+ __pcpu_size_call_return2(raw_add_return_, pcp, val)
+#endif
+
+#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val))
+#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1)
+#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1)
+
+#define raw_cpu_generic_xchg(pcp, nval) \
+({ typeof(pcp) ret__; \
+ ret__ = raw_cpu_read(pcp); \
+ raw_cpu_write(pcp, nval); \
+ ret__; \
+})
+
+#ifndef raw_cpu_xchg
+# ifndef raw_cpu_xchg_1
+# define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef raw_cpu_xchg_2
+# define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef raw_cpu_xchg_4
+# define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval)
+# endif
+# ifndef raw_cpu_xchg_8
+# define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval)
+# endif
+# define raw_cpu_xchg(pcp, nval) \
+ __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)
+#endif
+
+#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
+({ \
+ typeof(pcp) ret__; \
+ ret__ = raw_cpu_read(pcp); \
+ if (ret__ == (oval)) \
+ raw_cpu_write(pcp, nval); \
+ ret__; \
+})
+
+#ifndef raw_cpu_cmpxchg
+# ifndef raw_cpu_cmpxchg_1
+# define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef raw_cpu_cmpxchg_2
+# define raw_cpu_cmpxchg_2(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef raw_cpu_cmpxchg_4
+# define raw_cpu_cmpxchg_4(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# ifndef raw_cpu_cmpxchg_8
+# define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval)
+# endif
+# define raw_cpu_cmpxchg(pcp, oval, nval) \
+ __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)
+#endif
+
+#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+({ \
+ int __ret = 0; \
+ if (raw_cpu_read(pcp1) == (oval1) && \
+ raw_cpu_read(pcp2) == (oval2)) { \
+ raw_cpu_write(pcp1, (nval1)); \
+ raw_cpu_write(pcp2, (nval2)); \
+ __ret = 1; \
+ } \
+ (__ret); \
+})
+
+#ifndef raw_cpu_cmpxchg_double
+# ifndef raw_cpu_cmpxchg_double_1
+# define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef raw_cpu_cmpxchg_double_2
+# define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef raw_cpu_cmpxchg_double_4
+# define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef raw_cpu_cmpxchg_double_8
+# define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
+#endif
+
+/*
+ * Generic percpu operations for context that are safe from preemption/interrupts.
+ */
+#ifndef __this_cpu_read
+# define __this_cpu_read(pcp) \
+ (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp)))
+#endif
+
#ifndef __this_cpu_write
-# ifndef __this_cpu_write_1
-# define __this_cpu_write_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), =)
-# endif
-# ifndef __this_cpu_write_2
-# define __this_cpu_write_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), =)
-# endif
-# ifndef __this_cpu_write_4
-# define __this_cpu_write_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), =)
-# endif
-# ifndef __this_cpu_write_8
-# define __this_cpu_write_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), =)
-# endif
-# define __this_cpu_write(pcp, val) __pcpu_size_call(__this_cpu_write_, (pcp), (val))
+# define __this_cpu_write(pcp, val) \
+do { __this_cpu_preempt_check("write"); \
+ __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \
+} while (0)
#endif
#ifndef __this_cpu_add
-# ifndef __this_cpu_add_1
-# define __this_cpu_add_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=)
-# endif
-# ifndef __this_cpu_add_2
-# define __this_cpu_add_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=)
-# endif
-# ifndef __this_cpu_add_4
-# define __this_cpu_add_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=)
-# endif
-# ifndef __this_cpu_add_8
-# define __this_cpu_add_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=)
-# endif
-# define __this_cpu_add(pcp, val) __pcpu_size_call(__this_cpu_add_, (pcp), (val))
+# define __this_cpu_add(pcp, val) \
+do { __this_cpu_preempt_check("add"); \
+ __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \
+} while (0)
#endif
#ifndef __this_cpu_sub
@@ -583,145 +764,42 @@
#endif
#ifndef __this_cpu_and
-# ifndef __this_cpu_and_1
-# define __this_cpu_and_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=)
-# endif
-# ifndef __this_cpu_and_2
-# define __this_cpu_and_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=)
-# endif
-# ifndef __this_cpu_and_4
-# define __this_cpu_and_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=)
-# endif
-# ifndef __this_cpu_and_8
-# define __this_cpu_and_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=)
-# endif
-# define __this_cpu_and(pcp, val) __pcpu_size_call(__this_cpu_and_, (pcp), (val))
+# define __this_cpu_and(pcp, val) \
+do { __this_cpu_preempt_check("and"); \
+ __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \
+} while (0)
+
#endif
#ifndef __this_cpu_or
-# ifndef __this_cpu_or_1
-# define __this_cpu_or_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=)
-# endif
-# ifndef __this_cpu_or_2
-# define __this_cpu_or_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=)
-# endif
-# ifndef __this_cpu_or_4
-# define __this_cpu_or_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=)
-# endif
-# ifndef __this_cpu_or_8
-# define __this_cpu_or_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=)
-# endif
-# define __this_cpu_or(pcp, val) __pcpu_size_call(__this_cpu_or_, (pcp), (val))
+# define __this_cpu_or(pcp, val) \
+do { __this_cpu_preempt_check("or"); \
+ __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \
+} while (0)
#endif
-#define __this_cpu_generic_add_return(pcp, val) \
-({ \
- __this_cpu_add(pcp, val); \
- __this_cpu_read(pcp); \
-})
-
#ifndef __this_cpu_add_return
-# ifndef __this_cpu_add_return_1
-# define __this_cpu_add_return_1(pcp, val) __this_cpu_generic_add_return(pcp, val)
-# endif
-# ifndef __this_cpu_add_return_2
-# define __this_cpu_add_return_2(pcp, val) __this_cpu_generic_add_return(pcp, val)
-# endif
-# ifndef __this_cpu_add_return_4
-# define __this_cpu_add_return_4(pcp, val) __this_cpu_generic_add_return(pcp, val)
-# endif
-# ifndef __this_cpu_add_return_8
-# define __this_cpu_add_return_8(pcp, val) __this_cpu_generic_add_return(pcp, val)
-# endif
# define __this_cpu_add_return(pcp, val) \
- __pcpu_size_call_return2(__this_cpu_add_return_, pcp, val)
+ (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val))
#endif
#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val))
#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1)
#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1)
-#define __this_cpu_generic_xchg(pcp, nval) \
-({ typeof(pcp) ret__; \
- ret__ = __this_cpu_read(pcp); \
- __this_cpu_write(pcp, nval); \
- ret__; \
-})
-
#ifndef __this_cpu_xchg
-# ifndef __this_cpu_xchg_1
-# define __this_cpu_xchg_1(pcp, nval) __this_cpu_generic_xchg(pcp, nval)
-# endif
-# ifndef __this_cpu_xchg_2
-# define __this_cpu_xchg_2(pcp, nval) __this_cpu_generic_xchg(pcp, nval)
-# endif
-# ifndef __this_cpu_xchg_4
-# define __this_cpu_xchg_4(pcp, nval) __this_cpu_generic_xchg(pcp, nval)
-# endif
-# ifndef __this_cpu_xchg_8
-# define __this_cpu_xchg_8(pcp, nval) __this_cpu_generic_xchg(pcp, nval)
-# endif
# define __this_cpu_xchg(pcp, nval) \
- __pcpu_size_call_return2(__this_cpu_xchg_, (pcp), nval)
+ (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval))
#endif
-#define __this_cpu_generic_cmpxchg(pcp, oval, nval) \
-({ \
- typeof(pcp) ret__; \
- ret__ = __this_cpu_read(pcp); \
- if (ret__ == (oval)) \
- __this_cpu_write(pcp, nval); \
- ret__; \
-})
-
#ifndef __this_cpu_cmpxchg
-# ifndef __this_cpu_cmpxchg_1
-# define __this_cpu_cmpxchg_1(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval)
-# endif
-# ifndef __this_cpu_cmpxchg_2
-# define __this_cpu_cmpxchg_2(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval)
-# endif
-# ifndef __this_cpu_cmpxchg_4
-# define __this_cpu_cmpxchg_4(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval)
-# endif
-# ifndef __this_cpu_cmpxchg_8
-# define __this_cpu_cmpxchg_8(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval)
-# endif
# define __this_cpu_cmpxchg(pcp, oval, nval) \
- __pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval)
+ (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval))
#endif
-#define __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
-({ \
- int __ret = 0; \
- if (__this_cpu_read(pcp1) == (oval1) && \
- __this_cpu_read(pcp2) == (oval2)) { \
- __this_cpu_write(pcp1, (nval1)); \
- __this_cpu_write(pcp2, (nval2)); \
- __ret = 1; \
- } \
- (__ret); \
-})
-
#ifndef __this_cpu_cmpxchg_double
-# ifndef __this_cpu_cmpxchg_double_1
-# define __this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
- __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
-# endif
-# ifndef __this_cpu_cmpxchg_double_2
-# define __this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
- __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
-# endif
-# ifndef __this_cpu_cmpxchg_double_4
-# define __this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
- __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
-# endif
-# ifndef __this_cpu_cmpxchg_double_8
-# define __this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
- __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
-# endif
# define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
- __pcpu_double_call_return_bool(__this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
+ (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)))
#endif
#endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 201a697..56b7bc3 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -104,15 +104,13 @@
* units, e.g. numbers, bytes, Kbytes, etc
*
* returns 0 on success and <0 if the counter->usage will exceed the
- * counter->limit _locked call expects the counter->lock to be taken
+ * counter->limit
*
* charge_nofail works the same, except that it charges the resource
* counter unconditionally, and returns < 0 if the after the current
* charge we are over limit.
*/
-int __must_check res_counter_charge_locked(struct res_counter *counter,
- unsigned long val, bool force);
int __must_check res_counter_charge(struct res_counter *counter,
unsigned long val, struct res_counter **limit_fail_at);
int res_counter_charge_nofail(struct res_counter *counter,
@@ -125,12 +123,10 @@
* @val: the amount of the resource
*
* these calls check for usage underflow and show a warning on the console
- * _locked call expects the counter->lock to be taken
*
* returns the total charges still present in @counter.
*/
-u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
u64 res_counter_uncharge(struct res_counter *counter, unsigned long val);
u64 res_counter_uncharge_until(struct res_counter *counter,
diff --git a/include/linux/rio.h b/include/linux/rio.h
index b71d573..6bda06f 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h
@@ -83,7 +83,7 @@
#define RIO_CTAG_UDEVID 0x0001ffff /* Unique device identifier */
extern struct bus_type rio_bus_type;
-extern struct device rio_bus;
+extern struct class rio_mport_class;
struct rio_mport;
struct rio_dev;
@@ -201,6 +201,7 @@
#define rio_dev_f(n) list_entry(n, struct rio_dev, net_list)
#define to_rio_dev(n) container_of(n, struct rio_dev, dev)
#define sw_to_rio_dev(n) container_of(n, struct rio_dev, rswitch[0])
+#define to_rio_mport(n) container_of(n, struct rio_mport, dev)
/**
* struct rio_msg - RIO message event
@@ -248,6 +249,7 @@
* @phy_type: RapidIO phy type
* @phys_efptr: RIO port extended features pointer
* @name: Port name string
+ * @dev: device structure associated with an mport
* @priv: Master port private data
* @dma: DMA device associated with mport
* @nscan: RapidIO network enumeration/discovery operations
@@ -272,6 +274,7 @@
enum rio_phy_type phy_type; /* RapidIO phy type */
u32 phys_efptr;
unsigned char name[RIO_MAX_MPORT_NAME];
+ struct device dev;
void *priv; /* Master port private data */
#ifdef CONFIG_RAPIDIO_DMA_ENGINE
struct dma_device dma;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7cb07fd..075b305 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -132,6 +132,10 @@
struct blk_plug;
struct filename;
+#define VMACACHE_BITS 2
+#define VMACACHE_SIZE (1U << VMACACHE_BITS)
+#define VMACACHE_MASK (VMACACHE_SIZE - 1)
+
/*
* List of flags we want to share for kernel threads,
* if only because they are not used by them anyway.
@@ -206,8 +210,9 @@
#define __TASK_STOPPED 4
#define __TASK_TRACED 8
/* in tsk->exit_state */
-#define EXIT_ZOMBIE 16
-#define EXIT_DEAD 32
+#define EXIT_DEAD 16
+#define EXIT_ZOMBIE 32
+#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
/* in tsk->state again */
#define TASK_DEAD 64
#define TASK_WAKEKILL 128
@@ -1235,6 +1240,9 @@
#ifdef CONFIG_COMPAT_BRK
unsigned brk_randomized:1;
#endif
+ /* per-thread vma caching */
+ u32 vmacache_seqnum;
+ struct vm_area_struct *vmacache[VMACACHE_SIZE];
#if defined(SPLIT_RSS_COUNTING)
struct task_rss_stat rss_stat;
#endif
@@ -1844,7 +1852,6 @@
#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
-#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */
@@ -2351,7 +2358,7 @@
struct task_struct *fork_idle(int);
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-extern void set_task_comm(struct task_struct *tsk, char *from);
+extern void set_task_comm(struct task_struct *tsk, const char *from);
extern char *get_task_comm(char *to, struct task_struct *tsk);
#ifdef CONFIG_SMP
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b5b2df6..3dd389a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -115,9 +115,9 @@
struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
unsigned long,
void (*)(void *));
-struct kmem_cache *
-kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
- unsigned long, void (*)(void *), struct kmem_cache *);
+#ifdef CONFIG_MEMCG_KMEM
+void kmem_cache_create_memcg(struct mem_cgroup *, struct kmem_cache *);
+#endif
void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *);
void kmem_cache_free(struct kmem_cache *, void *);
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index f56bfa9..f2f7398 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -87,6 +87,9 @@
#ifdef CONFIG_MEMCG_KMEM
struct memcg_cache_params *memcg_params;
int max_attr_size; /* for propagation, maximum size of a stored attr */
+#ifdef CONFIG_SYSFS
+ struct kset *memcg_kset;
+#endif
#endif
#ifdef CONFIG_NUMA
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 12ae6ce..7062330 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -188,7 +188,7 @@
/* Returns the number of the current Node. */
static inline int numa_node_id(void)
{
- return __this_cpu_read(numa_node);
+ return raw_cpu_read(numa_node);
}
#endif
@@ -245,7 +245,7 @@
/* Returns the number of the nearest Node with memory */
static inline int numa_mem_id(void)
{
- return __this_cpu_read(_numa_mem_);
+ return raw_cpu_read(_numa_mem_);
}
#endif
diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h
new file mode 100644
index 0000000..c3fa0fd4
--- /dev/null
+++ b/include/linux/vmacache.h
@@ -0,0 +1,38 @@
+#ifndef __LINUX_VMACACHE_H
+#define __LINUX_VMACACHE_H
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+/*
+ * Hash based on the page number. Provides a good hit rate for
+ * workloads with good locality and those with random accesses as well.
+ */
+#define VMACACHE_HASH(addr) ((addr >> PAGE_SHIFT) & VMACACHE_MASK)
+
+static inline void vmacache_flush(struct task_struct *tsk)
+{
+ memset(tsk->vmacache, 0, sizeof(tsk->vmacache));
+}
+
+extern void vmacache_flush_all(struct mm_struct *mm);
+extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
+extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
+ unsigned long addr);
+
+#ifndef CONFIG_MMU
+extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end);
+#endif
+
+static inline void vmacache_invalidate(struct mm_struct *mm)
+{
+ mm->vmacache_seqnum++;
+
+ /* deal with overflows */
+ if (unlikely(mm->vmacache_seqnum == 0))
+ vmacache_flush_all(mm);
+}
+
+#endif /* __LINUX_VMACACHE_H */
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index ea44761..45c9cd1 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -27,9 +27,13 @@
DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
+/*
+ * vm counters are allowed to be racy. Use raw_cpu_ops to avoid the
+ * local_irq_disable overhead.
+ */
static inline void __count_vm_event(enum vm_event_item item)
{
- __this_cpu_inc(vm_event_states.event[item]);
+ raw_cpu_inc(vm_event_states.event[item]);
}
static inline void count_vm_event(enum vm_event_item item)
@@ -39,7 +43,7 @@
static inline void __count_vm_events(enum vm_event_item item, long delta)
{
- __this_cpu_add(vm_event_states.event[item], delta);
+ raw_cpu_add(vm_event_states.event[item], delta);
}
static inline void count_vm_events(enum vm_event_item item, long delta)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 021b8a3..5777c13 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -178,7 +178,7 @@
struct writeback_control *wbc, writepage_t writepage,
void *data);
int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
-void set_page_dirty_balance(struct page *page, int page_mkwrite);
+void set_page_dirty_balance(struct page *page);
void writeback_set_ratelimit(void);
void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end);
diff --git a/include/trace/events/task.h b/include/trace/events/task.h
index 102a646..dee3bb1 100644
--- a/include/trace/events/task.h
+++ b/include/trace/events/task.h
@@ -32,7 +32,7 @@
TRACE_EVENT(task_rename,
- TP_PROTO(struct task_struct *task, char *comm),
+ TP_PROTO(struct task_struct *task, const char *comm),
TP_ARGS(task, comm),
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 4164529..ddc3b36 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -50,7 +50,7 @@
#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump,
overrides the coredump filter bits */
-#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */
+#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */
/* compatibility flags */
#define MAP_FILE 0
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 289760f..58afc04 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -149,4 +149,7 @@
#define PR_GET_TID_ADDRESS 40
+#define PR_SET_THP_DISABLE 41
+#define PR_GET_THP_DISABLE 42
+
#endif /* _LINUX_PRCTL_H */
diff --git a/init/Kconfig b/init/Kconfig
index 8851c64..427ba60 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1483,6 +1483,7 @@
config EMBEDDED
bool "Embedded system"
+ option allnoconfig_y
select EXPERT
help
This option should be enabled if compiling the kernel for
diff --git a/init/initramfs.c b/init/initramfs.c
index 93b6139..a8497fa 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -455,6 +455,7 @@
}
this_header = 0;
decompress = decompress_method(buf, len, &compress_name);
+ pr_debug("Detected %s compressed data\n", compress_name);
if (decompress) {
res = decompress(buf, len, NULL, flush_buffer, NULL,
&my_inptr, error);
diff --git a/ipc/compat.c b/ipc/compat.c
index a4695ad..45d035d 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -113,9 +113,6 @@
compat_ulong_t swap_attempts, swap_successes;
};
-extern int sem_ctls[];
-#define sc_semopm (sem_ctls[2])
-
static inline int compat_ipc_parse_version(int *cmd)
{
#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 1702864..998d31b 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -281,4 +281,4 @@
return 0;
}
-__initcall(ipc_sysctl_init);
+device_initcall(ipc_sysctl_init);
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c3b3117..4fcf39a 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1459,4 +1459,4 @@
return error;
}
-__initcall(init_mqueue_fs);
+device_initcall(init_mqueue_fs);
diff --git a/ipc/util.c b/ipc/util.c
index e1b4c6d..2eb0d1e 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -128,7 +128,7 @@
register_ipcns_notifier(&init_ipc_ns);
return 0;
}
-__initcall(ipc_init);
+device_initcall(ipc_init);
/**
* ipc_init_ids - initialise ipc identifiers
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 99982a7..2956c8d 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -49,6 +49,7 @@
#include <linux/pid.h>
#include <linux/smp.h>
#include <linux/mm.h>
+#include <linux/vmacache.h>
#include <linux/rcupdate.h>
#include <asm/cacheflush.h>
@@ -224,10 +225,17 @@
if (!CACHE_FLUSH_IS_SAFE)
return;
- if (current->mm && current->mm->mmap_cache) {
- flush_cache_range(current->mm->mmap_cache,
- addr, addr + BREAK_INSTR_SIZE);
+ if (current->mm) {
+ int i;
+
+ for (i = 0; i < VMACACHE_SIZE; i++) {
+ if (!current->vmacache[i])
+ continue;
+ flush_cache_range(current->vmacache[i],
+ addr, addr + BREAK_INSTR_SIZE);
+ }
}
+
/* Force flush instruction cache if it was outside the mm */
flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
}
diff --git a/kernel/exit.c b/kernel/exit.c
index 6480d1c..6ed6a1d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -570,7 +570,7 @@
if (same_thread_group(p->real_parent, father))
return;
- /* We don't want people slaying init. */
+ /* We don't want people slaying init. */
p->exit_signal = SIGCHLD;
/* If it has exited notify the new parent about this child's death. */
@@ -784,9 +784,10 @@
exit_shm(tsk);
exit_files(tsk);
exit_fs(tsk);
+ if (group_dead)
+ disassociate_ctty(1);
exit_task_namespaces(tsk);
exit_task_work(tsk);
- check_stack_usage();
exit_thread();
/*
@@ -799,19 +800,15 @@
cgroup_exit(tsk);
- if (group_dead)
- disassociate_ctty(1);
-
module_put(task_thread_info(tsk)->exec_domain->module);
- proc_exit_connector(tsk);
-
/*
* FIXME: do that only when needed, using sched_exit tracepoint
*/
flush_ptrace_hw_breakpoint(tsk);
exit_notify(tsk, group_dead);
+ proc_exit_connector(tsk);
#ifdef CONFIG_NUMA
task_lock(tsk);
mpol_put(tsk->mempolicy);
@@ -844,6 +841,7 @@
validate_creds_for_do_exit(tsk);
+ check_stack_usage();
preempt_disable();
if (tsk->nr_dirtied)
__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
@@ -1038,18 +1036,14 @@
return wait_noreap_copyout(wo, p, pid, uid, why, status);
}
- /*
- * Try to move the task's state to DEAD
- * only one thread is allowed to do this:
- */
- state = xchg(&p->exit_state, EXIT_DEAD);
- if (state != EXIT_ZOMBIE) {
- BUG_ON(state != EXIT_DEAD);
- return 0;
- }
-
traced = ptrace_reparented(p);
/*
+ * Move the task's state to DEAD/TRACE, only one thread can do this.
+ */
+ state = traced && thread_group_leader(p) ? EXIT_TRACE : EXIT_DEAD;
+ if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
+ return 0;
+ /*
* It can be ptraced but not reparented, check
* thread_group_leader() to filter out sub-threads.
*/
@@ -1109,7 +1103,7 @@
/*
* Now we are sure this task is interesting, and no other
- * thread can reap it because we set its state to EXIT_DEAD.
+ * thread can reap it because we its state == DEAD/TRACE.
*/
read_unlock(&tasklist_lock);
@@ -1146,22 +1140,19 @@
if (!retval)
retval = pid;
- if (traced) {
+ if (state == EXIT_TRACE) {
write_lock_irq(&tasklist_lock);
/* We dropped tasklist, ptracer could die and untrace */
ptrace_unlink(p);
- /*
- * If this is not a sub-thread, notify the parent.
- * If parent wants a zombie, don't release it now.
- */
- if (thread_group_leader(p) &&
- !do_notify_parent(p, p->exit_signal)) {
- p->exit_state = EXIT_ZOMBIE;
- p = NULL;
- }
+
+ /* If parent wants a zombie, don't release it now */
+ state = EXIT_ZOMBIE;
+ if (do_notify_parent(p, p->exit_signal))
+ state = EXIT_DEAD;
+ p->exit_state = state;
write_unlock_irq(&tasklist_lock);
}
- if (p != NULL)
+ if (state == EXIT_DEAD)
release_task(p);
return retval;
@@ -1338,7 +1329,12 @@
static int wait_consider_task(struct wait_opts *wo, int ptrace,
struct task_struct *p)
{
- int ret = eligible_child(wo, p);
+ int ret;
+
+ if (unlikely(p->exit_state == EXIT_DEAD))
+ return 0;
+
+ ret = eligible_child(wo, p);
if (!ret)
return ret;
@@ -1356,33 +1352,44 @@
return 0;
}
- /* dead body doesn't have much to contribute */
- if (unlikely(p->exit_state == EXIT_DEAD)) {
+ if (unlikely(p->exit_state == EXIT_TRACE)) {
/*
- * But do not ignore this task until the tracer does
- * wait_task_zombie()->do_notify_parent().
+ * ptrace == 0 means we are the natural parent. In this case
+ * we should clear notask_error, debugger will notify us.
*/
- if (likely(!ptrace) && unlikely(ptrace_reparented(p)))
+ if (likely(!ptrace))
wo->notask_error = 0;
return 0;
}
+ if (likely(!ptrace) && unlikely(p->ptrace)) {
+ /*
+ * If it is traced by its real parent's group, just pretend
+ * the caller is ptrace_do_wait() and reap this child if it
+ * is zombie.
+ *
+ * This also hides group stop state from real parent; otherwise
+ * a single stop can be reported twice as group and ptrace stop.
+ * If a ptracer wants to distinguish these two events for its
+ * own children it should create a separate process which takes
+ * the role of real parent.
+ */
+ if (!ptrace_reparented(p))
+ ptrace = 1;
+ }
+
/* slay zombie? */
if (p->exit_state == EXIT_ZOMBIE) {
- /*
- * A zombie ptracee is only visible to its ptracer.
- * Notification and reaping will be cascaded to the real
- * parent when the ptracer detaches.
- */
- if (likely(!ptrace) && unlikely(p->ptrace)) {
- /* it will become visible, clear notask_error */
- wo->notask_error = 0;
- return 0;
- }
-
/* we don't reap group leaders with subthreads */
- if (!delay_group_leader(p))
- return wait_task_zombie(wo, p);
+ if (!delay_group_leader(p)) {
+ /*
+ * A zombie ptracee is only visible to its ptracer.
+ * Notification and reaping will be cascaded to the
+ * real parent when the ptracer detaches.
+ */
+ if (unlikely(ptrace) || likely(!p->ptrace))
+ return wait_task_zombie(wo, p);
+ }
/*
* Allow access to stopped/continued state via zombie by
@@ -1408,19 +1415,6 @@
wo->notask_error = 0;
} else {
/*
- * If @p is ptraced by a task in its real parent's group,
- * hide group stop/continued state when looking at @p as
- * the real parent; otherwise, a single stop can be
- * reported twice as group and ptrace stops.
- *
- * If a ptracer wants to distinguish the two events for its
- * own children, it should create a separate process which
- * takes the role of real parent.
- */
- if (likely(!ptrace) && p->ptrace && !ptrace_reparented(p))
- return 0;
-
- /*
* @p is alive and it's gonna stop, continue or exit, so
* there always is something to wait for.
*/
diff --git a/kernel/fork.c b/kernel/fork.c
index abc4589..54a8d26 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -28,6 +28,8 @@
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
#include <linux/nsproxy.h>
#include <linux/capability.h>
#include <linux/cpu.h>
@@ -71,6 +73,7 @@
#include <linux/signalfd.h>
#include <linux/uprobes.h>
#include <linux/aio.h>
+#include <linux/compiler.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -284,7 +287,7 @@
init_task.signal->rlim[RLIMIT_NPROC];
}
-int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
+int __weak arch_dup_task_struct(struct task_struct *dst,
struct task_struct *src)
{
*dst = *src;
@@ -364,7 +367,7 @@
mm->locked_vm = 0;
mm->mmap = NULL;
- mm->mmap_cache = NULL;
+ mm->vmacache_seqnum = 0;
mm->map_count = 0;
cpumask_clear(mm_cpumask(mm));
mm->mm_rb = RB_ROOT;
@@ -530,8 +533,6 @@
atomic_set(&mm->mm_count, 1);
init_rwsem(&mm->mmap_sem);
INIT_LIST_HEAD(&mm->mmlist);
- mm->flags = (current->mm) ?
- (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
mm->core_state = NULL;
atomic_long_set(&mm->nr_ptes, 0);
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
@@ -540,8 +541,15 @@
mm_init_owner(mm, p);
clear_tlb_flush_pending(mm);
- if (likely(!mm_alloc_pgd(mm))) {
+ if (current->mm) {
+ mm->flags = current->mm->flags & MMF_INIT_MASK;
+ mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
+ } else {
+ mm->flags = default_dump_filter;
mm->def_flags = 0;
+ }
+
+ if (likely(!mm_alloc_pgd(mm))) {
mmu_notifier_mm_init(mm);
return mm;
}
@@ -877,6 +885,9 @@
if (!oldmm)
return 0;
+ /* initialize the new vmacache entries */
+ vmacache_flush(tsk);
+
if (clone_flags & CLONE_VM) {
atomic_inc(&oldmm->mm_users);
mm = oldmm;
@@ -1070,15 +1081,6 @@
return 0;
}
-static void copy_flags(unsigned long clone_flags, struct task_struct *p)
-{
- unsigned long new_flags = p->flags;
-
- new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
- new_flags |= PF_FORKNOEXEC;
- p->flags = new_flags;
-}
-
SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
{
current->clear_child_tid = tidptr;
@@ -1228,7 +1230,8 @@
goto bad_fork_cleanup_count;
delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
- copy_flags(clone_flags, p);
+ p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
+ p->flags |= PF_FORKNOEXEC;
INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
rcu_copy_process(p);
@@ -1274,7 +1277,6 @@
p->mempolicy = NULL;
goto bad_fork_cleanup_threadgroup_lock;
}
- mpol_fix_fork_child_flag(p);
#endif
#ifdef CONFIG_CPUSETS
p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 3127ad5..cb0cf37 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -23,6 +23,7 @@
#include <linux/mm.h>
#include <linux/ctype.h>
#include <linux/slab.h>
+#include <linux/compiler.h>
#include <asm/sections.h>
@@ -36,8 +37,8 @@
* These will be re-linked against their real values
* during the second link stage.
*/
-extern const unsigned long kallsyms_addresses[] __attribute__((weak));
-extern const u8 kallsyms_names[] __attribute__((weak));
+extern const unsigned long kallsyms_addresses[] __weak;
+extern const u8 kallsyms_names[] __weak;
/*
* Tell the compiler that the count isn't in the small data section if the arch
@@ -46,10 +47,10 @@
extern const unsigned long kallsyms_num_syms
__attribute__((weak, section(".rodata")));
-extern const u8 kallsyms_token_table[] __attribute__((weak));
-extern const u16 kallsyms_token_index[] __attribute__((weak));
+extern const u8 kallsyms_token_table[] __weak;
+extern const u16 kallsyms_token_index[] __weak;
-extern const unsigned long kallsyms_markers[] __attribute__((weak));
+extern const unsigned long kallsyms_markers[] __weak;
static inline int is_kernel_inittext(unsigned long addr)
{
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c0d261c..c8380ad 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -32,6 +32,7 @@
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/syscore_ops.h>
+#include <linux/compiler.h>
#include <asm/page.h>
#include <asm/uaccess.h>
@@ -1551,10 +1552,10 @@
* provide an empty default implementation here -- architecture
* code may override this
*/
-void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void)
+void __weak arch_crash_save_vmcoreinfo(void)
{}
-unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
+unsigned long __weak paddr_vmcoreinfo_note(void)
{
return __pa((unsigned long)(char *)&vmcoreinfo_note);
}
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index e660964..2495a9b 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -18,6 +18,7 @@
#include <linux/stat.h>
#include <linux/sched.h>
#include <linux/capability.h>
+#include <linux/compiler.h>
#include <linux/rcupdate.h> /* rcu_expedited */
@@ -162,8 +163,8 @@
/*
* Make /sys/kernel/notes give the raw contents of our kernel .notes section.
*/
-extern const void __start_notes __attribute__((weak));
-extern const void __stop_notes __attribute__((weak));
+extern const void __start_notes __weak;
+extern const void __stop_notes __weak;
#define notes_size (&__stop_notes - &__start_notes)
static ssize_t notes_read(struct file *filp, struct kobject *kobj,
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 306a76b..b8bdcd4 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,5 +1,5 @@
-obj-y += mutex.o semaphore.o rwsem.o lglock.o mcs_spinlock.o
+obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_lockdep.o = -pg
@@ -14,6 +14,7 @@
obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
endif
obj-$(CONFIG_SMP) += spinlock.o
+obj-$(CONFIG_SMP) += lglock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
diff --git a/kernel/module.c b/kernel/module.c
index 29f7790..1186940 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -640,7 +640,7 @@
INIT_LIST_HEAD(&mod->target_list);
/* Hold reference count during initialization. */
- __this_cpu_write(mod->refptr->incs, 1);
+ raw_cpu_write(mod->refptr->incs, 1);
return 0;
}
diff --git a/kernel/panic.c b/kernel/panic.c
index 79fd820..d02fa9f 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -100,7 +100,7 @@
va_start(args, fmt);
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
- printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
+ pr_emerg("Kernel panic - not syncing: %s\n", buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
/*
* Avoid nested stack-dumping if a panic occurs during oops processing
@@ -141,7 +141,7 @@
* Delay timeout seconds before rebooting the machine.
* We can't use the "normal" timers since we just panicked.
*/
- printk(KERN_EMERG "Rebooting in %d seconds..", panic_timeout);
+ pr_emerg("Rebooting in %d seconds..", panic_timeout);
for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
touch_nmi_watchdog();
@@ -165,7 +165,7 @@
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
- printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
+ pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n");
}
#endif
#if defined(CONFIG_S390)
@@ -176,6 +176,7 @@
disabled_wait(caller);
}
#endif
+ pr_emerg("---[ end Kernel panic - not syncing: %s\n", buf);
local_irq_enable();
for (i = 0; ; i += PANIC_TIMER_STEP) {
touch_softlockup_watchdog();
@@ -276,8 +277,7 @@
void add_taint(unsigned flag, enum lockdep_ok lockdep_ok)
{
if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off())
- printk(KERN_WARNING
- "Disabling lock debugging due to kernel taint\n");
+ pr_warn("Disabling lock debugging due to kernel taint\n");
set_bit(flag, &tainted_mask);
}
@@ -382,8 +382,7 @@
void print_oops_end_marker(void)
{
init_oops_id();
- printk(KERN_WARNING "---[ end trace %016llx ]---\n",
- (unsigned long long)oops_id);
+ pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
}
/*
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 1ca7531..15f37ea 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -2,6 +2,7 @@
#include <linux/suspend_ioctls.h>
#include <linux/utsname.h>
#include <linux/freezer.h>
+#include <linux/compiler.h>
struct swsusp_info {
struct new_utsname uts;
@@ -11,7 +12,7 @@
unsigned long image_pages;
unsigned long pages;
unsigned long size;
-} __attribute__((aligned(PAGE_SIZE)));
+} __aligned(PAGE_SIZE);
#ifdef CONFIG_HIBERNATION
/* kernel/power/snapshot.c */
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 149e745..18fb7a2 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -27,6 +27,7 @@
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <linux/compiler.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -155,7 +156,7 @@
struct linked_page {
struct linked_page *next;
char data[LINKED_PAGE_DATA_SIZE];
-} __attribute__((packed));
+} __packed;
static inline void
free_list_of_pages(struct linked_page *list, int clear_page_nosave)
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 90b3d93..c3ad9ca 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -26,6 +26,7 @@
#include <linux/syscore_ops.h>
#include <linux/ftrace.h>
#include <trace/events/power.h>
+#include <linux/compiler.h>
#include "power.h"
@@ -156,13 +157,13 @@
}
/* default implementation */
-void __attribute__ ((weak)) arch_suspend_disable_irqs(void)
+void __weak arch_suspend_disable_irqs(void)
{
local_irq_disable();
}
/* default implementation */
-void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
+void __weak arch_suspend_enable_irqs(void)
{
local_irq_enable();
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 7c33ed2..8c9a481 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -101,7 +101,7 @@
unsigned int flags; /* Flags to pass to the "boot" kernel */
char orig_sig[10];
char sig[10];
-} __attribute__((packed));
+} __packed;
static struct swsusp_header *swsusp_header;
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 4aa8a30..51dbac6 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -22,8 +22,18 @@
counter->parent = parent;
}
-int res_counter_charge_locked(struct res_counter *counter, unsigned long val,
- bool force)
+static u64 res_counter_uncharge_locked(struct res_counter *counter,
+ unsigned long val)
+{
+ if (WARN_ON(counter->usage < val))
+ val = counter->usage;
+
+ counter->usage -= val;
+ return counter->usage;
+}
+
+static int res_counter_charge_locked(struct res_counter *counter,
+ unsigned long val, bool force)
{
int ret = 0;
@@ -86,15 +96,6 @@
return __res_counter_charge(counter, val, limit_fail_at, true);
}
-u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
-{
- if (WARN_ON(counter->usage < val))
- val = counter->usage;
-
- counter->usage -= val;
- return counter->usage;
-}
-
u64 res_counter_uncharge_until(struct res_counter *counter,
struct res_counter *top,
unsigned long val)
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index b30a292..3ef6451 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -60,13 +60,14 @@
#include <linux/sched.h>
#include <linux/static_key.h>
#include <linux/workqueue.h>
+#include <linux/compiler.h>
/*
* Scheduler clock - returns current time in nanosec units.
* This is default implementation.
* Architectures and sub-architectures can override this.
*/
-unsigned long long __attribute__((weak)) sched_clock(void)
+unsigned long long __weak sched_clock(void)
{
return (unsigned long long)(jiffies - INITIAL_JIFFIES)
* (NSEC_PER_SEC / HZ);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0ff3f34..268a45e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -73,6 +73,7 @@
#include <linux/init_task.h>
#include <linux/binfmts.h>
#include <linux/context_tracking.h>
+#include <linux/compiler.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -6452,7 +6453,7 @@
* cpu core maps. It is supposed to return 1 if the topology changed
* or 0 if it stayed the same.
*/
-int __attribute__((weak)) arch_update_cpu_topology(void)
+int __weak arch_update_cpu_topology(void)
{
return 0;
}
diff --git a/kernel/signal.c b/kernel/signal.c
index 5d4b05a..6ea13c0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -33,6 +33,8 @@
#include <linux/uprobes.h>
#include <linux/compat.h>
#include <linux/cn_proc.h>
+#include <linux/compiler.h>
+
#define CREATE_TRACE_POINTS
#include <trace/events/signal.h>
@@ -3618,7 +3620,7 @@
}
#endif
-__attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
+__weak const char *arch_vma_name(struct vm_area_struct *vma)
{
return NULL;
}
diff --git a/kernel/sys.c b/kernel/sys.c
index adaeab6..fba0f29 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1996,6 +1996,21 @@
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
return current->no_new_privs ? 1 : 0;
+ case PR_GET_THP_DISABLE:
+ if (arg2 || arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
+ break;
+ case PR_SET_THP_DISABLE:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ down_write(&me->mm->mmap_sem);
+ if (arg2)
+ me->mm->def_flags |= VM_NOHUGEPAGE;
+ else
+ me->mm->def_flags &= ~VM_NOHUGEPAGE;
+ up_write(&me->mm->mmap_sem);
+ break;
default:
error = -EINVAL;
break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5c14b54..74f5b58 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -141,6 +141,11 @@
static int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
+/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
+#ifdef CONFIG_DETECT_HUNG_TASK
+static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
+#endif
+
#ifdef CONFIG_INOTIFY_USER
#include <linux/inotify.h>
#endif
@@ -985,6 +990,7 @@
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = &hung_task_timeout_max,
},
{
.procname = "hung_task_warnings",
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5b40279..f7df8ea 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -22,6 +22,7 @@
#include <linux/tick.h>
#include <linux/stop_machine.h>
#include <linux/pvclock_gtod.h>
+#include <linux/compiler.h>
#include "tick-internal.h"
#include "ntp_internal.h"
@@ -760,7 +761,7 @@
*
* XXX - Do be sure to remove it once all arches implement it.
*/
-void __attribute__((weak)) read_persistent_clock(struct timespec *ts)
+void __weak read_persistent_clock(struct timespec *ts)
{
ts->tv_sec = 0;
ts->tv_nsec = 0;
@@ -775,7 +776,7 @@
*
* XXX - Do be sure to remove it once all arches implement it.
*/
-void __attribute__((weak)) read_boot_clock(struct timespec *ts)
+void __weak read_boot_clock(struct timespec *ts)
{
ts->tv_sec = 0;
ts->tv_nsec = 0;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ffc314b7..2e29d7b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -13,6 +13,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
+#include <linux/compiler.h>
#ifdef CONFIG_FTRACE_SYSCALLS
#include <asm/unistd.h> /* For NR_SYSCALLS */
@@ -1279,7 +1280,7 @@
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
extern struct ftrace_event_call \
- __attribute__((__aligned__(4))) event_##call;
+ __aligned(4) event_##call;
#undef FTRACE_ENTRY_DUP
#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
diff --git a/lib/Kconfig b/lib/Kconfig
index 991c98b..5d4984c 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -342,9 +342,9 @@
select GENERIC_IO
default y
-config HAS_IOPORT
+config HAS_IOPORT_MAP
boolean
- depends on HAS_IOMEM && !NO_IOPORT
+ depends on HAS_IOMEM && !NO_IOPORT_MAP
default y
config HAS_DMA
diff --git a/lib/decompress.c b/lib/decompress.c
index 4d1cd03..86069d74 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -16,6 +16,7 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/init.h>
+#include <linux/printk.h>
#ifndef CONFIG_DECOMPRESS_GZIP
# define gunzip NULL
@@ -61,6 +62,8 @@
if (len < 2)
return NULL; /* Need at least this much... */
+ pr_debug("Compressed data magic: %#.2x %#.2x\n", inbuf[0], inbuf[1]);
+
for (cf = compressed_formats; cf->name; cf++) {
if (!memcmp(inbuf, cf->magic, 2))
break;
diff --git a/lib/devres.c b/lib/devres.c
index 48cb3c7..2f16c13 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -170,7 +170,7 @@
}
EXPORT_SYMBOL(devm_request_and_ioremap);
-#ifdef CONFIG_HAS_IOPORT
+#ifdef CONFIG_HAS_IOPORT_MAP
/*
* Generic iomap devres
*/
@@ -229,7 +229,7 @@
devm_ioport_map_match, (__force void *)addr));
}
EXPORT_SYMBOL(devm_ioport_unmap);
-#endif /* CONFIG_HAS_IOPORT */
+#endif /* CONFIG_HAS_IOPORT_MAP */
#ifdef CONFIG_PCI
/*
diff --git a/lib/idr.c b/lib/idr.c
index 1ba4956..2642fa8 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -196,7 +196,7 @@
}
}
-int __idr_pre_get(struct idr *idp, gfp_t gfp_mask)
+static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask)
{
while (idp->id_free_cnt < MAX_IDR_FREE) {
struct idr_layer *new;
@@ -207,7 +207,6 @@
}
return 1;
}
-EXPORT_SYMBOL(__idr_pre_get);
/**
* sub_alloc - try to allocate an id without growing the tree depth
@@ -374,20 +373,6 @@
idr_mark_full(pa, id);
}
-int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
-{
- struct idr_layer *pa[MAX_IDR_LEVEL + 1];
- int rv;
-
- rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp);
- if (rv < 0)
- return rv == -ENOMEM ? -EAGAIN : rv;
-
- idr_fill_slot(idp, ptr, rv, pa);
- *id = rv;
- return 0;
-}
-EXPORT_SYMBOL(__idr_get_new_above);
/**
* idr_preload - preload for idr_alloc()
@@ -548,7 +533,7 @@
n = id & IDR_MASK;
if (likely(p != NULL && test_bit(n, p->bitmap))) {
__clear_bit(n, p->bitmap);
- rcu_assign_pointer(p->ary[n], NULL);
+ RCU_INIT_POINTER(p->ary[n], NULL);
to_free = NULL;
while(*paa && ! --((**paa)->count)){
if (to_free)
@@ -607,7 +592,7 @@
}
EXPORT_SYMBOL(idr_remove);
-void __idr_remove_all(struct idr *idp)
+static void __idr_remove_all(struct idr *idp)
{
int n, id, max;
int bt_mask;
@@ -617,7 +602,7 @@
n = idp->layers * IDR_BITS;
p = idp->top;
- rcu_assign_pointer(idp->top, NULL);
+ RCU_INIT_POINTER(idp->top, NULL);
max = idr_max(idp->layers);
id = 0;
@@ -640,7 +625,6 @@
}
idp->layers = 0;
}
-EXPORT_SYMBOL(__idr_remove_all);
/**
* idr_destroy - release all cached layers within an idr tree
diff --git a/lib/iomap.c b/lib/iomap.c
index 2c08f36..fc3dcb4 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -224,7 +224,7 @@
EXPORT_SYMBOL(iowrite16_rep);
EXPORT_SYMBOL(iowrite32_rep);
-#ifdef CONFIG_HAS_IOPORT
+#ifdef CONFIG_HAS_IOPORT_MAP
/* Create a virtual mapping cookie for an IO port range */
void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
@@ -239,7 +239,7 @@
}
EXPORT_SYMBOL(ioport_map);
EXPORT_SYMBOL(ioport_unmap);
-#endif /* CONFIG_HAS_IOPORT */
+#endif /* CONFIG_HAS_IOPORT_MAP */
#ifdef CONFIG_PCI
/* Hide the details if this is a MMIO or PIO address space and just do what
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 04abe53..1afec32 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -7,7 +7,8 @@
#include <linux/kallsyms.h>
#include <linux/sched.h>
-notrace unsigned int debug_smp_processor_id(void)
+notrace static unsigned int check_preemption_disabled(const char *what1,
+ const char *what2)
{
int this_cpu = raw_smp_processor_id();
@@ -38,9 +39,9 @@
if (!printk_ratelimit())
goto out_enable;
- printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] "
- "code: %s/%d\n",
- preempt_count() - 1, current->comm, current->pid);
+ printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n",
+ what1, what2, preempt_count() - 1, current->comm, current->pid);
+
print_symbol("caller is %s\n", (long)__builtin_return_address(0));
dump_stack();
@@ -50,5 +51,14 @@
return this_cpu;
}
+notrace unsigned int debug_smp_processor_id(void)
+{
+ return check_preemption_disabled("smp_processor_id", "");
+}
EXPORT_SYMBOL(debug_smp_processor_id);
+notrace void __this_cpu_preempt_check(const char *op)
+{
+ check_preemption_disabled("__this_cpu_", op);
+}
+EXPORT_SYMBOL(__this_cpu_preempt_check);
diff --git a/mm/Kconfig b/mm/Kconfig
index 2888024..ebe5880 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -216,6 +216,7 @@
#
config SPLIT_PTLOCK_CPUS
int
+ default "999999" if !MMU
default "999999" if ARM && !CPU_CACHE_VIPT
default "999999" if PARISC && !PA20
default "4"
@@ -577,3 +578,6 @@
You can check speed with zsmalloc benchmark:
https://github.com/spartacus06/zsmapbench
+
+config GENERIC_EARLY_IOREMAP
+ bool
diff --git a/mm/Makefile b/mm/Makefile
index cdd7415..9e5aaf9 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -16,7 +16,7 @@
readahead.o swap.o truncate.o vmscan.o shmem.o \
util.o mmzone.o vmstat.o backing-dev.o \
mm_init.o mmu_context.o percpu.o slab_common.o \
- compaction.o balloon_compaction.o \
+ compaction.o balloon_compaction.o vmacache.o \
interval_tree.o list_lru.o workingset.o $(mmu-y)
obj-y += init-mm.o
@@ -61,3 +61,4 @@
obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
obj-$(CONFIG_ZBUD) += zbud.o
obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
+obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
diff --git a/mm/compaction.c b/mm/compaction.c
index b6ab771..37f9762 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -217,21 +217,12 @@
/* Returns true if the page is within a block suitable for migration to */
static bool suitable_migration_target(struct page *page)
{
- int migratetype = get_pageblock_migratetype(page);
-
- /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
- if (migratetype == MIGRATE_RESERVE)
- return false;
-
- if (is_migrate_isolate(migratetype))
- return false;
-
- /* If the page is a large free page, then allow migration */
+ /* If the page is a large free page, then disallow migration */
if (PageBuddy(page) && page_order(page) >= pageblock_order)
- return true;
+ return false;
/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
- if (migrate_async_suitable(migratetype))
+ if (migrate_async_suitable(get_pageblock_migratetype(page)))
return true;
/* Otherwise skip the block */
@@ -253,6 +244,7 @@
struct page *cursor, *valid_page = NULL;
unsigned long flags;
bool locked = false;
+ bool checked_pageblock = false;
cursor = pfn_to_page(blockpfn);
@@ -284,8 +276,16 @@
break;
/* Recheck this is a suitable migration target under lock */
- if (!strict && !suitable_migration_target(page))
- break;
+ if (!strict && !checked_pageblock) {
+ /*
+ * We need to check suitability of pageblock only once
+ * and this isolate_freepages_block() is called with
+ * pageblock range, so just check once is sufficient.
+ */
+ checked_pageblock = true;
+ if (!suitable_migration_target(page))
+ break;
+ }
/* Recheck this is a buddy page under lock */
if (!PageBuddy(page))
@@ -460,12 +460,13 @@
unsigned long last_pageblock_nr = 0, pageblock_nr;
unsigned long nr_scanned = 0, nr_isolated = 0;
struct list_head *migratelist = &cc->migratepages;
- isolate_mode_t mode = 0;
struct lruvec *lruvec;
unsigned long flags;
bool locked = false;
struct page *page = NULL, *valid_page = NULL;
bool skipped_async_unsuitable = false;
+ const isolate_mode_t mode = (!cc->sync ? ISOLATE_ASYNC_MIGRATE : 0) |
+ (unevictable ? ISOLATE_UNEVICTABLE : 0);
/*
* Ensure that there are not too many pages isolated from the LRU
@@ -487,7 +488,7 @@
cond_resched();
for (; low_pfn < end_pfn; low_pfn++) {
/* give a chance to irqs before checking need_resched() */
- if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
+ if (locked && !(low_pfn % SWAP_CLUSTER_MAX)) {
if (should_release_lock(&zone->lru_lock)) {
spin_unlock_irqrestore(&zone->lru_lock, flags);
locked = false;
@@ -526,8 +527,25 @@
/* If isolation recently failed, do not retry */
pageblock_nr = low_pfn >> pageblock_order;
- if (!isolation_suitable(cc, page))
- goto next_pageblock;
+ if (last_pageblock_nr != pageblock_nr) {
+ int mt;
+
+ last_pageblock_nr = pageblock_nr;
+ if (!isolation_suitable(cc, page))
+ goto next_pageblock;
+
+ /*
+ * For async migration, also only scan in MOVABLE
+ * blocks. Async migration is optimistic to see if
+ * the minimum amount of work satisfies the allocation
+ */
+ mt = get_pageblock_migratetype(page);
+ if (!cc->sync && !migrate_async_suitable(mt)) {
+ cc->finished_update_migrate = true;
+ skipped_async_unsuitable = true;
+ goto next_pageblock;
+ }
+ }
/*
* Skip if free. page_order cannot be used without zone->lock
@@ -537,18 +555,6 @@
continue;
/*
- * For async migration, also only scan in MOVABLE blocks. Async
- * migration is optimistic to see if the minimum amount of work
- * satisfies the allocation
- */
- if (!cc->sync && last_pageblock_nr != pageblock_nr &&
- !migrate_async_suitable(get_pageblock_migratetype(page))) {
- cc->finished_update_migrate = true;
- skipped_async_unsuitable = true;
- goto next_pageblock;
- }
-
- /*
* Check may be lockless but that's ok as we recheck later.
* It's possible to migrate LRU pages and balloon pages
* Skip any other type of page
@@ -557,11 +563,7 @@
if (unlikely(balloon_page_movable(page))) {
if (locked && balloon_page_isolate(page)) {
/* Successfully isolated */
- cc->finished_update_migrate = true;
- list_add(&page->lru, migratelist);
- cc->nr_migratepages++;
- nr_isolated++;
- goto check_compact_cluster;
+ goto isolate_success;
}
}
continue;
@@ -607,12 +609,6 @@
continue;
}
- if (!cc->sync)
- mode |= ISOLATE_ASYNC_MIGRATE;
-
- if (unevictable)
- mode |= ISOLATE_UNEVICTABLE;
-
lruvec = mem_cgroup_page_lruvec(page, zone);
/* Try isolate the page */
@@ -622,13 +618,14 @@
VM_BUG_ON_PAGE(PageTransCompound(page), page);
/* Successfully isolated */
- cc->finished_update_migrate = true;
del_page_from_lru_list(page, lruvec, page_lru(page));
+
+isolate_success:
+ cc->finished_update_migrate = true;
list_add(&page->lru, migratelist);
cc->nr_migratepages++;
nr_isolated++;
-check_compact_cluster:
/* Avoid isolating too much */
if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
++low_pfn;
@@ -639,7 +636,6 @@
next_pageblock:
low_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages) - 1;
- last_pageblock_nr = pageblock_nr;
}
acct_isolated(zone, locked, cc);
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
new file mode 100644
index 0000000..e10ccd2
--- /dev/null
+++ b/mm/early_ioremap.c
@@ -0,0 +1,245 @@
+/*
+ * Provide common bits of early_ioremap() support for architectures needing
+ * temporary mappings during boot before ioremap() is available.
+ *
+ * This is mostly a direct copy of the x86 early_ioremap implementation.
+ *
+ * (C) Copyright 1995 1996, 2014 Linus Torvalds
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <asm/fixmap.h>
+
+#ifdef CONFIG_MMU
+static int early_ioremap_debug __initdata;
+
+static int __init early_ioremap_debug_setup(char *str)
+{
+ early_ioremap_debug = 1;
+
+ return 0;
+}
+early_param("early_ioremap_debug", early_ioremap_debug_setup);
+
+static int after_paging_init __initdata;
+
+void __init __weak early_ioremap_shutdown(void)
+{
+}
+
+void __init early_ioremap_reset(void)
+{
+ early_ioremap_shutdown();
+ after_paging_init = 1;
+}
+
+/*
+ * Generally, ioremap() is available after paging_init() has been called.
+ * Architectures wanting to allow early_ioremap after paging_init() can
+ * define __late_set_fixmap and __late_clear_fixmap to do the right thing.
+ */
+#ifndef __late_set_fixmap
+static inline void __init __late_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t prot)
+{
+ BUG();
+}
+#endif
+
+#ifndef __late_clear_fixmap
+static inline void __init __late_clear_fixmap(enum fixed_addresses idx)
+{
+ BUG();
+}
+#endif
+
+static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
+static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
+static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
+
+void __init early_ioremap_setup(void)
+{
+ int i;
+
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
+ if (WARN_ON(prev_map[i]))
+ break;
+
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
+ slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
+}
+
+static int __init check_early_ioremap_leak(void)
+{
+ int count = 0;
+ int i;
+
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
+ if (prev_map[i])
+ count++;
+
+ if (WARN(count, KERN_WARNING
+ "Debug warning: early ioremap leak of %d areas detected.\n"
+ "please boot with early_ioremap_debug and report the dmesg.\n",
+ count))
+ return 1;
+ return 0;
+}
+late_initcall(check_early_ioremap_leak);
+
+static void __init __iomem *
+__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
+{
+ unsigned long offset;
+ resource_size_t last_addr;
+ unsigned int nrpages;
+ enum fixed_addresses idx;
+ int i, slot;
+
+ WARN_ON(system_state != SYSTEM_BOOTING);
+
+ slot = -1;
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
+ if (!prev_map[i]) {
+ slot = i;
+ break;
+ }
+ }
+
+ if (WARN(slot < 0, "%s(%08llx, %08lx) not found slot\n",
+ __func__, (u64)phys_addr, size))
+ return NULL;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (WARN_ON(!size || last_addr < phys_addr))
+ return NULL;
+
+ prev_size[slot] = size;
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr + 1) - phys_addr;
+
+ /*
+ * Mappings have to fit in the FIX_BTMAP area.
+ */
+ nrpages = size >> PAGE_SHIFT;
+ if (WARN_ON(nrpages > NR_FIX_BTMAPS))
+ return NULL;
+
+ /*
+ * Ok, go for it..
+ */
+ idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
+ while (nrpages > 0) {
+ if (after_paging_init)
+ __late_set_fixmap(idx, phys_addr, prot);
+ else
+ __early_set_fixmap(idx, phys_addr, prot);
+ phys_addr += PAGE_SIZE;
+ --idx;
+ --nrpages;
+ }
+ WARN(early_ioremap_debug, "%s(%08llx, %08lx) [%d] => %08lx + %08lx\n",
+ __func__, (u64)phys_addr, size, slot, offset, slot_virt[slot]);
+
+ prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]);
+ return prev_map[slot];
+}
+
+void __init early_iounmap(void __iomem *addr, unsigned long size)
+{
+ unsigned long virt_addr;
+ unsigned long offset;
+ unsigned int nrpages;
+ enum fixed_addresses idx;
+ int i, slot;
+
+ slot = -1;
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
+ if (prev_map[i] == addr) {
+ slot = i;
+ break;
+ }
+ }
+
+ if (WARN(slot < 0, "early_iounmap(%p, %08lx) not found slot\n",
+ addr, size))
+ return;
+
+ if (WARN(prev_size[slot] != size,
+ "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n",
+ addr, size, slot, prev_size[slot]))
+ return;
+
+ WARN(early_ioremap_debug, "early_iounmap(%p, %08lx) [%d]\n",
+ addr, size, slot);
+
+ virt_addr = (unsigned long)addr;
+ if (WARN_ON(virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)))
+ return;
+
+ offset = virt_addr & ~PAGE_MASK;
+ nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;
+
+ idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
+ while (nrpages > 0) {
+ if (after_paging_init)
+ __late_clear_fixmap(idx);
+ else
+ __early_set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR);
+ --idx;
+ --nrpages;
+ }
+ prev_map[slot] = NULL;
+}
+
+/* Remap an IO device */
+void __init __iomem *
+early_ioremap(resource_size_t phys_addr, unsigned long size)
+{
+ return __early_ioremap(phys_addr, size, FIXMAP_PAGE_IO);
+}
+
+/* Remap memory */
+void __init *
+early_memremap(resource_size_t phys_addr, unsigned long size)
+{
+ return (__force void *)__early_ioremap(phys_addr, size,
+ FIXMAP_PAGE_NORMAL);
+}
+#else /* CONFIG_MMU */
+
+void __init __iomem *
+early_ioremap(resource_size_t phys_addr, unsigned long size)
+{
+ return (__force void __iomem *)phys_addr;
+}
+
+/* Remap memory */
+void __init *
+early_memremap(resource_size_t phys_addr, unsigned long size)
+{
+ return (void *)phys_addr;
+}
+
+void __init early_iounmap(void __iomem *addr, unsigned long size)
+{
+}
+
+#endif /* CONFIG_MMU */
+
+
+void __init early_memunmap(void *addr, unsigned long size)
+{
+ early_iounmap((__force void __iomem *)addr, size);
+}
diff --git a/mm/filemap.c b/mm/filemap.c
index 21781f1..27ebc0c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,6 +33,7 @@
#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
#include <linux/memcontrol.h>
#include <linux/cleancache.h>
+#include <linux/rmap.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
@@ -562,7 +563,7 @@
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapBacked(page), page);
- error = mem_cgroup_cache_charge(page, current->mm,
+ error = mem_cgroup_charge_file(page, current->mm,
gfp_mask & GFP_RECLAIM_MASK);
if (error)
return error;
@@ -1952,11 +1953,11 @@
struct inode *inode = mapping->host;
pgoff_t offset = vmf->pgoff;
struct page *page;
- pgoff_t size;
+ loff_t size;
int ret = 0;
- size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (offset >= size)
+ size = round_up(i_size_read(inode), PAGE_CACHE_SIZE);
+ if (offset >= size >> PAGE_CACHE_SHIFT)
return VM_FAULT_SIGBUS;
/*
@@ -2005,8 +2006,8 @@
* Found the page and have a reference on it.
* We must recheck i_size under page lock.
*/
- size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- if (unlikely(offset >= size)) {
+ size = round_up(i_size_read(inode), PAGE_CACHE_SIZE);
+ if (unlikely(offset >= size >> PAGE_CACHE_SHIFT)) {
unlock_page(page);
page_cache_release(page);
return VM_FAULT_SIGBUS;
@@ -2064,6 +2065,78 @@
}
EXPORT_SYMBOL(filemap_fault);
+void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+ struct file *file = vma->vm_file;
+ struct address_space *mapping = file->f_mapping;
+ loff_t size;
+ struct page *page;
+ unsigned long address = (unsigned long) vmf->virtual_address;
+ unsigned long addr;
+ pte_t *pte;
+
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, vmf->pgoff) {
+ if (iter.index > vmf->max_pgoff)
+ break;
+repeat:
+ page = radix_tree_deref_slot(slot);
+ if (unlikely(!page))
+ goto next;
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page))
+ break;
+ else
+ goto next;
+ }
+
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *slot)) {
+ page_cache_release(page);
+ goto repeat;
+ }
+
+ if (!PageUptodate(page) ||
+ PageReadahead(page) ||
+ PageHWPoison(page))
+ goto skip;
+ if (!trylock_page(page))
+ goto skip;
+
+ if (page->mapping != mapping || !PageUptodate(page))
+ goto unlock;
+
+ size = round_up(i_size_read(mapping->host), PAGE_CACHE_SIZE);
+ if (page->index >= size >> PAGE_CACHE_SHIFT)
+ goto unlock;
+
+ pte = vmf->pte + page->index - vmf->pgoff;
+ if (!pte_none(*pte))
+ goto unlock;
+
+ if (file->f_ra.mmap_miss > 0)
+ file->f_ra.mmap_miss--;
+ addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
+ do_set_pte(vma, addr, page, pte, false, false);
+ unlock_page(page);
+ goto next;
+unlock:
+ unlock_page(page);
+skip:
+ page_cache_release(page);
+next:
+ if (iter.index == vmf->max_pgoff)
+ break;
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(filemap_map_pages);
+
int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page = vmf->page;
@@ -2093,6 +2166,7 @@
const struct vm_operations_struct generic_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = filemap_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 6ac89e9..64635f5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -827,7 +827,7 @@
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
- if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
+ if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) {
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -968,7 +968,7 @@
__GFP_OTHER_NODE,
vma, address, page_to_nid(page));
if (unlikely(!pages[i] ||
- mem_cgroup_newpage_charge(pages[i], mm,
+ mem_cgroup_charge_anon(pages[i], mm,
GFP_KERNEL))) {
if (pages[i])
put_page(pages[i]);
@@ -1101,7 +1101,7 @@
goto out;
}
- if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
+ if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) {
put_page(new_page);
if (page) {
split_huge_page(page);
@@ -1891,17 +1891,22 @@
int hugepage_madvise(struct vm_area_struct *vma,
unsigned long *vm_flags, int advice)
{
- struct mm_struct *mm = vma->vm_mm;
-
switch (advice) {
case MADV_HUGEPAGE:
+#ifdef CONFIG_S390
+ /*
+ * qemu blindly sets MADV_HUGEPAGE on all allocations, but s390
+ * can't handle this properly after s390_enable_sie, so we simply
+ * ignore the madvise to prevent qemu from causing a SIGSEGV.
+ */
+ if (mm_has_pgste(vma->vm_mm))
+ return 0;
+#endif
/*
* Be somewhat over-protective like KSM for now!
*/
if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
return -EINVAL;
- if (mm->def_flags & VM_NOHUGEPAGE)
- return -EINVAL;
*vm_flags &= ~VM_NOHUGEPAGE;
*vm_flags |= VM_HUGEPAGE;
/*
@@ -2354,7 +2359,7 @@
if (!new_page)
return;
- if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
+ if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)))
return;
/*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7c02b9d..dd30f22 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -13,6 +13,7 @@
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
+#include <linux/compiler.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>
#include <linux/bootmem.h>
@@ -1535,6 +1536,7 @@
while (min_count < persistent_huge_pages(h)) {
if (!free_pool_huge_page(h, nodes_allowed, 0))
break;
+ cond_resched_lock(&hugetlb_lock);
}
while (count < persistent_huge_pages(h)) {
if (!adjust_pool_surplus(h, nodes_allowed, 1))
@@ -2690,7 +2692,8 @@
BUG_ON(huge_pte_none(pte));
spin_lock(ptl);
ptep = huge_pte_offset(mm, address & huge_page_mask(h));
- if (likely(pte_same(huge_ptep_get(ptep), pte)))
+ if (likely(ptep &&
+ pte_same(huge_ptep_get(ptep), pte)))
goto retry_avoidcopy;
/*
* race occurs while re-acquiring page table
@@ -2734,7 +2737,7 @@
*/
spin_lock(ptl);
ptep = huge_pte_offset(mm, address & huge_page_mask(h));
- if (likely(pte_same(huge_ptep_get(ptep), pte))) {
+ if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
ClearPagePrivate(new_page);
/* Break COW */
@@ -2896,8 +2899,7 @@
if (anon_rmap) {
ClearPagePrivate(page);
hugepage_add_new_anon_rmap(page, vma, address);
- }
- else
+ } else
page_dup_rmap(page);
new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
&& (vma->vm_flags & VM_SHARED)));
@@ -3185,6 +3187,7 @@
BUG_ON(address >= end);
flush_cache_range(vma, address, end);
+ mmu_notifier_invalidate_range_start(mm, start, end);
mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
for (; address < end; address += huge_page_size(h)) {
spinlock_t *ptl;
@@ -3214,6 +3217,7 @@
*/
flush_tlb_range(vma, start, end);
mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+ mmu_notifier_invalidate_range_end(mm, start, end);
return pages << h->order;
}
@@ -3518,7 +3522,7 @@
#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */
/* Can be overriden by architectures */
-__attribute__((weak)) struct page *
+struct page * __weak
follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int write)
{
diff --git a/mm/internal.h b/mm/internal.h
index 29e1e76..07b6736 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -11,6 +11,7 @@
#ifndef __MM_INTERNAL_H
#define __MM_INTERNAL_H
+#include <linux/fs.h>
#include <linux/mm.h>
void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
@@ -21,6 +22,20 @@
atomic_set(&page->_count, v);
}
+extern int __do_page_cache_readahead(struct address_space *mapping,
+ struct file *filp, pgoff_t offset, unsigned long nr_to_read,
+ unsigned long lookahead_size);
+
+/*
+ * Submit IO for the read-ahead request in file_ra_state.
+ */
+static inline unsigned long ra_submit(struct file_ra_state *ra,
+ struct address_space *mapping, struct file *filp)
+{
+ return __do_page_cache_readahead(mapping, filp,
+ ra->start, ra->size, ra->async_size);
+}
+
/*
* Turn a non-refcounted page (->_count == 0) into refcounted with
* a count of one.
@@ -370,5 +385,6 @@
#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
#define ALLOC_CPUSET 0x40 /* check for correct cpuset */
#define ALLOC_CMA 0x80 /* allow allocations from CMA areas */
+#define ALLOC_FAIR 0x100 /* fair zone allocation */
#endif /* __MM_INTERNAL_H */
diff --git a/mm/memblock.c b/mm/memblock.c
index 7fe5354..e9d6ca9 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1253,7 +1253,7 @@
pages += end_pfn - start_pfn;
}
- return (phys_addr_t)pages << PAGE_SHIFT;
+ return PFN_PHYS(pages);
}
/* lowest address */
@@ -1271,16 +1271,14 @@
void __init memblock_enforce_memory_limit(phys_addr_t limit)
{
- unsigned long i;
phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
+ struct memblock_region *r;
if (!limit)
return;
/* find out max address */
- for (i = 0; i < memblock.memory.cnt; i++) {
- struct memblock_region *r = &memblock.memory.regions[i];
-
+ for_each_memblock(memory, r) {
if (limit <= r->size) {
max_addr = r->base + limit;
break;
@@ -1326,7 +1324,7 @@
unsigned long *start_pfn, unsigned long *end_pfn)
{
struct memblock_type *type = &memblock.memory;
- int mid = memblock_search(type, (phys_addr_t)pfn << PAGE_SHIFT);
+ int mid = memblock_search(type, PFN_PHYS(pfn));
if (mid == -1)
return -1;
@@ -1379,13 +1377,12 @@
void __init_memblock memblock_trim_memory(phys_addr_t align)
{
- int i;
phys_addr_t start, end, orig_start, orig_end;
- struct memblock_type *mem = &memblock.memory;
+ struct memblock_region *r;
- for (i = 0; i < mem->cnt; i++) {
- orig_start = mem->regions[i].base;
- orig_end = mem->regions[i].base + mem->regions[i].size;
+ for_each_memblock(memory, r) {
+ orig_start = r->base;
+ orig_end = r->base + r->size;
start = round_up(orig_start, align);
end = round_down(orig_end, align);
@@ -1393,11 +1390,12 @@
continue;
if (start < end) {
- mem->regions[i].base = start;
- mem->regions[i].size = end - start;
+ r->base = start;
+ r->size = end - start;
} else {
- memblock_remove_region(mem, i);
- i--;
+ memblock_remove_region(&memblock.memory,
+ r - memblock.memory.regions);
+ r--;
}
}
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dcc8153..29501f0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -921,8 +921,6 @@
struct page *page,
bool anon, int nr_pages)
{
- preempt_disable();
-
/*
* Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is
* counted as CACHE even if it's on ANON LRU.
@@ -947,8 +945,6 @@
}
__this_cpu_add(memcg->stat->nr_page_events, nr_pages);
-
- preempt_enable();
}
unsigned long
@@ -1075,22 +1071,15 @@
return mem_cgroup_from_css(task_css(p, memory_cgrp_id));
}
-struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
+static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
{
struct mem_cgroup *memcg = NULL;
- if (!mm)
- return NULL;
- /*
- * Because we have no locks, mm->owner's may be being moved to other
- * cgroup. We use css_tryget() here even if this looks
- * pessimistic (rather than adding locks here).
- */
rcu_read_lock();
do {
memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
if (unlikely(!memcg))
- break;
+ memcg = root_mem_cgroup;
} while (!css_tryget(&memcg->css));
rcu_read_unlock();
return memcg;
@@ -1486,7 +1475,7 @@
p = find_lock_task_mm(task);
if (p) {
- curr = try_get_mem_cgroup_from_mm(p->mm);
+ curr = get_mem_cgroup_from_mm(p->mm);
task_unlock(p);
} else {
/*
@@ -1500,8 +1489,6 @@
css_get(&curr->css);
rcu_read_unlock();
}
- if (!curr)
- return false;
/*
* We should check use_hierarchy of "memcg" not "curr". Because checking
* use_hierarchy of "curr" here make this function true if hierarchy is
@@ -2588,7 +2575,7 @@
}
-/* See __mem_cgroup_try_charge() for details */
+/* See mem_cgroup_try_charge() for details */
enum {
CHARGE_OK, /* success */
CHARGE_RETRY, /* need to retry but retry is not bad */
@@ -2661,45 +2648,34 @@
return CHARGE_NOMEM;
}
-/*
- * __mem_cgroup_try_charge() does
- * 1. detect memcg to be charged against from passed *mm and *ptr,
- * 2. update res_counter
- * 3. call memory reclaim if necessary.
+/**
+ * mem_cgroup_try_charge - try charging a memcg
+ * @memcg: memcg to charge
+ * @nr_pages: number of pages to charge
+ * @oom: trigger OOM if reclaim fails
*
- * In some special case, if the task is fatal, fatal_signal_pending() or
- * has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup
- * to *ptr. There are two reasons for this. 1: fatal threads should quit as soon
- * as possible without any hazards. 2: all pages should have a valid
- * pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg
- * pointer, that is treated as a charge to root_mem_cgroup.
- *
- * So __mem_cgroup_try_charge() will return
- * 0 ... on success, filling *ptr with a valid memcg pointer.
- * -ENOMEM ... charge failure because of resource limits.
- * -EINTR ... if thread is fatal. *ptr is filled with root_mem_cgroup.
- *
- * Unlike the exported interface, an "oom" parameter is added. if oom==true,
- * the oom-killer can be invoked.
+ * Returns 0 if @memcg was charged successfully, -EINTR if the charge
+ * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
*/
-static int __mem_cgroup_try_charge(struct mm_struct *mm,
- gfp_t gfp_mask,
- unsigned int nr_pages,
- struct mem_cgroup **ptr,
- bool oom)
+static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
+ gfp_t gfp_mask,
+ unsigned int nr_pages,
+ bool oom)
{
unsigned int batch = max(CHARGE_BATCH, nr_pages);
int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
- struct mem_cgroup *memcg = NULL;
int ret;
+ if (mem_cgroup_is_root(memcg))
+ goto done;
/*
- * Unlike gloval-vm's OOM-kill, we're not in memory shortage
- * in system level. So, allow to go ahead dying process in addition to
- * MEMDIE process.
+ * Unlike in global OOM situations, memcg is not in a physical
+ * memory shortage. Allow dying and OOM-killed tasks to
+ * bypass the last charges so that they can exit quickly and
+ * free their memory.
*/
- if (unlikely(test_thread_flag(TIF_MEMDIE)
- || fatal_signal_pending(current)))
+ if (unlikely(test_thread_flag(TIF_MEMDIE) ||
+ fatal_signal_pending(current)))
goto bypass;
if (unlikely(task_in_memcg_oom(current)))
@@ -2707,73 +2683,16 @@
if (gfp_mask & __GFP_NOFAIL)
oom = false;
-
- /*
- * We always charge the cgroup the mm_struct belongs to.
- * The mm_struct's mem_cgroup changes on task migration if the
- * thread group leader migrates. It's possible that mm is not
- * set, if so charge the root memcg (happens for pagecache usage).
- */
- if (!*ptr && !mm)
- *ptr = root_mem_cgroup;
again:
- if (*ptr) { /* css should be a valid one */
- memcg = *ptr;
- if (mem_cgroup_is_root(memcg))
- goto done;
- if (consume_stock(memcg, nr_pages))
- goto done;
- css_get(&memcg->css);
- } else {
- struct task_struct *p;
-
- rcu_read_lock();
- p = rcu_dereference(mm->owner);
- /*
- * Because we don't have task_lock(), "p" can exit.
- * In that case, "memcg" can point to root or p can be NULL with
- * race with swapoff. Then, we have small risk of mis-accouning.
- * But such kind of mis-account by race always happens because
- * we don't have cgroup_mutex(). It's overkill and we allo that
- * small race, here.
- * (*) swapoff at el will charge against mm-struct not against
- * task-struct. So, mm->owner can be NULL.
- */
- memcg = mem_cgroup_from_task(p);
- if (!memcg)
- memcg = root_mem_cgroup;
- if (mem_cgroup_is_root(memcg)) {
- rcu_read_unlock();
- goto done;
- }
- if (consume_stock(memcg, nr_pages)) {
- /*
- * It seems dagerous to access memcg without css_get().
- * But considering how consume_stok works, it's not
- * necessary. If consume_stock success, some charges
- * from this memcg are cached on this cpu. So, we
- * don't need to call css_get()/css_tryget() before
- * calling consume_stock().
- */
- rcu_read_unlock();
- goto done;
- }
- /* after here, we may be blocked. we need to get refcnt */
- if (!css_tryget(&memcg->css)) {
- rcu_read_unlock();
- goto again;
- }
- rcu_read_unlock();
- }
+ if (consume_stock(memcg, nr_pages))
+ goto done;
do {
bool invoke_oom = oom && !nr_oom_retries;
/* If killed, bypass charge */
- if (fatal_signal_pending(current)) {
- css_put(&memcg->css);
+ if (fatal_signal_pending(current))
goto bypass;
- }
ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
nr_pages, invoke_oom);
@@ -2782,17 +2701,12 @@
break;
case CHARGE_RETRY: /* not in OOM situation but retry */
batch = nr_pages;
- css_put(&memcg->css);
- memcg = NULL;
goto again;
case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
- css_put(&memcg->css);
goto nomem;
case CHARGE_NOMEM: /* OOM routine works */
- if (!oom || invoke_oom) {
- css_put(&memcg->css);
+ if (!oom || invoke_oom)
goto nomem;
- }
nr_oom_retries--;
break;
}
@@ -2800,20 +2714,44 @@
if (batch > nr_pages)
refill_stock(memcg, batch - nr_pages);
- css_put(&memcg->css);
done:
- *ptr = memcg;
return 0;
nomem:
- if (!(gfp_mask & __GFP_NOFAIL)) {
- *ptr = NULL;
+ if (!(gfp_mask & __GFP_NOFAIL))
return -ENOMEM;
- }
bypass:
- *ptr = root_mem_cgroup;
return -EINTR;
}
+/**
+ * mem_cgroup_try_charge_mm - try charging a mm
+ * @mm: mm_struct to charge
+ * @nr_pages: number of pages to charge
+ * @oom: trigger OOM if reclaim fails
+ *
+ * Returns the charged mem_cgroup associated with the given mm_struct or
+ * NULL the charge failed.
+ */
+static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
+ gfp_t gfp_mask,
+ unsigned int nr_pages,
+ bool oom)
+
+{
+ struct mem_cgroup *memcg;
+ int ret;
+
+ memcg = get_mem_cgroup_from_mm(mm);
+ ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
+ css_put(&memcg->css);
+ if (ret == -EINTR)
+ memcg = root_mem_cgroup;
+ else if (ret)
+ memcg = NULL;
+
+ return memcg;
+}
+
/*
* Somemtimes we have to undo a charge we got by try_charge().
* This function is for that and do uncharge, put css's refcnt.
@@ -3009,20 +2947,17 @@
static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
{
struct res_counter *fail_res;
- struct mem_cgroup *_memcg;
int ret = 0;
ret = res_counter_charge(&memcg->kmem, size, &fail_res);
if (ret)
return ret;
- _memcg = memcg;
- ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
- &_memcg, oom_gfp_allowed(gfp));
-
+ ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
+ oom_gfp_allowed(gfp));
if (ret == -EINTR) {
/*
- * __mem_cgroup_try_charge() chosed to bypass to root due to
+ * mem_cgroup_try_charge() chosed to bypass to root due to
* OOM kill or fatal signal. Since our only options are to
* either fail the allocation or charge it to this cgroup, do
* it as a temporary condition. But we can't fail. From a
@@ -3032,7 +2967,7 @@
*
* This condition will only trigger if the task entered
* memcg_charge_kmem in a sane state, but was OOM-killed during
- * __mem_cgroup_try_charge() above. Tasks that were already
+ * mem_cgroup_try_charge() above. Tasks that were already
* dying when the allocation triggers should have been already
* directed to the root cgroup in memcontrol.h
*/
@@ -3159,6 +3094,29 @@
return 0;
}
+char *memcg_create_cache_name(struct mem_cgroup *memcg,
+ struct kmem_cache *root_cache)
+{
+ static char *buf = NULL;
+
+ /*
+ * We need a mutex here to protect the shared buffer. Since this is
+ * expected to be called only on cache creation, we can employ the
+ * slab_mutex for that purpose.
+ */
+ lockdep_assert_held(&slab_mutex);
+
+ if (!buf) {
+ buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
+ if (!buf)
+ return NULL;
+ }
+
+ cgroup_name(memcg->css.cgroup, buf, NAME_MAX + 1);
+ return kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
+ memcg_cache_id(memcg), buf);
+}
+
int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
struct kmem_cache *root_cache)
{
@@ -3182,6 +3140,7 @@
s->memcg_params->root_cache = root_cache;
INIT_WORK(&s->memcg_params->destroy,
kmem_cache_destroy_work_func);
+ css_get(&memcg->css);
} else
s->memcg_params->is_root_cache = true;
@@ -3190,6 +3149,10 @@
void memcg_free_cache_params(struct kmem_cache *s)
{
+ if (!s->memcg_params)
+ return;
+ if (!s->memcg_params->is_root_cache)
+ css_put(&s->memcg_params->memcg->css);
kfree(s->memcg_params);
}
@@ -3212,9 +3175,6 @@
memcg = s->memcg_params->memcg;
id = memcg_cache_id(memcg);
- css_get(&memcg->css);
-
-
/*
* Since readers won't lock (see cache_from_memcg_idx()), we need a
* barrier here to ensure nobody will see the kmem_cache partially
@@ -3263,10 +3223,8 @@
* after removing it from the memcg_slab_caches list, otherwise we can
* fail to convert memcg_params_to_cache() while traversing the list.
*/
- VM_BUG_ON(!root->memcg_params->memcg_caches[id]);
+ VM_BUG_ON(root->memcg_params->memcg_caches[id] != s);
root->memcg_params->memcg_caches[id] = NULL;
-
- css_put(&memcg->css);
}
/*
@@ -3363,55 +3321,10 @@
schedule_work(&cachep->memcg_params->destroy);
}
-static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
- struct kmem_cache *s)
-{
- struct kmem_cache *new = NULL;
- static char *tmp_path = NULL, *tmp_name = NULL;
- static DEFINE_MUTEX(mutex); /* protects tmp_name */
-
- BUG_ON(!memcg_can_account_kmem(memcg));
-
- mutex_lock(&mutex);
- /*
- * kmem_cache_create_memcg duplicates the given name and
- * cgroup_name for this name requires RCU context.
- * This static temporary buffer is used to prevent from
- * pointless shortliving allocation.
- */
- if (!tmp_path || !tmp_name) {
- if (!tmp_path)
- tmp_path = kmalloc(PATH_MAX, GFP_KERNEL);
- if (!tmp_name)
- tmp_name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
- if (!tmp_path || !tmp_name)
- goto out;
- }
-
- cgroup_name(memcg->css.cgroup, tmp_name, NAME_MAX + 1);
- snprintf(tmp_path, PATH_MAX, "%s(%d:%s)", s->name,
- memcg_cache_id(memcg), tmp_name);
-
- new = kmem_cache_create_memcg(memcg, tmp_path, s->object_size, s->align,
- (s->flags & ~SLAB_PANIC), s->ctor, s);
- if (new)
- new->allocflags |= __GFP_KMEMCG;
- else
- new = s;
-out:
- mutex_unlock(&mutex);
- return new;
-}
-
-void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
{
struct kmem_cache *c;
- int i;
-
- if (!s->memcg_params)
- return;
- if (!s->memcg_params->is_root_cache)
- return;
+ int i, failed = 0;
/*
* If the cache is being destroyed, we trust that there is no one else
@@ -3445,16 +3358,14 @@
c->memcg_params->dead = false;
cancel_work_sync(&c->memcg_params->destroy);
kmem_cache_destroy(c);
+
+ if (cache_from_memcg_idx(s, i))
+ failed++;
}
mutex_unlock(&activate_kmem_mutex);
+ return failed;
}
-struct create_work {
- struct mem_cgroup *memcg;
- struct kmem_cache *cachep;
- struct work_struct work;
-};
-
static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
{
struct kmem_cache *cachep;
@@ -3472,13 +3383,20 @@
mutex_unlock(&memcg->slab_caches_mutex);
}
+struct create_work {
+ struct mem_cgroup *memcg;
+ struct kmem_cache *cachep;
+ struct work_struct work;
+};
+
static void memcg_create_cache_work_func(struct work_struct *w)
{
- struct create_work *cw;
+ struct create_work *cw = container_of(w, struct create_work, work);
+ struct mem_cgroup *memcg = cw->memcg;
+ struct kmem_cache *cachep = cw->cachep;
- cw = container_of(w, struct create_work, work);
- memcg_create_kmem_cache(cw->memcg, cw->cachep);
- css_put(&cw->memcg->css);
+ kmem_cache_create_memcg(memcg, cachep);
+ css_put(&memcg->css);
kfree(cw);
}
@@ -3637,15 +3555,7 @@
if (!current->mm || current->memcg_kmem_skip_account)
return true;
- memcg = try_get_mem_cgroup_from_mm(current->mm);
-
- /*
- * very rare case described in mem_cgroup_from_task. Unfortunately there
- * isn't much we can do without complicating this too much, and it would
- * be gfp-dependent anyway. Just let it go
- */
- if (unlikely(!memcg))
- return true;
+ memcg = get_mem_cgroup_from_mm(current->mm);
if (!memcg_can_account_kmem(memcg)) {
css_put(&memcg->css);
@@ -3748,19 +3658,6 @@
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static inline
-void mem_cgroup_move_account_page_stat(struct mem_cgroup *from,
- struct mem_cgroup *to,
- unsigned int nr_pages,
- enum mem_cgroup_stat_index idx)
-{
- /* Update stat data for mem_cgroup */
- preempt_disable();
- __this_cpu_sub(from->stat->count[idx], nr_pages);
- __this_cpu_add(to->stat->count[idx], nr_pages);
- preempt_enable();
-}
-
/**
* mem_cgroup_move_account - move account of the page
* @page: the page
@@ -3806,13 +3703,19 @@
move_lock_mem_cgroup(from, &flags);
- if (!anon && page_mapped(page))
- mem_cgroup_move_account_page_stat(from, to, nr_pages,
- MEM_CGROUP_STAT_FILE_MAPPED);
+ if (!anon && page_mapped(page)) {
+ __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
+ nr_pages);
+ __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
+ nr_pages);
+ }
- if (PageWriteback(page))
- mem_cgroup_move_account_page_stat(from, to, nr_pages,
- MEM_CGROUP_STAT_WRITEBACK);
+ if (PageWriteback(page)) {
+ __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_WRITEBACK],
+ nr_pages);
+ __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_WRITEBACK],
+ nr_pages);
+ }
mem_cgroup_charge_statistics(from, page, anon, -nr_pages);
@@ -3898,19 +3801,19 @@
return ret;
}
-/*
- * Charge the memory controller for page usage.
- * Return
- * 0 if the charge was successful
- * < 0 if the cgroup is over its limit
- */
-static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask, enum charge_type ctype)
+int mem_cgroup_charge_anon(struct page *page,
+ struct mm_struct *mm, gfp_t gfp_mask)
{
- struct mem_cgroup *memcg = NULL;
unsigned int nr_pages = 1;
+ struct mem_cgroup *memcg;
bool oom = true;
- int ret;
+
+ if (mem_cgroup_disabled())
+ return 0;
+
+ VM_BUG_ON_PAGE(page_mapped(page), page);
+ VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
+ VM_BUG_ON(!mm);
if (PageTransHuge(page)) {
nr_pages <<= compound_order(page);
@@ -3922,25 +3825,14 @@
oom = false;
}
- ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
- if (ret == -ENOMEM)
- return ret;
- __mem_cgroup_commit_charge(memcg, page, nr_pages, ctype, false);
+ memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
+ if (!memcg)
+ return -ENOMEM;
+ __mem_cgroup_commit_charge(memcg, page, nr_pages,
+ MEM_CGROUP_CHARGE_TYPE_ANON, false);
return 0;
}
-int mem_cgroup_newpage_charge(struct page *page,
- struct mm_struct *mm, gfp_t gfp_mask)
-{
- if (mem_cgroup_disabled())
- return 0;
- VM_BUG_ON_PAGE(page_mapped(page), page);
- VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
- VM_BUG_ON(!mm);
- return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_ANON);
-}
-
/*
* While swap-in, try_charge -> commit or cancel, the page is locked.
* And when try_charge() successfully returns, one refcnt to memcg without
@@ -3952,7 +3844,7 @@
gfp_t mask,
struct mem_cgroup **memcgp)
{
- struct mem_cgroup *memcg;
+ struct mem_cgroup *memcg = NULL;
struct page_cgroup *pc;
int ret;
@@ -3965,31 +3857,29 @@
* in turn serializes uncharging.
*/
if (PageCgroupUsed(pc))
- return 0;
- if (!do_swap_account)
- goto charge_cur_mm;
- memcg = try_get_mem_cgroup_from_page(page);
+ goto out;
+ if (do_swap_account)
+ memcg = try_get_mem_cgroup_from_page(page);
if (!memcg)
- goto charge_cur_mm;
- *memcgp = memcg;
- ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
+ memcg = get_mem_cgroup_from_mm(mm);
+ ret = mem_cgroup_try_charge(memcg, mask, 1, true);
css_put(&memcg->css);
if (ret == -EINTR)
- ret = 0;
- return ret;
-charge_cur_mm:
- ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
- if (ret == -EINTR)
- ret = 0;
- return ret;
+ memcg = root_mem_cgroup;
+ else if (ret)
+ return ret;
+out:
+ *memcgp = memcg;
+ return 0;
}
int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
gfp_t gfp_mask, struct mem_cgroup **memcgp)
{
- *memcgp = NULL;
- if (mem_cgroup_disabled())
+ if (mem_cgroup_disabled()) {
+ *memcgp = NULL;
return 0;
+ }
/*
* A racing thread's fault, or swapoff, may have already
* updated the pte, and even removed page from swap cache: in
@@ -3997,12 +3887,13 @@
* there's also a KSM case which does need to charge the page.
*/
if (!PageSwapCache(page)) {
- int ret;
+ struct mem_cgroup *memcg;
- ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, memcgp, true);
- if (ret == -EINTR)
- ret = 0;
- return ret;
+ memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+ if (!memcg)
+ return -ENOMEM;
+ *memcgp = memcg;
+ return 0;
}
return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
}
@@ -4046,11 +3937,11 @@
MEM_CGROUP_CHARGE_TYPE_ANON);
}
-int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
+int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{
- struct mem_cgroup *memcg = NULL;
enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
+ struct mem_cgroup *memcg;
int ret;
if (mem_cgroup_disabled())
@@ -4058,15 +3949,28 @@
if (PageCompound(page))
return 0;
- if (!PageSwapCache(page))
- ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
- else { /* page is swapcache/shmem */
+ if (PageSwapCache(page)) { /* shmem */
ret = __mem_cgroup_try_charge_swapin(mm, page,
gfp_mask, &memcg);
- if (!ret)
- __mem_cgroup_commit_charge_swapin(page, memcg, type);
+ if (ret)
+ return ret;
+ __mem_cgroup_commit_charge_swapin(page, memcg, type);
+ return 0;
}
- return ret;
+
+ /*
+ * Page cache insertions can happen without an actual mm
+ * context, e.g. during disk probing on boot.
+ */
+ if (unlikely(!mm))
+ memcg = root_mem_cgroup;
+ else {
+ memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+ if (!memcg)
+ return -ENOMEM;
+ }
+ __mem_cgroup_commit_charge(memcg, page, 1, type, false);
+ return 0;
}
static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
@@ -6678,8 +6582,7 @@
batch_count = PRECHARGE_COUNT_AT_ONCE;
cond_resched();
}
- ret = __mem_cgroup_try_charge(NULL,
- GFP_KERNEL, 1, &memcg, false);
+ ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
if (ret)
/* mem_cgroup_clear_mc() will do uncharge later */
return ret;
diff --git a/mm/memory.c b/mm/memory.c
index 82c1e4c..d0f0bef 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -60,6 +60,7 @@
#include <linux/migrate.h>
#include <linux/string.h>
#include <linux/dma-debug.h>
+#include <linux/debugfs.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
@@ -1320,9 +1321,9 @@
* It is undesirable to test vma->vm_file as it
* should be non-null for valid hugetlb area.
* However, vm_file will be NULL in the error
- * cleanup path of do_mmap_pgoff. When
+ * cleanup path of mmap_region. When
* hugetlbfs ->mmap method fails,
- * do_mmap_pgoff() nullifies vma->vm_file
+ * mmap_region() nullifies vma->vm_file
* before calling this function to clean up.
* Since no pte has actually been setup, it is
* safe to do nothing in this case.
@@ -2781,7 +2782,7 @@
*/
if (!page_mkwrite) {
wait_on_page_locked(dirty_page);
- set_page_dirty_balance(dirty_page, page_mkwrite);
+ set_page_dirty_balance(dirty_page);
/* file_update_time outside page_lock */
if (vma->vm_file)
file_update_time(vma->vm_file);
@@ -2827,7 +2828,7 @@
}
__SetPageUptodate(new_page);
- if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
+ if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
goto oom_free_new;
mmun_start = address & PAGE_MASK;
@@ -3280,7 +3281,7 @@
*/
__SetPageUptodate(page);
- if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))
+ if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL))
goto oom_free_page;
entry = mk_pte(page, vma->vm_page_prot);
@@ -3342,7 +3343,22 @@
return ret;
}
-static void do_set_pte(struct vm_area_struct *vma, unsigned long address,
+/**
+ * do_set_pte - setup new PTE entry for given page and add reverse page mapping.
+ *
+ * @vma: virtual memory area
+ * @address: user virtual address
+ * @page: page to map
+ * @pte: pointer to target page table entry
+ * @write: true, if new entry is writable
+ * @anon: true, if it's anonymous page
+ *
+ * Caller must hold page table lock relevant for @pte.
+ *
+ * Target users are page handler itself and implementations of
+ * vm_ops->map_pages.
+ */
+void do_set_pte(struct vm_area_struct *vma, unsigned long address,
struct page *page, pte_t *pte, bool write, bool anon)
{
pte_t entry;
@@ -3366,6 +3382,105 @@
update_mmu_cache(vma, address, pte);
}
+#define FAULT_AROUND_ORDER 4
+
+#ifdef CONFIG_DEBUG_FS
+static unsigned int fault_around_order = FAULT_AROUND_ORDER;
+
+static int fault_around_order_get(void *data, u64 *val)
+{
+ *val = fault_around_order;
+ return 0;
+}
+
+static int fault_around_order_set(void *data, u64 val)
+{
+ BUILD_BUG_ON((1UL << FAULT_AROUND_ORDER) > PTRS_PER_PTE);
+ if (1UL << val > PTRS_PER_PTE)
+ return -EINVAL;
+ fault_around_order = val;
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fault_around_order_fops,
+ fault_around_order_get, fault_around_order_set, "%llu\n");
+
+static int __init fault_around_debugfs(void)
+{
+ void *ret;
+
+ ret = debugfs_create_file("fault_around_order", 0644, NULL, NULL,
+ &fault_around_order_fops);
+ if (!ret)
+ pr_warn("Failed to create fault_around_order in debugfs");
+ return 0;
+}
+late_initcall(fault_around_debugfs);
+
+static inline unsigned long fault_around_pages(void)
+{
+ return 1UL << fault_around_order;
+}
+
+static inline unsigned long fault_around_mask(void)
+{
+ return ~((1UL << (PAGE_SHIFT + fault_around_order)) - 1);
+}
+#else
+static inline unsigned long fault_around_pages(void)
+{
+ unsigned long nr_pages;
+
+ nr_pages = 1UL << FAULT_AROUND_ORDER;
+ BUILD_BUG_ON(nr_pages > PTRS_PER_PTE);
+ return nr_pages;
+}
+
+static inline unsigned long fault_around_mask(void)
+{
+ return ~((1UL << (PAGE_SHIFT + FAULT_AROUND_ORDER)) - 1);
+}
+#endif
+
+static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
+ pte_t *pte, pgoff_t pgoff, unsigned int flags)
+{
+ unsigned long start_addr;
+ pgoff_t max_pgoff;
+ struct vm_fault vmf;
+ int off;
+
+ start_addr = max(address & fault_around_mask(), vma->vm_start);
+ off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+ pte -= off;
+ pgoff -= off;
+
+ /*
+ * max_pgoff is either end of page table or end of vma
+ * or fault_around_pages() from pgoff, depending what is neast.
+ */
+ max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
+ PTRS_PER_PTE - 1;
+ max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1,
+ pgoff + fault_around_pages() - 1);
+
+ /* Check if it makes any sense to call ->map_pages */
+ while (!pte_none(*pte)) {
+ if (++pgoff > max_pgoff)
+ return;
+ start_addr += PAGE_SIZE;
+ if (start_addr >= vma->vm_end)
+ return;
+ pte++;
+ }
+
+ vmf.virtual_address = (void __user *) start_addr;
+ vmf.pte = pte;
+ vmf.pgoff = pgoff;
+ vmf.max_pgoff = max_pgoff;
+ vmf.flags = flags;
+ vma->vm_ops->map_pages(vma, &vmf);
+}
+
static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd,
pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
@@ -3373,7 +3488,20 @@
struct page *fault_page;
spinlock_t *ptl;
pte_t *pte;
- int ret;
+ int ret = 0;
+
+ /*
+ * Let's call ->map_pages() first and use ->fault() as fallback
+ * if page by the offset is not ready to be mapped (cold cache or
+ * something).
+ */
+ if (vma->vm_ops->map_pages) {
+ pte = pte_offset_map_lock(mm, pmd, address, &ptl);
+ do_fault_around(vma, address, pte, pgoff, flags);
+ if (!pte_same(*pte, orig_pte))
+ goto unlock_out;
+ pte_unmap_unlock(pte, ptl);
+ }
ret = __do_fault(vma, address, pgoff, flags, &fault_page);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
@@ -3387,8 +3515,9 @@
return ret;
}
do_set_pte(vma, address, fault_page, pte, false, false);
- pte_unmap_unlock(pte, ptl);
unlock_page(fault_page);
+unlock_out:
+ pte_unmap_unlock(pte, ptl);
return ret;
}
@@ -3408,7 +3537,7 @@
if (!new_page)
return VM_FAULT_OOM;
- if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) {
+ if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) {
page_cache_release(new_page);
return VM_FAULT_OOM;
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e3ab028..78e1472 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -795,36 +795,6 @@
return err;
}
-/*
- * Update task->flags PF_MEMPOLICY bit: set iff non-default
- * mempolicy. Allows more rapid checking of this (combined perhaps
- * with other PF_* flag bits) on memory allocation hot code paths.
- *
- * If called from outside this file, the task 'p' should -only- be
- * a newly forked child not yet visible on the task list, because
- * manipulating the task flags of a visible task is not safe.
- *
- * The above limitation is why this routine has the funny name
- * mpol_fix_fork_child_flag().
- *
- * It is also safe to call this with a task pointer of current,
- * which the static wrapper mpol_set_task_struct_flag() does,
- * for use within this file.
- */
-
-void mpol_fix_fork_child_flag(struct task_struct *p)
-{
- if (p->mempolicy)
- p->flags |= PF_MEMPOLICY;
- else
- p->flags &= ~PF_MEMPOLICY;
-}
-
-static void mpol_set_task_struct_flag(void)
-{
- mpol_fix_fork_child_flag(current);
-}
-
/* Set the process memory policy */
static long do_set_mempolicy(unsigned short mode, unsigned short flags,
nodemask_t *nodes)
@@ -861,7 +831,6 @@
}
old = current->mempolicy;
current->mempolicy = new;
- mpol_set_task_struct_flag();
if (new && new->mode == MPOL_INTERLEAVE &&
nodes_weight(new->v.nodes))
current->il_next = first_node(new->v.nodes);
@@ -1782,21 +1751,18 @@
/*
* Depending on the memory policy provide a node from which to allocate the
* next slab entry.
- * @policy must be protected by freeing by the caller. If @policy is
- * the current task's mempolicy, this protection is implicit, as only the
- * task can change it's policy. The system default policy requires no
- * such protection.
*/
-unsigned slab_node(void)
+unsigned int mempolicy_slab_node(void)
{
struct mempolicy *policy;
+ int node = numa_mem_id();
if (in_interrupt())
- return numa_node_id();
+ return node;
policy = current->mempolicy;
if (!policy || policy->flags & MPOL_F_LOCAL)
- return numa_node_id();
+ return node;
switch (policy->mode) {
case MPOL_PREFERRED:
@@ -1816,11 +1782,11 @@
struct zonelist *zonelist;
struct zone *zone;
enum zone_type highest_zoneidx = gfp_zone(GFP_KERNEL);
- zonelist = &NODE_DATA(numa_node_id())->node_zonelists[0];
+ zonelist = &NODE_DATA(node)->node_zonelists[0];
(void)first_zones_zonelist(zonelist, highest_zoneidx,
&policy->v.nodes,
&zone);
- return zone ? zone->node : numa_node_id();
+ return zone ? zone->node : node;
}
default:
diff --git a/mm/mempool.c b/mm/mempool.c
index 659aa42..905434f 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -304,9 +304,9 @@
* ensures that there will be frees which return elements to the
* pool waking up the waiters.
*/
- if (pool->curr_nr < pool->min_nr) {
+ if (unlikely(pool->curr_nr < pool->min_nr)) {
spin_lock_irqsave(&pool->lock, flags);
- if (pool->curr_nr < pool->min_nr) {
+ if (likely(pool->curr_nr < pool->min_nr)) {
add_element(pool, element);
spin_unlock_irqrestore(&pool->lock, flags);
wake_up(&pool->wait);
diff --git a/mm/mlock.c b/mm/mlock.c
index 4e1a6816..b1eb536 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -79,6 +79,7 @@
*/
void mlock_vma_page(struct page *page)
{
+ /* Serialize with page migration */
BUG_ON(!PageLocked(page));
if (!TestSetPageMlocked(page)) {
@@ -174,6 +175,7 @@
unsigned int nr_pages;
struct zone *zone = page_zone(page);
+ /* For try_to_munlock() and to serialize with page migration */
BUG_ON(!PageLocked(page));
/*
diff --git a/mm/mmap.c b/mm/mmap.c
index 46433e1..b1202cf 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -10,6 +10,7 @@
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
+#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
@@ -681,8 +682,9 @@
prev->vm_next = next = vma->vm_next;
if (next)
next->vm_prev = prev;
- if (mm->mmap_cache == vma)
- mm->mmap_cache = prev;
+
+ /* Kill the cache */
+ vmacache_invalidate(mm);
}
/*
@@ -1989,34 +1991,33 @@
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
- struct vm_area_struct *vma = NULL;
+ struct rb_node *rb_node;
+ struct vm_area_struct *vma;
/* Check the cache first. */
- /* (Cache hit rate is typically around 35%.) */
- vma = ACCESS_ONCE(mm->mmap_cache);
- if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
- struct rb_node *rb_node;
+ vma = vmacache_find(mm, addr);
+ if (likely(vma))
+ return vma;
- rb_node = mm->mm_rb.rb_node;
- vma = NULL;
+ rb_node = mm->mm_rb.rb_node;
+ vma = NULL;
- while (rb_node) {
- struct vm_area_struct *vma_tmp;
+ while (rb_node) {
+ struct vm_area_struct *tmp;
- vma_tmp = rb_entry(rb_node,
- struct vm_area_struct, vm_rb);
+ tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
- if (vma_tmp->vm_end > addr) {
- vma = vma_tmp;
- if (vma_tmp->vm_start <= addr)
- break;
- rb_node = rb_node->rb_left;
- } else
- rb_node = rb_node->rb_right;
- }
- if (vma)
- mm->mmap_cache = vma;
+ if (tmp->vm_end > addr) {
+ vma = tmp;
+ if (tmp->vm_start <= addr)
+ break;
+ rb_node = rb_node->rb_left;
+ } else
+ rb_node = rb_node->rb_right;
}
+
+ if (vma)
+ vmacache_update(addr, vma);
return vma;
}
@@ -2388,7 +2389,9 @@
} else
mm->highest_vm_end = prev ? prev->vm_end : 0;
tail_vma->vm_next = NULL;
- mm->mmap_cache = NULL; /* Kill the cache. */
+
+ /* Kill the cache */
+ vmacache_invalidate(mm);
}
/*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 769a67a..c43d557 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -36,6 +36,34 @@
}
#endif
+/*
+ * For a prot_numa update we only hold mmap_sem for read so there is a
+ * potential race with faulting where a pmd was temporarily none. This
+ * function checks for a transhuge pmd under the appropriate lock. It
+ * returns a pte if it was successfully locked or NULL if it raced with
+ * a transhuge insertion.
+ */
+static pte_t *lock_pte_protection(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, int prot_numa, spinlock_t **ptl)
+{
+ pte_t *pte;
+ spinlock_t *pmdl;
+
+ /* !prot_numa is protected by mmap_sem held for write */
+ if (!prot_numa)
+ return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
+
+ pmdl = pmd_lock(vma->vm_mm, pmd);
+ if (unlikely(pmd_trans_huge(*pmd) || pmd_none(*pmd))) {
+ spin_unlock(pmdl);
+ return NULL;
+ }
+
+ pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
+ spin_unlock(pmdl);
+ return pte;
+}
+
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end, pgprot_t newprot,
int dirty_accountable, int prot_numa)
@@ -45,7 +73,10 @@
spinlock_t *ptl;
unsigned long pages = 0;
- pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
+ if (!pte)
+ return 0;
+
arch_enter_lazy_mmu_mode();
do {
oldpte = *pte;
@@ -109,15 +140,26 @@
pgprot_t newprot, int dirty_accountable, int prot_numa)
{
pmd_t *pmd;
+ struct mm_struct *mm = vma->vm_mm;
unsigned long next;
unsigned long pages = 0;
unsigned long nr_huge_updates = 0;
+ unsigned long mni_start = 0;
pmd = pmd_offset(pud, addr);
do {
unsigned long this_pages;
next = pmd_addr_end(addr, end);
+ if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd))
+ continue;
+
+ /* invoke the mmu notifier if the pmd is populated */
+ if (!mni_start) {
+ mni_start = addr;
+ mmu_notifier_invalidate_range_start(mm, mni_start, end);
+ }
+
if (pmd_trans_huge(*pmd)) {
if (next - addr != HPAGE_PMD_SIZE)
split_huge_page_pmd(vma, addr, pmd);
@@ -130,18 +172,21 @@
pages += HPAGE_PMD_NR;
nr_huge_updates++;
}
+
+ /* huge pmd was handled */
continue;
}
}
- /* fall through */
+ /* fall through, the trans huge pmd just split */
}
- if (pmd_none_or_clear_bad(pmd))
- continue;
this_pages = change_pte_range(vma, pmd, addr, next, newprot,
dirty_accountable, prot_numa);
pages += this_pages;
} while (pmd++, addr = next, addr != end);
+ if (mni_start)
+ mmu_notifier_invalidate_range_end(mm, mni_start, end);
+
if (nr_huge_updates)
count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
return pages;
@@ -201,15 +246,12 @@
unsigned long end, pgprot_t newprot,
int dirty_accountable, int prot_numa)
{
- struct mm_struct *mm = vma->vm_mm;
unsigned long pages;
- mmu_notifier_invalidate_range_start(mm, start, end);
if (is_vm_hugetlb_page(vma))
pages = hugetlb_change_protection(vma, start, end, newprot);
else
pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
- mmu_notifier_invalidate_range_end(mm, start, end);
return pages;
}
diff --git a/mm/nommu.c b/mm/nommu.c
index a554e5a..85f8d66 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -15,6 +15,7 @@
#include <linux/export.h>
#include <linux/mm.h>
+#include <linux/vmacache.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/file.h>
@@ -24,6 +25,7 @@
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
+#include <linux/compiler.h>
#include <linux/mount.h>
#include <linux/personality.h>
#include <linux/security.h>
@@ -296,7 +298,7 @@
count = -(unsigned long) addr;
memcpy(addr, buf, count);
- return(count);
+ return count;
}
/*
@@ -459,7 +461,7 @@
* Implement a stub for vmalloc_sync_all() if the architecture chose not to
* have one.
*/
-void __attribute__((weak)) vmalloc_sync_all(void)
+void __weak vmalloc_sync_all(void)
{
}
@@ -768,16 +770,23 @@
*/
static void delete_vma_from_mm(struct vm_area_struct *vma)
{
+ int i;
struct address_space *mapping;
struct mm_struct *mm = vma->vm_mm;
+ struct task_struct *curr = current;
kenter("%p", vma);
protect_vma(vma, 0);
mm->map_count--;
- if (mm->mmap_cache == vma)
- mm->mmap_cache = NULL;
+ for (i = 0; i < VMACACHE_SIZE; i++) {
+ /* if the vma is cached, invalidate the entire cache */
+ if (curr->vmacache[i] == vma) {
+ vmacache_invalidate(curr->mm);
+ break;
+ }
+ }
/* remove the VMA from the mapping */
if (vma->vm_file) {
@@ -825,8 +834,8 @@
struct vm_area_struct *vma;
/* check the cache first */
- vma = ACCESS_ONCE(mm->mmap_cache);
- if (vma && vma->vm_start <= addr && vma->vm_end > addr)
+ vma = vmacache_find(mm, addr);
+ if (likely(vma))
return vma;
/* trawl the list (there may be multiple mappings in which addr
@@ -835,7 +844,7 @@
if (vma->vm_start > addr)
return NULL;
if (vma->vm_end > addr) {
- mm->mmap_cache = vma;
+ vmacache_update(addr, vma);
return vma;
}
}
@@ -874,8 +883,8 @@
unsigned long end = addr + len;
/* check the cache first */
- vma = mm->mmap_cache;
- if (vma && vma->vm_start == addr && vma->vm_end == end)
+ vma = vmacache_find_exact(mm, addr, end);
+ if (vma)
return vma;
/* trawl the list (there may be multiple mappings in which addr
@@ -886,7 +895,7 @@
if (vma->vm_start > addr)
return NULL;
if (vma->vm_end == end) {
- mm->mmap_cache = vma;
+ vmacache_update(addr, vma);
return vma;
}
}
@@ -1003,8 +1012,7 @@
/* we mustn't privatise shared mappings */
capabilities &= ~BDI_CAP_MAP_COPY;
- }
- else {
+ } else {
/* we're going to read the file into private memory we
* allocate */
if (!(capabilities & BDI_CAP_MAP_COPY))
@@ -1035,23 +1043,20 @@
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
if (prot & PROT_EXEC)
return -EPERM;
- }
- else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
+ } else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
/* handle implication of PROT_EXEC by PROT_READ */
if (current->personality & READ_IMPLIES_EXEC) {
if (capabilities & BDI_CAP_EXEC_MAP)
prot |= PROT_EXEC;
}
- }
- else if ((prot & PROT_READ) &&
+ } else if ((prot & PROT_READ) &&
(prot & PROT_EXEC) &&
!(capabilities & BDI_CAP_EXEC_MAP)
) {
/* backing file is not executable, try to copy */
capabilities &= ~BDI_CAP_MAP_DIRECT;
}
- }
- else {
+ } else {
/* anonymous mappings are always memory backed and can be
* privately mapped
*/
@@ -1659,7 +1664,7 @@
/* find the first potentially overlapping VMA */
vma = find_vma(mm, start);
if (!vma) {
- static int limit = 0;
+ static int limit;
if (limit < 5) {
printk(KERN_WARNING
"munmap of memory not mmapped by process %d"
@@ -1985,6 +1990,12 @@
}
EXPORT_SYMBOL(filemap_fault);
+void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ BUG();
+}
+EXPORT_SYMBOL(filemap_map_pages);
+
int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
unsigned long size, pgoff_t pgoff)
{
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7106cb1..ef41349 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1562,9 +1562,9 @@
bdi_start_background_writeback(bdi);
}
-void set_page_dirty_balance(struct page *page, int page_mkwrite)
+void set_page_dirty_balance(struct page *page)
{
- if (set_page_dirty(page) || page_mkwrite) {
+ if (set_page_dirty(page)) {
struct address_space *mapping = page_mapping(page);
if (mapping)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 979378d..5dba293 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -295,7 +295,8 @@
}
#endif
-static void bad_page(struct page *page, char *reason, unsigned long bad_flags)
+static void bad_page(struct page *page, const char *reason,
+ unsigned long bad_flags)
{
static unsigned long resume;
static unsigned long nr_shown;
@@ -623,7 +624,7 @@
static inline int free_pages_check(struct page *page)
{
- char *bad_reason = NULL;
+ const char *bad_reason = NULL;
unsigned long bad_flags = 0;
if (unlikely(page_mapcount(page)))
@@ -859,7 +860,7 @@
*/
static inline int check_new_page(struct page *page)
{
- char *bad_reason = NULL;
+ const char *bad_reason = NULL;
unsigned long bad_flags = 0;
if (unlikely(page_mapcount(page)))
@@ -1238,15 +1239,6 @@
}
local_irq_restore(flags);
}
-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
-{
- return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
-}
-#else
-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
-{
- return false;
-}
#endif
/*
@@ -1583,12 +1575,7 @@
get_pageblock_migratetype(page));
}
- /*
- * NOTE: GFP_THISNODE allocations do not partake in the kswapd
- * aging protocol, so they can't be fair.
- */
- if (!gfp_thisnode_allocation(gfp_flags))
- __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+ __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1870,7 +1857,7 @@
{
int i;
- for_each_online_node(i)
+ for_each_node_state(i, N_MEMORY)
if (node_distance(nid, i) <= RECLAIM_DISTANCE)
node_set(i, NODE_DATA(nid)->reclaim_nodes);
else
@@ -1954,23 +1941,12 @@
* zone size to ensure fair page aging. The zone a
* page was allocated in should have no effect on the
* time the page has in memory before being reclaimed.
- *
- * Try to stay in local zones in the fastpath. If
- * that fails, the slowpath is entered, which will do
- * another pass starting with the local zones, but
- * ultimately fall back to remote zones that do not
- * partake in the fairness round-robin cycle of this
- * zonelist.
- *
- * NOTE: GFP_THISNODE allocations do not partake in
- * the kswapd aging protocol, so they can't be fair.
*/
- if ((alloc_flags & ALLOC_WMARK_LOW) &&
- !gfp_thisnode_allocation(gfp_mask)) {
- if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
- continue;
+ if (alloc_flags & ALLOC_FAIR) {
if (!zone_local(preferred_zone, zone))
continue;
+ if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+ continue;
}
/*
* When allocating a page cache page for writing, we
@@ -2408,7 +2384,29 @@
return page;
}
-static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
+static void reset_alloc_batches(struct zonelist *zonelist,
+ enum zone_type high_zoneidx,
+ struct zone *preferred_zone)
+{
+ struct zoneref *z;
+ struct zone *zone;
+
+ for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+ /*
+ * Only reset the batches of zones that were actually
+ * considered in the fairness pass, we don't want to
+ * trash fairness information for zones that are not
+ * actually part of this zonelist's round-robin cycle.
+ */
+ if (!zone_local(preferred_zone, zone))
+ continue;
+ mod_zone_page_state(zone, NR_ALLOC_BATCH,
+ high_wmark_pages(zone) - low_wmark_pages(zone) -
+ atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
+ }
+}
+
+static void wake_all_kswapds(unsigned int order,
struct zonelist *zonelist,
enum zone_type high_zoneidx,
struct zone *preferred_zone)
@@ -2416,22 +2414,8 @@
struct zoneref *z;
struct zone *zone;
- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
- if (!(gfp_mask & __GFP_NO_KSWAPD))
- wakeup_kswapd(zone, order, zone_idx(preferred_zone));
- /*
- * Only reset the batches of zones that were actually
- * considered in the fast path, we don't want to
- * thrash fairness information for zones that are not
- * actually part of this zonelist's round-robin cycle.
- */
- if (!zone_local(preferred_zone, zone))
- continue;
- mod_zone_page_state(zone, NR_ALLOC_BATCH,
- high_wmark_pages(zone) -
- low_wmark_pages(zone) -
- zone_page_state(zone, NR_ALLOC_BATCH));
- }
+ for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+ wakeup_kswapd(zone, order, zone_idx(preferred_zone));
}
static inline int
@@ -2522,12 +2506,13 @@
* allowed per node queues are empty and that nodes are
* over allocated.
*/
- if (gfp_thisnode_allocation(gfp_mask))
+ if (IS_ENABLED(CONFIG_NUMA) &&
+ (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
goto nopage;
restart:
- prepare_slowpath(gfp_mask, order, zonelist,
- high_zoneidx, preferred_zone);
+ if (!(gfp_mask & __GFP_NO_KSWAPD))
+ wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone);
/*
* OK, we're below the kswapd watermark and have kicked background
@@ -2711,7 +2696,7 @@
struct page *page = NULL;
int migratetype = allocflags_to_migratetype(gfp_mask);
unsigned int cpuset_mems_cookie;
- int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
+ int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
struct mem_cgroup *memcg = NULL;
gfp_mask &= gfp_allowed_mask;
@@ -2752,12 +2737,29 @@
if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
alloc_flags |= ALLOC_CMA;
#endif
+retry:
/* First allocation attempt */
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
zonelist, high_zoneidx, alloc_flags,
preferred_zone, migratetype);
if (unlikely(!page)) {
/*
+ * The first pass makes sure allocations are spread
+ * fairly within the local node. However, the local
+ * node might have free pages left after the fairness
+ * batches are exhausted, and remote zones haven't
+ * even been considered yet. Try once more without
+ * fairness, and include remote zones now, before
+ * entering the slowpath and waking kswapd: prefer
+ * spilling to a remote zone over swapping locally.
+ */
+ if (alloc_flags & ALLOC_FAIR) {
+ reset_alloc_batches(zonelist, high_zoneidx,
+ preferred_zone);
+ alloc_flags &= ~ALLOC_FAIR;
+ goto retry;
+ }
+ /*
* Runtime PM, block IO and its error handling path
* can deadlock because I/O on the device might not
* complete.
@@ -4919,7 +4921,8 @@
pgdat->node_id = nid;
pgdat->node_start_pfn = node_start_pfn;
- init_zone_allows_reclaim(nid);
+ if (node_state(nid, N_MEMORY))
+ init_zone_allows_reclaim(nid);
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
#endif
@@ -5070,7 +5073,7 @@
nodemask_t saved_node_state = node_states[N_MEMORY];
unsigned long totalpages = early_calculate_totalpages();
int usable_nodes = nodes_weight(node_states[N_MEMORY]);
- struct memblock_type *type = &memblock.memory;
+ struct memblock_region *r;
/* Need to find movable_zone earlier when movable_node is specified. */
find_usable_zone_for_movable();
@@ -5080,13 +5083,13 @@
* options.
*/
if (movable_node_is_enabled()) {
- for (i = 0; i < type->cnt; i++) {
- if (!memblock_is_hotpluggable(&type->regions[i]))
+ for_each_memblock(memory, r) {
+ if (!memblock_is_hotpluggable(r))
continue;
- nid = type->regions[i].nid;
+ nid = r->nid;
- usable_startpfn = PFN_DOWN(type->regions[i].base);
+ usable_startpfn = PFN_DOWN(r->base);
zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
min(usable_startpfn, zone_movable_pfn[nid]) :
usable_startpfn;
@@ -6544,7 +6547,8 @@
printk(")\n");
}
-void dump_page_badflags(struct page *page, char *reason, unsigned long badflags)
+void dump_page_badflags(struct page *page, const char *reason,
+ unsigned long badflags)
{
printk(KERN_ALERT
"page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
@@ -6560,8 +6564,8 @@
mem_cgroup_print_bad_page(page);
}
-void dump_page(struct page *page, char *reason)
+void dump_page(struct page *page, const char *reason)
{
dump_page_badflags(page, reason, 0);
}
-EXPORT_SYMBOL_GPL(dump_page);
+EXPORT_SYMBOL(dump_page);
diff --git a/mm/readahead.c b/mm/readahead.c
index 29c5e1a..0ca36a7 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -8,9 +8,7 @@
*/
#include <linux/kernel.h>
-#include <linux/fs.h>
#include <linux/gfp.h>
-#include <linux/mm.h>
#include <linux/export.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
@@ -20,6 +18,8 @@
#include <linux/syscalls.h>
#include <linux/file.h>
+#include "internal.h"
+
/*
* Initialise a struct file's readahead state. Assumes that the caller has
* memset *ra to zero.
@@ -149,8 +149,7 @@
*
* Returns the number of pages requested, or the maximum amount of I/O allowed.
*/
-static int
-__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
+int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
pgoff_t offset, unsigned long nr_to_read,
unsigned long lookahead_size)
{
@@ -244,20 +243,6 @@
}
/*
- * Submit IO for the read-ahead request in file_ra_state.
- */
-unsigned long ra_submit(struct file_ra_state *ra,
- struct address_space *mapping, struct file *filp)
-{
- int actual;
-
- actual = __do_page_cache_readahead(mapping, filp,
- ra->start, ra->size, ra->async_size);
-
- return actual;
-}
-
-/*
* Set the initial window size, round to next power of 2 and square
* for small size, x 4 for medium, and x 2 for large
* for 128k (32 page) max ra
diff --git a/mm/rmap.c b/mm/rmap.c
index 11cf322..9c3e773 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1332,9 +1332,19 @@
BUG_ON(!page || PageAnon(page));
if (locked_vma) {
- mlock_vma_page(page); /* no-op if already mlocked */
- if (page == check_page)
+ if (page == check_page) {
+ /* we know we have check_page locked */
+ mlock_vma_page(page);
ret = SWAP_MLOCK;
+ } else if (trylock_page(page)) {
+ /*
+ * If we can lock the page, perform mlock.
+ * Otherwise leave the page alone, it will be
+ * eventually encountered again later.
+ */
+ mlock_vma_page(page);
+ unlock_page(page);
+ }
continue; /* don't unmap */
}
diff --git a/mm/shmem.c b/mm/shmem.c
index a3ba988..70273f8 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -683,7 +683,7 @@
* the shmem_swaplist_mutex which might hold up shmem_writepage().
* Charged back to the user (not to caller) when swap account is used.
*/
- error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
+ error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL);
if (error)
goto out;
/* No radix_tree_preload: swap entry keeps a place for page in tree */
@@ -1080,7 +1080,7 @@
goto failed;
}
- error = mem_cgroup_cache_charge(page, current->mm,
+ error = mem_cgroup_charge_file(page, current->mm,
gfp & GFP_RECLAIM_MASK);
if (!error) {
error = shmem_add_to_page_cache(page, mapping, index,
@@ -1134,7 +1134,7 @@
SetPageSwapBacked(page);
__set_page_locked(page);
- error = mem_cgroup_cache_charge(page, current->mm,
+ error = mem_cgroup_charge_file(page, current->mm,
gfp & GFP_RECLAIM_MASK);
if (error)
goto decused;
@@ -2723,6 +2723,7 @@
static const struct vm_operations_struct shmem_vm_ops = {
.fault = shmem_fault,
+ .map_pages = filemap_map_pages,
#ifdef CONFIG_NUMA
.set_policy = shmem_set_policy,
.get_policy = shmem_get_policy,
diff --git a/mm/slab.c b/mm/slab.c
index 9153c80..3db4cb0 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3027,7 +3027,7 @@
#ifdef CONFIG_NUMA
/*
- * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.
+ * Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set.
*
* If we are in_interrupt, then process context, including cpusets and
* mempolicy, may not apply and should not be used for allocation policy.
@@ -3042,7 +3042,7 @@
if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
nid_alloc = cpuset_slab_spread_node();
else if (current->mempolicy)
- nid_alloc = slab_node();
+ nid_alloc = mempolicy_slab_node();
if (nid_alloc != nid_here)
return ____cache_alloc_node(cachep, flags, nid_alloc);
return NULL;
@@ -3074,7 +3074,7 @@
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
- zonelist = node_zonelist(slab_node(), flags);
+ zonelist = node_zonelist(mempolicy_slab_node(), flags);
retry:
/*
@@ -3259,7 +3259,7 @@
{
void *objp;
- if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
+ if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) {
objp = alternate_node_alloc(cache, flags);
if (objp)
goto out;
diff --git a/mm/slab.h b/mm/slab.h
index 8184a7c..3045316b 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -55,12 +55,12 @@
struct mem_cgroup;
#ifdef CONFIG_SLUB
struct kmem_cache *
-__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
- size_t align, unsigned long flags, void (*ctor)(void *));
+__kmem_cache_alias(const char *name, size_t size, size_t align,
+ unsigned long flags, void (*ctor)(void *));
#else
static inline struct kmem_cache *
-__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
- size_t align, unsigned long flags, void (*ctor)(void *))
+__kmem_cache_alias(const char *name, size_t size, size_t align,
+ unsigned long flags, void (*ctor)(void *))
{ return NULL; }
#endif
@@ -119,13 +119,6 @@
return !s->memcg_params || s->memcg_params->is_root_cache;
}
-static inline bool cache_match_memcg(struct kmem_cache *cachep,
- struct mem_cgroup *memcg)
-{
- return (is_root_cache(cachep) && !memcg) ||
- (cachep->memcg_params->memcg == memcg);
-}
-
static inline void memcg_bind_pages(struct kmem_cache *s, int order)
{
if (!is_root_cache(s))
@@ -204,12 +197,6 @@
return true;
}
-static inline bool cache_match_memcg(struct kmem_cache *cachep,
- struct mem_cgroup *memcg)
-{
- return true;
-}
-
static inline void memcg_bind_pages(struct kmem_cache *s, int order)
{
}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1ec3c61..f3cfccf 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -29,8 +29,7 @@
struct kmem_cache *kmem_cache;
#ifdef CONFIG_DEBUG_VM
-static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
- size_t size)
+static int kmem_cache_sanity_check(const char *name, size_t size)
{
struct kmem_cache *s = NULL;
@@ -57,13 +56,7 @@
}
#if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON)
- /*
- * For simplicity, we won't check this in the list of memcg
- * caches. We have control over memcg naming, and if there
- * aren't duplicates in the global list, there won't be any
- * duplicates in the memcg lists as well.
- */
- if (!memcg && !strcmp(s->name, name)) {
+ if (!strcmp(s->name, name)) {
pr_err("%s (%s): Cache name already exists.\n",
__func__, name);
dump_stack();
@@ -77,8 +70,7 @@
return 0;
}
#else
-static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
- const char *name, size_t size)
+static inline int kmem_cache_sanity_check(const char *name, size_t size)
{
return 0;
}
@@ -139,6 +131,46 @@
return ALIGN(align, sizeof(void *));
}
+static struct kmem_cache *
+do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align,
+ unsigned long flags, void (*ctor)(void *),
+ struct mem_cgroup *memcg, struct kmem_cache *root_cache)
+{
+ struct kmem_cache *s;
+ int err;
+
+ err = -ENOMEM;
+ s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
+ if (!s)
+ goto out;
+
+ s->name = name;
+ s->object_size = object_size;
+ s->size = size;
+ s->align = align;
+ s->ctor = ctor;
+
+ err = memcg_alloc_cache_params(memcg, s, root_cache);
+ if (err)
+ goto out_free_cache;
+
+ err = __kmem_cache_create(s, flags);
+ if (err)
+ goto out_free_cache;
+
+ s->refcount = 1;
+ list_add(&s->list, &slab_caches);
+ memcg_register_cache(s);
+out:
+ if (err)
+ return ERR_PTR(err);
+ return s;
+
+out_free_cache:
+ memcg_free_cache_params(s);
+ kfree(s);
+ goto out;
+}
/*
* kmem_cache_create - Create a cache.
@@ -164,34 +196,21 @@
* cacheline. This can be beneficial if you're counting cycles as closely
* as davem.
*/
-
struct kmem_cache *
-kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
- size_t align, unsigned long flags, void (*ctor)(void *),
- struct kmem_cache *parent_cache)
+kmem_cache_create(const char *name, size_t size, size_t align,
+ unsigned long flags, void (*ctor)(void *))
{
- struct kmem_cache *s = NULL;
+ struct kmem_cache *s;
+ char *cache_name;
int err;
get_online_cpus();
mutex_lock(&slab_mutex);
- err = kmem_cache_sanity_check(memcg, name, size);
+ err = kmem_cache_sanity_check(name, size);
if (err)
goto out_unlock;
- if (memcg) {
- /*
- * Since per-memcg caches are created asynchronously on first
- * allocation (see memcg_kmem_get_cache()), several threads can
- * try to create the same cache, but only one of them may
- * succeed. Therefore if we get here and see the cache has
- * already been created, we silently return NULL.
- */
- if (cache_from_memcg_idx(parent_cache, memcg_cache_id(memcg)))
- goto out_unlock;
- }
-
/*
* Some allocators will constraint the set of valid flags to a subset
* of all flags. We expect them to define CACHE_CREATE_MASK in this
@@ -200,50 +219,29 @@
*/
flags &= CACHE_CREATE_MASK;
- s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
+ s = __kmem_cache_alias(name, size, align, flags, ctor);
if (s)
goto out_unlock;
- err = -ENOMEM;
- s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
- if (!s)
+ cache_name = kstrdup(name, GFP_KERNEL);
+ if (!cache_name) {
+ err = -ENOMEM;
goto out_unlock;
+ }
- s->object_size = s->size = size;
- s->align = calculate_alignment(flags, align, size);
- s->ctor = ctor;
-
- s->name = kstrdup(name, GFP_KERNEL);
- if (!s->name)
- goto out_free_cache;
-
- err = memcg_alloc_cache_params(memcg, s, parent_cache);
- if (err)
- goto out_free_cache;
-
- err = __kmem_cache_create(s, flags);
- if (err)
- goto out_free_cache;
-
- s->refcount = 1;
- list_add(&s->list, &slab_caches);
- memcg_register_cache(s);
+ s = do_kmem_cache_create(cache_name, size, size,
+ calculate_alignment(flags, align, size),
+ flags, ctor, NULL, NULL);
+ if (IS_ERR(s)) {
+ err = PTR_ERR(s);
+ kfree(cache_name);
+ }
out_unlock:
mutex_unlock(&slab_mutex);
put_online_cpus();
if (err) {
- /*
- * There is no point in flooding logs with warnings or
- * especially crashing the system if we fail to create a cache
- * for a memcg. In this case we will be accounting the memcg
- * allocation to the root cgroup until we succeed to create its
- * own cache, but it isn't that critical.
- */
- if (!memcg)
- return NULL;
-
if (flags & SLAB_PANIC)
panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
name, err);
@@ -255,52 +253,112 @@
return NULL;
}
return s;
-
-out_free_cache:
- memcg_free_cache_params(s);
- kfree(s->name);
- kmem_cache_free(kmem_cache, s);
- goto out_unlock;
-}
-
-struct kmem_cache *
-kmem_cache_create(const char *name, size_t size, size_t align,
- unsigned long flags, void (*ctor)(void *))
-{
- return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
}
EXPORT_SYMBOL(kmem_cache_create);
-void kmem_cache_destroy(struct kmem_cache *s)
+#ifdef CONFIG_MEMCG_KMEM
+/*
+ * kmem_cache_create_memcg - Create a cache for a memory cgroup.
+ * @memcg: The memory cgroup the new cache is for.
+ * @root_cache: The parent of the new cache.
+ *
+ * This function attempts to create a kmem cache that will serve allocation
+ * requests going from @memcg to @root_cache. The new cache inherits properties
+ * from its parent.
+ */
+void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
- /* Destroy all the children caches if we aren't a memcg cache */
- kmem_cache_destroy_memcg_children(s);
+ struct kmem_cache *s;
+ char *cache_name;
get_online_cpus();
mutex_lock(&slab_mutex);
- s->refcount--;
- if (!s->refcount) {
- list_del(&s->list);
- if (!__kmem_cache_shutdown(s)) {
- memcg_unregister_cache(s);
- mutex_unlock(&slab_mutex);
- if (s->flags & SLAB_DESTROY_BY_RCU)
- rcu_barrier();
+ /*
+ * Since per-memcg caches are created asynchronously on first
+ * allocation (see memcg_kmem_get_cache()), several threads can try to
+ * create the same cache, but only one of them may succeed.
+ */
+ if (cache_from_memcg_idx(root_cache, memcg_cache_id(memcg)))
+ goto out_unlock;
- memcg_free_cache_params(s);
- kfree(s->name);
- kmem_cache_free(kmem_cache, s);
- } else {
- list_add(&s->list, &slab_caches);
- mutex_unlock(&slab_mutex);
- printk(KERN_ERR "kmem_cache_destroy %s: Slab cache still has objects\n",
- s->name);
- dump_stack();
- }
- } else {
- mutex_unlock(&slab_mutex);
+ cache_name = memcg_create_cache_name(memcg, root_cache);
+ if (!cache_name)
+ goto out_unlock;
+
+ s = do_kmem_cache_create(cache_name, root_cache->object_size,
+ root_cache->size, root_cache->align,
+ root_cache->flags, root_cache->ctor,
+ memcg, root_cache);
+ if (IS_ERR(s)) {
+ kfree(cache_name);
+ goto out_unlock;
}
+
+ s->allocflags |= __GFP_KMEMCG;
+
+out_unlock:
+ mutex_unlock(&slab_mutex);
+ put_online_cpus();
+}
+
+static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+{
+ int rc;
+
+ if (!s->memcg_params ||
+ !s->memcg_params->is_root_cache)
+ return 0;
+
+ mutex_unlock(&slab_mutex);
+ rc = __kmem_cache_destroy_memcg_children(s);
+ mutex_lock(&slab_mutex);
+
+ return rc;
+}
+#else
+static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+{
+ return 0;
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
+void kmem_cache_destroy(struct kmem_cache *s)
+{
+ get_online_cpus();
+ mutex_lock(&slab_mutex);
+
+ s->refcount--;
+ if (s->refcount)
+ goto out_unlock;
+
+ if (kmem_cache_destroy_memcg_children(s) != 0)
+ goto out_unlock;
+
+ list_del(&s->list);
+ memcg_unregister_cache(s);
+
+ if (__kmem_cache_shutdown(s) != 0) {
+ list_add(&s->list, &slab_caches);
+ memcg_register_cache(s);
+ printk(KERN_ERR "kmem_cache_destroy %s: "
+ "Slab cache still has objects\n", s->name);
+ dump_stack();
+ goto out_unlock;
+ }
+
+ mutex_unlock(&slab_mutex);
+ if (s->flags & SLAB_DESTROY_BY_RCU)
+ rcu_barrier();
+
+ memcg_free_cache_params(s);
+ kfree(s->name);
+ kmem_cache_free(kmem_cache, s);
+ goto out_put_cpus;
+
+out_unlock:
+ mutex_unlock(&slab_mutex);
+out_put_cpus:
put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);
diff --git a/mm/slub.c b/mm/slub.c
index fe6d7be..f620bbf 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -224,7 +224,11 @@
static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
- __this_cpu_inc(s->cpu_slab->stat[si]);
+ /*
+ * The rmw is racy on a preemptible kernel but this is acceptable, so
+ * avoid this_cpu_add()'s irq-disable overhead.
+ */
+ raw_cpu_inc(s->cpu_slab->stat[si]);
#endif
}
@@ -1685,7 +1689,7 @@
do {
cpuset_mems_cookie = read_mems_allowed_begin();
- zonelist = node_zonelist(slab_node(), flags);
+ zonelist = node_zonelist(mempolicy_slab_node(), flags);
for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
struct kmem_cache_node *n;
@@ -3685,6 +3689,9 @@
if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
return 1;
+ if (!is_root_cache(s))
+ return 1;
+
if (s->ctor)
return 1;
@@ -3697,9 +3704,8 @@
return 0;
}
-static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size,
- size_t align, unsigned long flags, const char *name,
- void (*ctor)(void *))
+static struct kmem_cache *find_mergeable(size_t size, size_t align,
+ unsigned long flags, const char *name, void (*ctor)(void *))
{
struct kmem_cache *s;
@@ -3722,7 +3728,7 @@
continue;
if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
- continue;
+ continue;
/*
* Check if alignment is compatible.
* Courtesy of Adrian Drzewiecki
@@ -3733,23 +3739,24 @@
if (s->size - size >= sizeof(void *))
continue;
- if (!cache_match_memcg(s, memcg))
- continue;
-
return s;
}
return NULL;
}
struct kmem_cache *
-__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
- size_t align, unsigned long flags, void (*ctor)(void *))
+__kmem_cache_alias(const char *name, size_t size, size_t align,
+ unsigned long flags, void (*ctor)(void *))
{
struct kmem_cache *s;
- s = find_mergeable(memcg, size, align, flags, name, ctor);
+ s = find_mergeable(size, align, flags, name, ctor);
if (s) {
+ int i;
+ struct kmem_cache *c;
+
s->refcount++;
+
/*
* Adjust the object sizes so that we clear
* the complete object on kzalloc.
@@ -3757,6 +3764,15 @@
s->object_size = max(s->object_size, (int)size);
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
+ for_each_memcg_cache_index(i) {
+ c = cache_from_memcg_idx(s, i);
+ if (!c)
+ continue;
+ c->object_size = s->object_size;
+ c->inuse = max_t(int, c->inuse,
+ ALIGN(size, sizeof(void *)));
+ }
+
if (sysfs_slab_alias(s, name)) {
s->refcount--;
s = NULL;
@@ -5126,6 +5142,15 @@
static struct kset *slab_kset;
+static inline struct kset *cache_kset(struct kmem_cache *s)
+{
+#ifdef CONFIG_MEMCG_KMEM
+ if (!is_root_cache(s))
+ return s->memcg_params->root_cache->memcg_kset;
+#endif
+ return slab_kset;
+}
+
#define ID_STR_LENGTH 64
/* Create a unique string id for a slab cache:
@@ -5191,26 +5216,39 @@
name = create_unique_id(s);
}
- s->kobj.kset = slab_kset;
+ s->kobj.kset = cache_kset(s);
err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
- if (err) {
- kobject_put(&s->kobj);
- return err;
- }
+ if (err)
+ goto out_put_kobj;
err = sysfs_create_group(&s->kobj, &slab_attr_group);
- if (err) {
- kobject_del(&s->kobj);
- kobject_put(&s->kobj);
- return err;
+ if (err)
+ goto out_del_kobj;
+
+#ifdef CONFIG_MEMCG_KMEM
+ if (is_root_cache(s)) {
+ s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
+ if (!s->memcg_kset) {
+ err = -ENOMEM;
+ goto out_del_kobj;
+ }
}
+#endif
+
kobject_uevent(&s->kobj, KOBJ_ADD);
if (!unmergeable) {
/* Setup first alias */
sysfs_slab_alias(s, s->name);
- kfree(name);
}
- return 0;
+out:
+ if (!unmergeable)
+ kfree(name);
+ return err;
+out_del_kobj:
+ kobject_del(&s->kobj);
+out_put_kobj:
+ kobject_put(&s->kobj);
+ goto out;
}
static void sysfs_slab_remove(struct kmem_cache *s)
@@ -5222,6 +5260,9 @@
*/
return;
+#ifdef CONFIG_MEMCG_KMEM
+ kset_unregister(s->memcg_kset);
+#endif
kobject_uevent(&s->kobj, KOBJ_REMOVE);
kobject_del(&s->kobj);
kobject_put(&s->kobj);
diff --git a/mm/sparse.c b/mm/sparse.c
index 38cad8f..d1b48b6 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -5,10 +5,12 @@
#include <linux/slab.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
+#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
+
#include "internal.h"
#include <asm/dma.h>
#include <asm/pgalloc.h>
@@ -461,7 +463,7 @@
}
#endif
-void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
+void __weak __meminit vmemmap_populate_print_last(void)
{
}
diff --git a/mm/util.c b/mm/util.c
index a24aa22..d7813e6 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1,6 +1,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/sched.h>
@@ -307,7 +308,7 @@
* If the architecture not support this function, simply return with no
* page pinned
*/
-int __attribute__((weak)) __get_user_pages_fast(unsigned long start,
+int __weak __get_user_pages_fast(unsigned long start,
int nr_pages, int write, struct page **pages)
{
return 0;
@@ -338,7 +339,7 @@
* callers need to carefully consider what to use. On many architectures,
* get_user_pages_fast simply falls back to get_user_pages.
*/
-int __attribute__((weak)) get_user_pages_fast(unsigned long start,
+int __weak get_user_pages_fast(unsigned long start,
int nr_pages, int write, struct page **pages)
{
struct mm_struct *mm = current->mm;
diff --git a/mm/vmacache.c b/mm/vmacache.c
new file mode 100644
index 0000000..d4224b3
--- /dev/null
+++ b/mm/vmacache.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2014 Davidlohr Bueso.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
+
+/*
+ * Flush vma caches for threads that share a given mm.
+ *
+ * The operation is safe because the caller holds the mmap_sem
+ * exclusively and other threads accessing the vma cache will
+ * have mmap_sem held at least for read, so no extra locking
+ * is required to maintain the vma cache.
+ */
+void vmacache_flush_all(struct mm_struct *mm)
+{
+ struct task_struct *g, *p;
+
+ rcu_read_lock();
+ for_each_process_thread(g, p) {
+ /*
+ * Only flush the vmacache pointers as the
+ * mm seqnum is already set and curr's will
+ * be set upon invalidation when the next
+ * lookup is done.
+ */
+ if (mm == p->mm)
+ vmacache_flush(p);
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * This task may be accessing a foreign mm via (for example)
+ * get_user_pages()->find_vma(). The vmacache is task-local and this
+ * task's vmacache pertains to a different mm (ie, its own). There is
+ * nothing we can do here.
+ *
+ * Also handle the case where a kernel thread has adopted this mm via use_mm().
+ * That kernel thread's vmacache is not applicable to this mm.
+ */
+static bool vmacache_valid_mm(struct mm_struct *mm)
+{
+ return current->mm == mm && !(current->flags & PF_KTHREAD);
+}
+
+void vmacache_update(unsigned long addr, struct vm_area_struct *newvma)
+{
+ if (vmacache_valid_mm(newvma->vm_mm))
+ current->vmacache[VMACACHE_HASH(addr)] = newvma;
+}
+
+static bool vmacache_valid(struct mm_struct *mm)
+{
+ struct task_struct *curr;
+
+ if (!vmacache_valid_mm(mm))
+ return false;
+
+ curr = current;
+ if (mm->vmacache_seqnum != curr->vmacache_seqnum) {
+ /*
+ * First attempt will always be invalid, initialize
+ * the new cache for this task here.
+ */
+ curr->vmacache_seqnum = mm->vmacache_seqnum;
+ vmacache_flush(curr);
+ return false;
+ }
+ return true;
+}
+
+struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
+{
+ int i;
+
+ if (!vmacache_valid(mm))
+ return NULL;
+
+ for (i = 0; i < VMACACHE_SIZE; i++) {
+ struct vm_area_struct *vma = current->vmacache[i];
+
+ if (vma && vma->vm_start <= addr && vma->vm_end > addr) {
+ BUG_ON(vma->vm_mm != mm);
+ return vma;
+ }
+ }
+
+ return NULL;
+}
+
+#ifndef CONFIG_MMU
+struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ int i;
+
+ if (!vmacache_valid(mm))
+ return NULL;
+
+ for (i = 0; i < VMACACHE_SIZE; i++) {
+ struct vm_area_struct *vma = current->vmacache[i];
+
+ if (vma && vma->vm_start == start && vma->vm_end == end)
+ return vma;
+ }
+
+ return NULL;
+}
+#endif
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0fdf968..bf233b2 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -27,7 +27,9 @@
#include <linux/pfn.h>
#include <linux/kmemleak.h>
#include <linux/atomic.h>
+#include <linux/compiler.h>
#include <linux/llist.h>
+
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
@@ -1083,6 +1085,12 @@
* @node: prefer to allocate data structures on this node
* @prot: memory protection to use. PAGE_KERNEL for regular RAM
*
+ * If you use this function for less than VMAP_MAX_ALLOC pages, it could be
+ * faster than vmap so it's good. But if you mix long-life and short-life
+ * objects with vm_map_ram(), it could consume lots of address space through
+ * fragmentation (especially on a 32bit machine). You could see failures in
+ * the end. Please use this function for short-lived objects.
+ *
* Returns: a pointer to the address that has been mapped, or %NULL on failure
*/
void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
@@ -2181,7 +2189,7 @@
* Implement a stub for vmalloc_sync_all() if the architecture chose not to
* have one.
*/
-void __attribute__((weak)) vmalloc_sync_all(void)
+void __weak vmalloc_sync_all(void)
{
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1f56a80..06879ea 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2314,15 +2314,18 @@
unsigned long lru_pages = 0;
bool aborted_reclaim = false;
struct reclaim_state *reclaim_state = current->reclaim_state;
+ gfp_t orig_mask;
struct shrink_control shrink = {
.gfp_mask = sc->gfp_mask,
};
+ enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
/*
* If the number of buffer_heads in the machine exceeds the maximum
* allowed level, force direct reclaim to scan the highmem zone as
* highmem pages could be pinning lowmem pages storing buffer_heads
*/
+ orig_mask = sc->gfp_mask;
if (buffer_heads_over_limit)
sc->gfp_mask |= __GFP_HIGHMEM;
@@ -2356,7 +2359,8 @@
* noticeable problem, like transparent huge
* page allocations.
*/
- if (compaction_ready(zone, sc)) {
+ if ((zonelist_zone_idx(z) <= requested_highidx)
+ && compaction_ready(zone, sc)) {
aborted_reclaim = true;
continue;
}
@@ -2393,6 +2397,12 @@
}
}
+ /*
+ * Restore to original mask to avoid the impact on the caller if we
+ * promoted it to __GFP_HIGHMEM.
+ */
+ sc->gfp_mask = orig_mask;
+
return aborted_reclaim;
}
diff --git a/mm/zswap.c b/mm/zswap.c
index d7337fb..aeaef0f 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -89,6 +89,9 @@
module_param_named(max_pool_percent,
zswap_max_pool_percent, uint, 0644);
+/* zbud_pool is shared by all of zswap backend */
+static struct zbud_pool *zswap_pool;
+
/*********************************
* compression functions
**********************************/
@@ -160,14 +163,14 @@
* rbnode - links the entry into red-black tree for the appropriate swap type
* refcount - the number of outstanding reference to the entry. This is needed
* to protect against premature freeing of the entry by code
- * concurent calls to load, invalidate, and writeback. The lock
+ * concurrent calls to load, invalidate, and writeback. The lock
* for the zswap_tree structure that contains the entry must
* be held while changing the refcount. Since the lock must
* be held, there is no reason to also make refcount atomic.
* offset - the swap offset for the entry. Index into the red-black tree.
- * handle - zsmalloc allocation handle that stores the compressed page data
+ * handle - zbud allocation handle that stores the compressed page data
* length - the length in bytes of the compressed page data. Needed during
- * decompression
+ * decompression
*/
struct zswap_entry {
struct rb_node rbnode;
@@ -189,7 +192,6 @@
struct zswap_tree {
struct rb_root rbroot;
spinlock_t lock;
- struct zbud_pool *pool;
};
static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
@@ -202,7 +204,7 @@
static int zswap_entry_cache_create(void)
{
zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
- return (zswap_entry_cache == NULL);
+ return zswap_entry_cache == NULL;
}
static void zswap_entry_cache_destory(void)
@@ -282,16 +284,15 @@
}
/*
- * Carries out the common pattern of freeing and entry's zsmalloc allocation,
+ * Carries out the common pattern of freeing and entry's zbud allocation,
* freeing the entry itself, and decrementing the number of stored pages.
*/
-static void zswap_free_entry(struct zswap_tree *tree,
- struct zswap_entry *entry)
+static void zswap_free_entry(struct zswap_entry *entry)
{
- zbud_free(tree->pool, entry->handle);
+ zbud_free(zswap_pool, entry->handle);
zswap_entry_cache_free(entry);
atomic_dec(&zswap_stored_pages);
- zswap_pool_pages = zbud_get_pool_size(tree->pool);
+ zswap_pool_pages = zbud_get_pool_size(zswap_pool);
}
/* caller must hold the tree lock */
@@ -311,7 +312,7 @@
BUG_ON(refcount < 0);
if (refcount == 0) {
zswap_rb_erase(&tree->rbroot, entry);
- zswap_free_entry(tree, entry);
+ zswap_free_entry(entry);
}
}
@@ -407,8 +408,8 @@
**********************************/
static bool zswap_is_full(void)
{
- return (totalram_pages * zswap_max_pool_percent / 100 <
- zswap_pool_pages);
+ return totalram_pages * zswap_max_pool_percent / 100 <
+ zswap_pool_pages;
}
/*********************************
@@ -545,7 +546,6 @@
zbud_unmap(pool, handle);
tree = zswap_trees[swp_type(swpentry)];
offset = swp_offset(swpentry);
- BUG_ON(pool != tree->pool);
/* find and ref zswap entry */
spin_lock(&tree->lock);
@@ -573,13 +573,13 @@
case ZSWAP_SWAPCACHE_NEW: /* page is locked */
/* decompress */
dlen = PAGE_SIZE;
- src = (u8 *)zbud_map(tree->pool, entry->handle) +
+ src = (u8 *)zbud_map(zswap_pool, entry->handle) +
sizeof(struct zswap_header);
dst = kmap_atomic(page);
ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
entry->length, dst, &dlen);
kunmap_atomic(dst);
- zbud_unmap(tree->pool, entry->handle);
+ zbud_unmap(zswap_pool, entry->handle);
BUG_ON(ret);
BUG_ON(dlen != PAGE_SIZE);
@@ -652,7 +652,7 @@
/* reclaim space if needed */
if (zswap_is_full()) {
zswap_pool_limit_hit++;
- if (zbud_reclaim_page(tree->pool, 8)) {
+ if (zbud_reclaim_page(zswap_pool, 8)) {
zswap_reject_reclaim_fail++;
ret = -ENOMEM;
goto reject;
@@ -679,7 +679,7 @@
/* store */
len = dlen + sizeof(struct zswap_header);
- ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN,
+ ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
&handle);
if (ret == -ENOSPC) {
zswap_reject_compress_poor++;
@@ -689,11 +689,11 @@
zswap_reject_alloc_fail++;
goto freepage;
}
- zhdr = zbud_map(tree->pool, handle);
+ zhdr = zbud_map(zswap_pool, handle);
zhdr->swpentry = swp_entry(type, offset);
buf = (u8 *)(zhdr + 1);
memcpy(buf, dst, dlen);
- zbud_unmap(tree->pool, handle);
+ zbud_unmap(zswap_pool, handle);
put_cpu_var(zswap_dstmem);
/* populate entry */
@@ -716,7 +716,7 @@
/* update stats */
atomic_inc(&zswap_stored_pages);
- zswap_pool_pages = zbud_get_pool_size(tree->pool);
+ zswap_pool_pages = zbud_get_pool_size(zswap_pool);
return 0;
@@ -752,13 +752,13 @@
/* decompress */
dlen = PAGE_SIZE;
- src = (u8 *)zbud_map(tree->pool, entry->handle) +
+ src = (u8 *)zbud_map(zswap_pool, entry->handle) +
sizeof(struct zswap_header);
dst = kmap_atomic(page);
ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
dst, &dlen);
kunmap_atomic(dst);
- zbud_unmap(tree->pool, entry->handle);
+ zbud_unmap(zswap_pool, entry->handle);
BUG_ON(ret);
spin_lock(&tree->lock);
@@ -804,11 +804,9 @@
/* walk the tree and free everything */
spin_lock(&tree->lock);
rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
- zswap_free_entry(tree, entry);
+ zswap_free_entry(entry);
tree->rbroot = RB_ROOT;
spin_unlock(&tree->lock);
-
- zbud_destroy_pool(tree->pool);
kfree(tree);
zswap_trees[type] = NULL;
}
@@ -822,20 +820,14 @@
struct zswap_tree *tree;
tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
- if (!tree)
- goto err;
- tree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
- if (!tree->pool)
- goto freetree;
+ if (!tree) {
+ pr_err("alloc failed, zswap disabled for swap type %d\n", type);
+ return;
+ }
+
tree->rbroot = RB_ROOT;
spin_lock_init(&tree->lock);
zswap_trees[type] = tree;
- return;
-
-freetree:
- kfree(tree);
-err:
- pr_err("alloc failed, zswap disabled for swap type %d\n", type);
}
static struct frontswap_ops zswap_frontswap_ops = {
@@ -907,9 +899,16 @@
return 0;
pr_info("loading zswap\n");
+
+ zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
+ if (!zswap_pool) {
+ pr_err("zbud pool creation failed\n");
+ goto error;
+ }
+
if (zswap_entry_cache_create()) {
pr_err("entry cache creation failed\n");
- goto error;
+ goto cachefail;
}
if (zswap_comp_init()) {
pr_err("compressor initialization failed\n");
@@ -919,6 +918,7 @@
pr_err("per-cpu initialization failed\n");
goto pcpufail;
}
+
frontswap_register_ops(&zswap_frontswap_ops);
if (zswap_debugfs_init())
pr_warn("debugfs initialization failed\n");
@@ -927,6 +927,8 @@
zswap_comp_exit();
compfail:
zswap_entry_cache_destory();
+cachefail:
+ zbud_destroy_pool(zswap_pool);
error:
return -ENOMEM;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1be9e99..34d094c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -188,7 +188,7 @@
EXPORT_SYMBOL(ip_tos2prio);
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
-#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
+#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 87f7238..f88d90f 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -1178,7 +1178,10 @@
sym->def[S_DEF_USER].tri = mod;
break;
case def_no:
- sym->def[S_DEF_USER].tri = no;
+ if (sym->flags & SYMBOL_ALLNOCONFIG_Y)
+ sym->def[S_DEF_USER].tri = yes;
+ else
+ sym->def[S_DEF_USER].tri = no;
break;
case def_random:
sym->def[S_DEF_USER].tri = no;
diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index ba663e1..412ea8a 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -109,6 +109,9 @@
/* choice values need to be set before calculating this symbol value */
#define SYMBOL_NEED_SET_CHOICE_VALUES 0x100000
+/* Set symbol to y if allnoconfig; used for symbols that hide others */
+#define SYMBOL_ALLNOCONFIG_Y 0x200000
+
#define SYMBOL_MAXLENGTH 256
#define SYMBOL_HASHSIZE 9973
diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h
index 09f4edf..d5daa7a 100644
--- a/scripts/kconfig/lkc.h
+++ b/scripts/kconfig/lkc.h
@@ -61,6 +61,7 @@
#define T_OPT_MODULES 1
#define T_OPT_DEFCONFIG_LIST 2
#define T_OPT_ENV 3
+#define T_OPT_ALLNOCONFIG_Y 4
struct kconf_id {
int name;
diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index db1512a..3ac2c9c 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -217,6 +217,9 @@
case T_OPT_ENV:
prop_add_env(arg);
break;
+ case T_OPT_ALLNOCONFIG_Y:
+ current_entry->sym->flags |= SYMBOL_ALLNOCONFIG_Y;
+ break;
}
}
diff --git a/scripts/kconfig/zconf.gperf b/scripts/kconfig/zconf.gperf
index f14ab41..b6ac02d 100644
--- a/scripts/kconfig/zconf.gperf
+++ b/scripts/kconfig/zconf.gperf
@@ -44,4 +44,5 @@
modules, T_OPT_MODULES, TF_OPTION
defconfig_list, T_OPT_DEFCONFIG_LIST,TF_OPTION
env, T_OPT_ENV, TF_OPTION
+allnoconfig_y, T_OPT_ALLNOCONFIG_Y,TF_OPTION
%%
diff --git a/scripts/kconfig/zconf.hash.c_shipped b/scripts/kconfig/zconf.hash.c_shipped
index 40df000..c77a8ef 100644
--- a/scripts/kconfig/zconf.hash.c_shipped
+++ b/scripts/kconfig/zconf.hash.c_shipped
@@ -55,10 +55,10 @@
73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
- 73, 73, 73, 73, 73, 73, 73, 73, 25, 25,
+ 73, 73, 73, 73, 73, 73, 73, 5, 25, 25,
0, 0, 0, 5, 0, 0, 73, 73, 5, 0,
10, 5, 45, 73, 20, 20, 0, 15, 15, 73,
- 20, 73, 73, 73, 73, 73, 73, 73, 73, 73,
+ 20, 5, 73, 73, 73, 73, 73, 73, 73, 73,
73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
@@ -106,6 +106,7 @@
char kconf_id_strings_str23[sizeof("mainmenu")];
char kconf_id_strings_str25[sizeof("menuconfig")];
char kconf_id_strings_str27[sizeof("modules")];
+ char kconf_id_strings_str28[sizeof("allnoconfig_y")];
char kconf_id_strings_str29[sizeof("menu")];
char kconf_id_strings_str31[sizeof("select")];
char kconf_id_strings_str32[sizeof("comment")];
@@ -141,6 +142,7 @@
"mainmenu",
"menuconfig",
"modules",
+ "allnoconfig_y",
"menu",
"select",
"comment",
@@ -170,7 +172,7 @@
{
enum
{
- TOTAL_KEYWORDS = 32,
+ TOTAL_KEYWORDS = 33,
MIN_WORD_LENGTH = 2,
MAX_WORD_LENGTH = 14,
MIN_HASH_VALUE = 2,
@@ -219,7 +221,8 @@
{-1},
#line 44 "scripts/kconfig/zconf.gperf"
{(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str27, T_OPT_MODULES, TF_OPTION},
- {-1},
+#line 47 "scripts/kconfig/zconf.gperf"
+ {(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str28, T_OPT_ALLNOCONFIG_Y,TF_OPTION},
#line 16 "scripts/kconfig/zconf.gperf"
{(int)(long)&((struct kconf_id_strings_t *)0)->kconf_id_strings_str29, T_MENU, TF_COMMAND},
{-1},
@@ -282,5 +285,5 @@
}
return 0;
}
-#line 47 "scripts/kconfig/zconf.gperf"
+#line 48 "scripts/kconfig/zconf.gperf"
diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig
index affa134..0216475 100644
--- a/sound/isa/Kconfig
+++ b/sound/isa/Kconfig
@@ -191,7 +191,7 @@
config SND_SC6000
tristate "Gallant SC-6000/6600/7000 and Audio Excel DSP 16"
- depends on HAS_IOPORT
+ depends on HAS_IOPORT_MAP
select SND_WSS_LIB
select SND_OPL3_LIB
select SND_MPU401_UART
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 0b0c0cf..3a3a3a7 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -688,7 +688,7 @@
config SND_LX6464ES
tristate "Digigram LX6464ES"
- depends on HAS_IOPORT
+ depends on HAS_IOPORT_MAP
select SND_PCM
help
Say Y here to include support for Digigram LX6464ES boards.
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index f9be24d..05654f5 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -19,7 +19,8 @@
* Authors: Wu Fengguang <fengguang.wu@intel.com>
*/
-#define _LARGEFILE64_SOURCE
+#define _FILE_OFFSET_BITS 64
+#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -29,11 +30,14 @@
#include <getopt.h>
#include <limits.h>
#include <assert.h>
+#include <ftw.h>
+#include <time.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/mount.h>
#include <sys/statfs.h>
+#include <sys/mman.h>
#include "../../include/uapi/linux/magic.h"
#include "../../include/uapi/linux/kernel-page-flags.h"
#include <api/fs/debugfs.h>
@@ -158,6 +162,7 @@
static int opt_list; /* list pages (in ranges) */
static int opt_no_summary; /* don't show summary */
static pid_t opt_pid; /* process to walk */
+const char * opt_file;
#define MAX_ADDR_RANGES 1024
static int nr_addr_ranges;
@@ -253,12 +258,7 @@
if (index > ULONG_MAX / 8)
fatal("index overflow: %lu\n", index);
- if (lseek(fd, index * 8, SEEK_SET) < 0) {
- perror(name);
- exit(EXIT_FAILURE);
- }
-
- bytes = read(fd, buf, count * 8);
+ bytes = pread(fd, buf, count * 8, (off_t)index * 8);
if (bytes < 0) {
perror(name);
exit(EXIT_FAILURE);
@@ -343,8 +343,8 @@
* page list and summary
*/
-static void show_page_range(unsigned long voffset,
- unsigned long offset, uint64_t flags)
+static void show_page_range(unsigned long voffset, unsigned long offset,
+ unsigned long size, uint64_t flags)
{
static uint64_t flags0;
static unsigned long voff;
@@ -352,14 +352,16 @@
static unsigned long count;
if (flags == flags0 && offset == index + count &&
- (!opt_pid || voffset == voff + count)) {
- count++;
+ size && voffset == voff + count) {
+ count += size;
return;
}
if (count) {
if (opt_pid)
printf("%lx\t", voff);
+ if (opt_file)
+ printf("%lu\t", voff);
printf("%lx\t%lx\t%s\n",
index, count, page_flag_name(flags0));
}
@@ -367,7 +369,12 @@
flags0 = flags;
index = offset;
voff = voffset;
- count = 1;
+ count = size;
+}
+
+static void flush_page_range(void)
+{
+ show_page_range(0, 0, 0, 0);
}
static void show_page(unsigned long voffset,
@@ -375,6 +382,8 @@
{
if (opt_pid)
printf("%lx\t", voffset);
+ if (opt_file)
+ printf("%lu\t", voffset);
printf("%lx\t%s\n", offset, page_flag_name(flags));
}
@@ -565,7 +574,7 @@
unpoison_page(offset);
if (opt_list == 1)
- show_page_range(voffset, offset, flags);
+ show_page_range(voffset, offset, 1, flags);
else if (opt_list == 2)
show_page(voffset, offset, flags);
@@ -667,7 +676,7 @@
for (i = 0; i < nr_addr_ranges; i++)
if (!opt_pid)
- walk_pfn(0, opt_offset[i], opt_size[i], 0);
+ walk_pfn(opt_offset[i], opt_offset[i], opt_size[i], 0);
else
walk_task(opt_offset[i], opt_size[i]);
@@ -699,9 +708,7 @@
" -a|--addr addr-spec Walk a range of pages\n"
" -b|--bits bits-spec Walk pages with specified bits\n"
" -p|--pid pid Walk process address space\n"
-#if 0 /* planned features */
" -f|--file filename Walk file address space\n"
-#endif
" -l|--list Show page details in ranges\n"
" -L|--list-each Show page details one by one\n"
" -N|--no-summary Don't show summary info\n"
@@ -799,8 +806,130 @@
fclose(file);
}
+static void show_file(const char *name, const struct stat *st)
+{
+ unsigned long long size = st->st_size;
+ char atime[64], mtime[64];
+ long now = time(NULL);
+
+ printf("%s\tInode: %u\tSize: %llu (%llu pages)\n",
+ name, (unsigned)st->st_ino,
+ size, (size + page_size - 1) / page_size);
+
+ strftime(atime, sizeof(atime), "%c", localtime(&st->st_atime));
+ strftime(mtime, sizeof(mtime), "%c", localtime(&st->st_mtime));
+
+ printf("Modify: %s (%ld seconds ago)\nAccess: %s (%ld seconds ago)\n",
+ mtime, now - st->st_mtime,
+ atime, now - st->st_atime);
+}
+
+static void walk_file(const char *name, const struct stat *st)
+{
+ uint8_t vec[PAGEMAP_BATCH];
+ uint64_t buf[PAGEMAP_BATCH], flags;
+ unsigned long nr_pages, pfn, i;
+ int fd;
+ off_t off;
+ ssize_t len;
+ void *ptr;
+ int first = 1;
+
+ fd = checked_open(name, O_RDONLY|O_NOATIME|O_NOFOLLOW);
+
+ for (off = 0; off < st->st_size; off += len) {
+ nr_pages = (st->st_size - off + page_size - 1) / page_size;
+ if (nr_pages > PAGEMAP_BATCH)
+ nr_pages = PAGEMAP_BATCH;
+ len = nr_pages * page_size;
+
+ ptr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, off);
+ if (ptr == MAP_FAILED)
+ fatal("mmap failed: %s", name);
+
+ /* determine cached pages */
+ if (mincore(ptr, len, vec))
+ fatal("mincore failed: %s", name);
+
+ /* turn off readahead */
+ if (madvise(ptr, len, MADV_RANDOM))
+ fatal("madvice failed: %s", name);
+
+ /* populate ptes */
+ for (i = 0; i < nr_pages ; i++) {
+ if (vec[i] & 1)
+ (void)*(volatile int *)(ptr + i * page_size);
+ }
+
+ /* turn off harvesting reference bits */
+ if (madvise(ptr, len, MADV_SEQUENTIAL))
+ fatal("madvice failed: %s", name);
+
+ if (pagemap_read(buf, (unsigned long)ptr / page_size,
+ nr_pages) != nr_pages)
+ fatal("cannot read pagemap");
+
+ munmap(ptr, len);
+
+ for (i = 0; i < nr_pages; i++) {
+ pfn = pagemap_pfn(buf[i]);
+ if (!pfn)
+ continue;
+ if (!kpageflags_read(&flags, pfn, 1))
+ continue;
+ if (first && opt_list) {
+ first = 0;
+ flush_page_range();
+ show_file(name, st);
+ }
+ add_page(off / page_size + i, pfn, flags, buf[i]);
+ }
+ }
+
+ close(fd);
+}
+
+int walk_tree(const char *name, const struct stat *st, int type, struct FTW *f)
+{
+ (void)f;
+ switch (type) {
+ case FTW_F:
+ if (S_ISREG(st->st_mode))
+ walk_file(name, st);
+ break;
+ case FTW_DNR:
+ fprintf(stderr, "cannot read dir: %s\n", name);
+ break;
+ }
+ return 0;
+}
+
+static void walk_page_cache(void)
+{
+ struct stat st;
+
+ kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
+ pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY);
+
+ if (stat(opt_file, &st))
+ fatal("stat failed: %s\n", opt_file);
+
+ if (S_ISREG(st.st_mode)) {
+ walk_file(opt_file, &st);
+ } else if (S_ISDIR(st.st_mode)) {
+ /* do not follow symlinks and mountpoints */
+ if (nftw(opt_file, walk_tree, 64, FTW_MOUNT | FTW_PHYS) < 0)
+ fatal("nftw failed: %s\n", opt_file);
+ } else
+ fatal("unhandled file type: %s\n", opt_file);
+
+ close(kpageflags_fd);
+ close(pagemap_fd);
+}
+
static void parse_file(const char *name)
{
+ opt_file = name;
}
static void parse_addr_range(const char *optarg)
@@ -991,15 +1120,20 @@
if (opt_list && opt_pid)
printf("voffset\t");
+ if (opt_list && opt_file)
+ printf("foffset\t");
if (opt_list == 1)
printf("offset\tlen\tflags\n");
if (opt_list == 2)
printf("offset\tflags\n");
- walk_addr_ranges();
+ if (opt_file)
+ walk_page_cache();
+ else
+ walk_addr_ranges();
if (opt_list == 1)
- show_page_range(0, 0, 0); /* drain the buffer */
+ flush_page_range();
if (opt_no_summary)
return 0;