Merge android-4.9.130 (1c7637c) into msm-4.9
* refs/heads/tmp-1c7637c:
Revert "f2fs: use timespec64 for inode timestamps"
Linux 4.9.130
iw_cxgb4: only allow 1 flush on user qps
vmw_balloon: include asm/io.h
PCI: aardvark: Size bridges before resources allocation
HID: sony: Support DS4 dongle
HID: sony: Update device ids
sched/fair: Fix vruntime_normalized() for remote non-migration wakeup
ext4: show test_dummy_encryption mount option in /proc/mounts
ext4: don't mark mmp buffer head dirty
ext4: fix online resizing for bigalloc file systems with a 1k block size
ext4: fix online resize's handling of a too-small final block group
ext4: recalucate superblock checksum after updating free blocks/inodes
ext4: avoid divide by zero fault when deleting corrupted inline directories
ext4: check to make sure the rename(2)'s destination is not freed
tty: vt_ioctl: fix potential Spectre v1
drm/vc4: Fix the "no scaling" case on multi-planar YUV formats
drm/nouveau/drm/nouveau: Prevent handling ACPI HPD events too early
drm/nouveau/drm/nouveau: Use pm_runtime_get_noresume() in connector_detect()
drm/nouveau/drm/nouveau: Fix bogus drm_kms_helper_poll_enable() placement
ocfs2: fix ocfs2 read block panic
scsi: target: iscsi: Use hex2bin instead of a re-implementation
neighbour: confirm neigh entries when ARP packet is received
udp4: fix IP_CMSG_CHECKSUM for connected sockets
net: hp100: fix always-true check for link up state
net/appletalk: fix minor pointer leak to userspace in SIOCFINDIPDDPRT
ipv6: fix possible use-after-free in ip6_xmit()
gso_segment: Reset skb->mac_len after modifying network header
mm: shmem.c: Correctly annotate new inodes for lockdep
ring-buffer: Allow for rescheduling when removing pages
Revert "PCI: Add ACS quirk for Intel 300 series"
xen/x86/vpmu: Zero struct pt_regs before calling into sample handling code
xen/netfront: don't bug in case of too many frags
platform/x86: alienware-wmi: Correct a memory leak
ALSA: oxfw: fix memory leak of private data
ALSA: oxfw: fix memory leak of discovered stream formats at error path
ALSA: oxfw: fix memory leak for model-dependent data at error path
ALSA: fireworks: fix memory leak of response buffer at error path
ALSA: firewire-tascam: fix memory leak of private data
ALSA: firewire-digi00x: fix memory leak of private data
ALSA: emu10k1: fix possible info leak to userspace on SNDRV_EMU10K1_IOCTL_INFO
ALSA: bebob: use address returned by kmalloc() instead of kernel stack for streaming DMA mapping
ALSA: bebob: fix memory leak for M-Audio FW1814 and ProjectMix I/O at error path
ASoC: cs4265: fix MMTLR Data switch control
NFC: Fix the number of pipes
NFC: Fix possible memory corruption when handling SHDLC I-Frame commands
ANDROID: restrict store of prefer_idle as boolean
f2fs: readahead encrypted block during GC
f2fs: avoid fi->i_gc_rwsem[WRITE] lock in f2fs_gc
f2fs: fix performance issue observed with multi-thread sequential read
f2fs: fix to skip verifying block address for non-regular inode
f2fs: rework fault injection handling to avoid a warning
f2fs: support fault_type mount option
f2fs: fix to return success when trimming meta area
f2fs: fix use-after-free of dicard command entry
f2fs: support discard submission error injection
f2fs: split discard command in prior to block layer
f2fs: wake up gc thread immediately when gc_urgent is set
f2fs: fix incorrect range->len in f2fs_trim_fs()
f2fs: refresh recent accessed nat entry in lru list
f2fs: fix avoid race between truncate and background GC
f2fs: avoid race between zero_range and background GC
f2fs: fix to do sanity check with block address in main area v2
f2fs: fix to do sanity check with inline flags
f2fs: fix to reset i_gc_failures correctly
f2fs: fix invalid memory access
f2fs: fix to avoid broken of dnode block list
f2fs: use true and false for boolean values
f2fs: fix to do sanity check with cp_pack_start_sum
f2fs: avoid f2fs_bug_on() in cp_error case
f2fs: fix to clear PG_checked flag in set_page_dirty()
f2fs: fix to active page in lru list for read path
f2fs: don't keep meta pages used for block migration
f2fs: fix to restrict mount condition when without CONFIG_QUOTA
f2fs: quota: do not mount as RDWR without QUOTA if quota feature enabled
f2fs: quota: fix incorrect comments
f2fs: add proc entry to show victim_secmap bitmap
f2fs: let checkpoint flush dnode page of regular
f2fs: issue discard align to section in LFS mode
f2fs: don't allow any writes on aborted atomic writes
f2fs: restrict setting up inode.i_advise
f2fs: fix wrong kernel message when recover fsync data on ro fs
f2fs: clean up ioctl interface naming
f2fs: clean up with f2fs_is_{atomic,volatile}_file()
f2fs: clean up with f2fs_encrypted_inode()
f2fs: clean up with get_current_nat_page
f2fs: kill EXT_TREE_VEC_SIZE
f2fs: avoid duplicated permission check for "trusted." xattrs
f2fs: fix to propagate error from __get_meta_page()
f2fs: fix to do sanity check with i_extra_isize
f2fs: blk_finish_plug of submit_bio in lfs mode
f2fs: do not set free of current section
f2fs: Keep alloc_valid_block_count in sync
f2fs: issue small discard by LBA order
f2fs: stop issuing discard immediately if there is queued IO
f2fs: clean up with IS_INODE()
f2fs: detect bug_on in f2fs_wait_discard_bios
f2fs: fix defined but not used build warnings
f2fs: enable real-time discard by default
f2fs: fix to detect looped node chain correctly
f2fs: fix to do sanity check with block address in main area
f2fs: fix to skip GC if type in SSA and SIT is inconsistent
f2fs: try grabbing node page lock aggressively in sync scenario
f2fs: show the fsync_mode=nobarrier mount option
f2fs: check the right return value of memory alloc function
f2fs: Replace strncpy with memcpy
f2fs: avoid the global name 'fault_name'
f2fs: fix to do sanity check with reserved blkaddr of inline inode
f2fs: fix to do sanity check with node footer and iblocks
f2fs: Allocate and stat mem used by free nid bitmap more accurately
f2fs: fix to do sanity check with user_block_count
f2fs: fix to do sanity check with extra_attr feature
f2fs: fix to correct return value of f2fs_trim_fs
f2fs: fix to do sanity check with {sit,nat}_ver_bitmap_bytesize
f2fs: fix to do sanity check with secs_per_zone
f2fs: disable f2fs_check_rb_tree_consistence
f2fs: introduce and spread verify_blkaddr
f2fs: use timespec64 for inode timestamps
f2fs: fix to wait on page writeback before updating page
f2fs: assign REQ_RAHEAD to bio for ->readpages
f2fs: fix a hungtask problem caused by congestion_wait
f2fs: Fix uninitialized return in f2fs_ioc_shutdown()
f2fs: don't issue discard commands in online discard is on
f2fs: fix to propagate return value of scan_nat_page()
f2fs: support in-memory inode checksum when checking consistency
f2fs: fix error path of fill_super
f2fs: relocate readdir_ra configure initialization
f2fs: move s_res{u,g}id initialization to default_options()
f2fs: don't acquire orphan ino during recovery
f2fs: avoid potential deadlock in f2fs_sbi_store
f2fs: indicate shutdown f2fs to allow unmount successfully
f2fs: keep meta pages in cp_error state
f2fs: do checkpoint in kill_sb
f2fs: allow wrong configured dio to buffered write
f2fs: flush journal nat entries for nat_bits during unmount
Conflicts:
drivers/hid/hid-core.c
fs/f2fs/f2fs.h
Change-Id: I482eb204568e7119959f85f975dc4bb7a720b77d
Signed-off-by: Minming Qi <mqi@codeaurora.org>
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 193a034..d9a0f69 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -154,6 +154,26 @@
enabled by default.
data_flush Enable data flushing before checkpoint in order to
persist data of regular and symlink.
+fault_injection=%d Enable fault injection in all supported types with
+ specified injection rate.
+fault_type=%d Support configuring fault injection type, should be
+ enabled with fault_injection option, fault type value
+ is shown below, it supports single or combined type.
+ Type_Name Type_Value
+ FAULT_KMALLOC 0x000000001
+ FAULT_KVMALLOC 0x000000002
+ FAULT_PAGE_ALLOC 0x000000004
+ FAULT_PAGE_GET 0x000000008
+ FAULT_ALLOC_BIO 0x000000010
+ FAULT_ALLOC_NID 0x000000020
+ FAULT_ORPHAN 0x000000040
+ FAULT_BLOCK 0x000000080
+ FAULT_DIR_DEPTH 0x000000100
+ FAULT_EVICT_INODE 0x000000200
+ FAULT_TRUNCATE 0x000000400
+ FAULT_IO 0x000000800
+ FAULT_CHECKPOINT 0x000001000
+ FAULT_DISCARD 0x000002000
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
diff --git a/Makefile b/Makefile
index cdc22f3..72ee918 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 9
-SUBLEVEL = 129
+SUBLEVEL = 130
EXTRAVERSION =
NAME = Roaring Lionus
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index b9fc525..0b29a43 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -477,7 +477,7 @@
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
int err, ret = IRQ_NONE;
- struct pt_regs regs;
+ struct pt_regs regs = {0};
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
uint8_t xenpmu_flags = get_xenpmu_flags();
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 56c288f..5bfae1f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -271,12 +271,16 @@
nv_connector->edid = NULL;
}
- /* Outputs are only polled while runtime active, so acquiring a
- * runtime PM ref here is unnecessary (and would deadlock upon
- * runtime suspend because it waits for polling to finish).
+ /* Outputs are only polled while runtime active, so resuming the
+ * device here is unnecessary (and would deadlock upon runtime suspend
+ * because it waits for polling to finish). We do however, want to
+ * prevent the autosuspend timer from elapsing during this operation
+ * if possible.
*/
- if (!drm_kms_helper_is_poll_worker()) {
- ret = pm_runtime_get_sync(connector->dev->dev);
+ if (drm_kms_helper_is_poll_worker()) {
+ pm_runtime_get_noresume(dev->dev);
+ } else {
+ ret = pm_runtime_get_sync(dev->dev);
if (ret < 0 && ret != -EACCES)
return conn_status;
}
@@ -354,10 +358,8 @@
out:
- if (!drm_kms_helper_is_poll_worker()) {
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
- }
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
return conn_status;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 6526a33..3ddd409 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -367,8 +367,6 @@
pm_runtime_get_sync(drm->dev->dev);
drm_helper_hpd_irq_event(drm->dev);
- /* enable polling for external displays */
- drm_kms_helper_poll_enable(drm->dev);
pm_runtime_mark_last_busy(drm->dev->dev);
pm_runtime_put_sync(drm->dev->dev);
@@ -391,15 +389,29 @@
{
struct nouveau_drm *drm = container_of(nb, typeof(*drm), acpi_nb);
struct acpi_bus_event *info = data;
+ int ret;
if (!strcmp(info->device_class, ACPI_VIDEO_CLASS)) {
if (info->type == ACPI_VIDEO_NOTIFY_PROBE) {
- /*
- * This may be the only indication we receive of a
- * connector hotplug on a runtime suspended GPU,
- * schedule hpd_work to check.
- */
- schedule_work(&drm->hpd_work);
+ ret = pm_runtime_get(drm->dev->dev);
+ if (ret == 1 || ret == -EACCES) {
+ /* If the GPU is already awake, or in a state
+ * where we can't wake it up, it can handle
+ * it's own hotplug events.
+ */
+ pm_runtime_put_autosuspend(drm->dev->dev);
+ } else if (ret == 0) {
+ /* This may be the only indication we receive
+ * of a connector hotplug on a runtime
+ * suspended GPU, schedule hpd_work to check.
+ */
+ NV_DEBUG(drm, "ACPI requested connector reprobe\n");
+ schedule_work(&drm->hpd_work);
+ pm_runtime_put_noidle(drm->dev->dev);
+ } else {
+ NV_WARN(drm, "Dropped ACPI reprobe event due to RPM error: %d\n",
+ ret);
+ }
/* acpi-video should not generate keypresses for this */
return NOTIFY_BAD;
@@ -422,6 +434,11 @@
if (ret)
return ret;
+ /* enable connector detection and polling for connectors without HPD
+ * support
+ */
+ drm_kms_helper_poll_enable(dev);
+
/* enable hotplug interrupts */
list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
struct nouveau_connector *conn = nouveau_connector(connector);
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index f8c9f6f..a2d8630 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -327,6 +327,9 @@
vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
vc4_state->crtc_h);
+ vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
+ vc4_state->y_scaling[0] == VC4_SCALING_NONE);
+
if (num_planes > 1) {
vc4_state->is_yuv = true;
@@ -342,24 +345,17 @@
vc4_get_scaling_mode(vc4_state->src_h[1],
vc4_state->crtc_h);
- /* YUV conversion requires that scaling be enabled,
- * even on a plane that's otherwise 1:1. Choose TPZ
- * for simplicity.
+ /* YUV conversion requires that horizontal scaling be enabled,
+ * even on a plane that's otherwise 1:1. Looks like only PPF
+ * works in that case, so let's pick that one.
*/
- if (vc4_state->x_scaling[0] == VC4_SCALING_NONE)
- vc4_state->x_scaling[0] = VC4_SCALING_TPZ;
- if (vc4_state->y_scaling[0] == VC4_SCALING_NONE)
- vc4_state->y_scaling[0] = VC4_SCALING_TPZ;
+ if (vc4_state->is_unity)
+ vc4_state->x_scaling[0] = VC4_SCALING_PPF;
} else {
vc4_state->x_scaling[1] = VC4_SCALING_NONE;
vc4_state->y_scaling[1] = VC4_SCALING_NONE;
}
- vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
- vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
- vc4_state->x_scaling[1] == VC4_SCALING_NONE &&
- vc4_state->y_scaling[1] == VC4_SCALING_NONE);
-
/* No configuring scaling on the cursor plane, since it gets
non-vblank-synced updates, and scaling requires requires
LBM changes which have to be vblank-synced.
@@ -614,7 +610,10 @@
vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
}
- if (!vc4_state->is_unity) {
+ if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
+ vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
+ vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
+ vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
/* LBM Base Address. */
if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 2059d9d..48856a0 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2064,6 +2064,10 @@
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_2) },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_2) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGP_MOUSE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SINO_LITE, USB_DEVICE_ID_SINO_LITE_CONTROLLER) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 0280e28..ebad6952 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -927,6 +927,8 @@
#define USB_DEVICE_ID_SONY_PS3_BDREMOTE 0x0306
#define USB_DEVICE_ID_SONY_PS3_CONTROLLER 0x0268
#define USB_DEVICE_ID_SONY_PS4_CONTROLLER 0x05c4
+#define USB_DEVICE_ID_SONY_PS4_CONTROLLER_2 0x09cc
+#define USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE 0x0ba0
#define USB_DEVICE_ID_SONY_MOTION_CONTROLLER 0x03d5
#define USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER 0x042f
#define USB_DEVICE_ID_SONY_BUZZ_CONTROLLER 0x0002
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index 1b1dccd..eee58d1 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -2581,6 +2581,12 @@
.driver_data = DUALSHOCK4_CONTROLLER_USB },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER),
.driver_data = DUALSHOCK4_CONTROLLER_BT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_2),
+ .driver_data = DUALSHOCK4_CONTROLLER_USB },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_2),
+ .driver_data = DUALSHOCK4_CONTROLLER_BT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE),
+ .driver_data = DUALSHOCK4_CONTROLLER_USB },
/* Nyko Core Controller for PS3 */
{ HID_USB_DEVICE(USB_VENDOR_ID_SINO_LITE, USB_DEVICE_ID_SINO_LITE_CONTROLLER),
.driver_data = SIXAXIS_CONTROLLER_USB | SINO_LITE_CONTROLLER },
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index cc2243f..bb45eb2 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1258,6 +1258,12 @@
t4_set_wq_in_error(&qhp->wq);
if (qhp->ibqp.uobject) {
+
+ /* for user qps, qhp->wq.flushed is protected by qhp->mutex */
+ if (qhp->wq.flushed)
+ return;
+
+ qhp->wq.flushed = 1;
t4_set_cq_in_error(&rchp->cq);
spin_lock_irqsave(&rchp->comp_handler_lock, flag);
(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 518e2de..5e9122cd 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -45,6 +45,7 @@
#include <linux/seq_file.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
+#include <linux/io.h>
#include <asm/hypervisor.h>
MODULE_AUTHOR("VMware, Inc.");
diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index 2e46496..4e98e5a 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -284,8 +284,12 @@
case SIOCFINDIPDDPRT:
spin_lock_bh(&ipddp_route_lock);
rp = __ipddp_find_route(&rcp);
- if (rp)
- memcpy(&rcp2, rp, sizeof(rcp2));
+ if (rp) {
+ memset(&rcp2, 0, sizeof(rcp2));
+ rcp2.ip = rp->ip;
+ rcp2.at = rp->at;
+ rcp2.flags = rp->flags;
+ }
spin_unlock_bh(&ipddp_route_lock);
if (rp) {
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
index 631dbc7..0988bf1 100644
--- a/drivers/net/ethernet/hp/hp100.c
+++ b/drivers/net/ethernet/hp/hp100.c
@@ -2636,7 +2636,7 @@
/* Wait for link to drop */
time = jiffies + (HZ / 10);
do {
- if (~(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST))
+ if (!(hp100_inb(VG_LAN_CFG_1) & HP100_LINK_UP_ST))
break;
if (!in_interrupt())
schedule_timeout_interruptible(1);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 7f6af10..3c1adb3 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -906,7 +906,11 @@
BUG_ON(pull_to <= skb_headlen(skb));
__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
}
- BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
+ if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
+ queue->rx.rsp_cons = ++cons;
+ kfree_skb(nskb);
+ return ~0U;
+ }
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
skb_frag_page(nfrag),
@@ -1043,6 +1047,8 @@
skb->len += rx->status;
i = xennet_fill_frags(queue, skb, &tmpq);
+ if (unlikely(i == ~0U))
+ goto err;
if (rx->flags & XEN_NETRXF_csum_blank)
skb->ip_summed = CHECKSUM_PARTIAL;
diff --git a/drivers/pci/host/pci-aardvark.c b/drivers/pci/host/pci-aardvark.c
index 11bad82..1dbd09c 100644
--- a/drivers/pci/host/pci-aardvark.c
+++ b/drivers/pci/host/pci-aardvark.c
@@ -976,6 +976,7 @@
return -ENOMEM;
}
+ pci_bus_size_bridges(bus);
pci_bus_assign_resources(bus);
list_for_each_entry(child, &bus->children, node)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index a05d143..c7a695c 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4236,11 +4236,6 @@
*
* 0x9d10-0x9d1b PCI Express Root port #{1-12}
*
- * The 300 series chipset suffers from the same bug so include those root
- * ports here as well.
- *
- * 0xa32c-0xa343 PCI Express Root port #{0-24}
- *
* [1] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-2.html
* [2] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-datasheet-vol-1.html
* [3] http://www.intel.com/content/www/us/en/chipsets/100-series-chipset-spec-update.html
@@ -4258,7 +4253,6 @@
case 0xa110 ... 0xa11f: case 0xa167 ... 0xa16a: /* Sunrise Point */
case 0xa290 ... 0xa29f: case 0xa2e7 ... 0xa2ee: /* Union Point */
case 0x9d10 ... 0x9d1b: /* 7th & 8th Gen Mobile */
- case 0xa32c ... 0xa343: /* 300 series */
return true;
}
diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c
index 0056294..fe41993 100644
--- a/drivers/platform/x86/alienware-wmi.c
+++ b/drivers/platform/x86/alienware-wmi.c
@@ -518,6 +518,7 @@
if (obj && obj->type == ACPI_TYPE_INTEGER)
*out_data = (u32) obj->integer.value;
}
+ kfree(output.pointer);
return status;
}
diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c
index 98f75e5..f06e74e 100644
--- a/drivers/target/iscsi/iscsi_target_auth.c
+++ b/drivers/target/iscsi/iscsi_target_auth.c
@@ -26,18 +26,6 @@
#include "iscsi_target_nego.h"
#include "iscsi_target_auth.h"
-static int chap_string_to_hex(unsigned char *dst, unsigned char *src, int len)
-{
- int j = DIV_ROUND_UP(len, 2), rc;
-
- rc = hex2bin(dst, src, j);
- if (rc < 0)
- pr_debug("CHAP string contains non hex digit symbols\n");
-
- dst[j] = '\0';
- return j;
-}
-
static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len)
{
int i;
@@ -240,9 +228,16 @@
pr_err("Could not find CHAP_R.\n");
goto out;
}
+ if (strlen(chap_r) != MD5_SIGNATURE_SIZE * 2) {
+ pr_err("Malformed CHAP_R\n");
+ goto out;
+ }
+ if (hex2bin(client_digest, chap_r, MD5_SIGNATURE_SIZE) < 0) {
+ pr_err("Malformed CHAP_R\n");
+ goto out;
+ }
pr_debug("[server] Got CHAP_R=%s\n", chap_r);
- chap_string_to_hex(client_digest, chap_r, strlen(chap_r));
tfm = crypto_alloc_shash("md5", 0, 0);
if (IS_ERR(tfm)) {
@@ -341,9 +336,7 @@
pr_err("Could not find CHAP_C.\n");
goto out;
}
- pr_debug("[server] Got CHAP_C=%s\n", challenge);
- challenge_len = chap_string_to_hex(challenge_binhex, challenge,
- strlen(challenge));
+ challenge_len = DIV_ROUND_UP(strlen(challenge), 2);
if (!challenge_len) {
pr_err("Unable to convert incoming challenge\n");
goto out;
@@ -352,6 +345,11 @@
pr_err("CHAP_C exceeds maximum binary size of 1024 bytes\n");
goto out;
}
+ if (hex2bin(challenge_binhex, challenge, challenge_len) < 0) {
+ pr_err("Malformed CHAP_C\n");
+ goto out;
+ }
+ pr_debug("[server] Got CHAP_C=%s\n", challenge);
/*
* During mutual authentication, the CHAP_C generated by the
* initiator must not match the original CHAP_C generated by
diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index f62c598..638eb9b 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -31,6 +31,8 @@
#include <asm/io.h>
#include <asm/uaccess.h>
+#include <linux/nospec.h>
+
#include <linux/kbd_kern.h>
#include <linux/vt_kern.h>
#include <linux/kbd_diacr.h>
@@ -703,6 +705,8 @@
if (vsa.console == 0 || vsa.console > MAX_NR_CONSOLES)
ret = -ENXIO;
else {
+ vsa.console = array_index_nospec(vsa.console,
+ MAX_NR_CONSOLES + 1);
vsa.console--;
console_lock();
ret = vc_allocate(vsa.console);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index e8b3650..e16bc4c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -74,7 +74,7 @@
else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
error_msg = "rec_len is too small for name_len";
else if (unlikely(((char *) de - buf) + rlen > size))
- error_msg = "directory entry across range";
+ error_msg = "directory entry overrun";
else if (unlikely(le32_to_cpu(de->inode) >
le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
error_msg = "inode out of bounds";
@@ -83,18 +83,16 @@
if (filp)
ext4_error_file(filp, function, line, bh->b_blocknr,
- "bad entry in directory: %s - offset=%u(%u), "
- "inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned) (offset % size),
- offset, le32_to_cpu(de->inode),
- rlen, de->name_len);
+ "bad entry in directory: %s - offset=%u, "
+ "inode=%u, rec_len=%d, name_len=%d, size=%d",
+ error_msg, offset, le32_to_cpu(de->inode),
+ rlen, de->name_len, size);
else
ext4_error_inode(dir, function, line, bh->b_blocknr,
- "bad entry in directory: %s - offset=%u(%u), "
- "inode=%u, rec_len=%d, name_len=%d",
- error_msg, (unsigned) (offset % size),
- offset, le32_to_cpu(de->inode),
- rlen, de->name_len);
+ "bad entry in directory: %s - offset=%u, "
+ "inode=%u, rec_len=%d, name_len=%d, size=%d",
+ error_msg, offset, le32_to_cpu(de->inode),
+ rlen, de->name_len, size);
return 1;
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 74e831f..f901b643 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1768,6 +1768,7 @@
{
int err, inline_size;
struct ext4_iloc iloc;
+ size_t inline_len;
void *inline_pos;
unsigned int offset;
struct ext4_dir_entry_2 *de;
@@ -1795,8 +1796,9 @@
goto out;
}
+ inline_len = ext4_get_inline_size(dir);
offset = EXT4_INLINE_DOTDOT_SIZE;
- while (offset < dir->i_size) {
+ while (offset < inline_len) {
de = ext4_get_inline_entry(dir, &iloc, offset,
&inline_pos, &inline_size);
if (ext4_check_dir_entry(dir, NULL, de,
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index d89754e..c2e830a 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -48,7 +48,6 @@
*/
sb_start_write(sb);
ext4_mmp_csum_set(sb, mmp);
- mark_buffer_dirty(bh);
lock_buffer(bh);
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 09020e4..9bad755 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3434,6 +3434,12 @@
int credits;
u8 old_file_type;
+ if (new.inode && new.inode->i_nlink == 0) {
+ EXT4_ERROR_INODE(new.inode,
+ "target of rename is already freed");
+ return -EFSCORRUPTED;
+ }
+
if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
(!projid_eq(EXT4_I(new_dir)->i_projid,
EXT4_I(old_dentry->d_inode)->i_projid)))
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index eb720d9..1da301e 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -18,6 +18,7 @@
int ext4_resize_begin(struct super_block *sb)
{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
int ret = 0;
if (!capable(CAP_SYS_RESOURCE))
@@ -28,7 +29,7 @@
* because the user tools have no way of handling this. Probably a
* bad time to do it anyways.
*/
- if (EXT4_SB(sb)->s_sbh->b_blocknr !=
+ if (EXT4_B2C(sbi, sbi->s_sbh->b_blocknr) !=
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
ext4_warning(sb, "won't resize using backup superblock at %llu",
(unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
@@ -1954,6 +1955,26 @@
}
}
+ /*
+ * Make sure the last group has enough space so that it's
+ * guaranteed to have enough space for all metadata blocks
+ * that it might need to hold. (We might not need to store
+ * the inode table blocks in the last block group, but there
+ * will be cases where this might be needed.)
+ */
+ if ((ext4_group_first_block_no(sb, n_group) +
+ ext4_group_overhead_blocks(sb, n_group) + 2 +
+ sbi->s_itb_per_group + sbi->s_cluster_ratio) >= n_blocks_count) {
+ n_blocks_count = ext4_group_first_block_no(sb, n_group);
+ n_group--;
+ n_blocks_count_retry = 0;
+ if (resize_inode) {
+ iput(resize_inode);
+ resize_inode = NULL;
+ }
+ goto retry;
+ }
+
/* extend the last group */
if (n_group == o_group)
add = n_blocks_count - o_blocks_count;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1a2c223..031e43d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2019,6 +2019,8 @@
SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
if (test_opt(sb, DATA_ERR_ABORT))
SEQ_OPTS_PUTS("data_err=abort");
+ if (DUMMY_ENCRYPTION_ENABLED(sbi))
+ SEQ_OPTS_PUTS("test_dummy_encryption");
ext4_show_quota_options(seq, sb);
return 0;
@@ -4193,11 +4195,13 @@
block = ext4_count_free_clusters(sb);
ext4_free_blocks_count_set(sbi->s_es,
EXT4_C2B(sbi, block));
+ ext4_superblock_csum_set(sb);
err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
GFP_KERNEL);
if (!err) {
unsigned long freei = ext4_count_free_inodes(sb);
sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
+ ext4_superblock_csum_set(sb);
err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
GFP_KERNEL);
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index d7a53c6..6b24eb4 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -28,6 +28,7 @@
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
+ f2fs_build_fault_attr(sbi, 0, 0);
set_ckpt_flags(sbi, CP_ERROR_FLAG);
if (!end_io)
f2fs_flush_merged_writes(sbi);
@@ -70,6 +71,7 @@
.encrypted_page = NULL,
.is_meta = is_meta,
};
+ int err;
if (unlikely(!is_meta))
fio.op_flags &= ~REQ_META;
@@ -84,9 +86,10 @@
fio.page = page;
- if (f2fs_submit_page_bio(&fio)) {
+ err = f2fs_submit_page_bio(&fio);
+ if (err) {
f2fs_put_page(page, 1);
- goto repeat;
+ return ERR_PTR(err);
}
lock_page(page);
@@ -95,14 +98,9 @@
goto repeat;
}
- /*
- * if there is any IO error when accessing device, make our filesystem
- * readonly and make sure do not write checkpoint with non-uptodate
- * meta page.
- */
if (unlikely(!PageUptodate(page))) {
- memset(page_address(page), 0, PAGE_SIZE);
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
}
out:
return page;
@@ -113,13 +111,32 @@
return __get_meta_page(sbi, index, true);
}
+struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+ struct page *page;
+ int count = 0;
+
+retry:
+ page = __get_meta_page(sbi, index, true);
+ if (IS_ERR(page)) {
+ if (PTR_ERR(page) == -EIO &&
+ ++count <= DEFAULT_RETRY_IO_COUNT)
+ goto retry;
+
+ f2fs_stop_checkpoint(sbi, false);
+ f2fs_bug_on(sbi, 1);
+ }
+
+ return page;
+}
+
/* for POR only */
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
return __get_meta_page(sbi, index, false);
}
-bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
switch (type) {
@@ -140,8 +157,20 @@
return false;
break;
case META_POR:
+ case DATA_GENERIC:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
- blkaddr < MAIN_BLKADDR(sbi)))
+ blkaddr < MAIN_BLKADDR(sbi))) {
+ if (type == DATA_GENERIC) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "access invalid blkaddr:%u", blkaddr);
+ WARN_ON(1);
+ }
+ return false;
+ }
+ break;
+ case META_GENERIC:
+ if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
+ blkaddr >= MAIN_BLKADDR(sbi)))
return false;
break;
default:
@@ -176,7 +205,7 @@
blk_start_plug(&plug);
for (; nrpages-- > 0; blkno++) {
- if (!f2fs_is_valid_meta_blkaddr(sbi, blkno, type))
+ if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
goto out;
switch (type) {
@@ -242,11 +271,8 @@
trace_f2fs_writepage(page, META);
- if (unlikely(f2fs_cp_error(sbi))) {
- dec_page_count(sbi, F2FS_DIRTY_META);
- unlock_page(page);
- return 0;
- }
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
@@ -529,13 +555,12 @@
spin_lock(&im->ino_lock);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
f2fs_show_injection_info(FAULT_ORPHAN);
return -ENOSPC;
}
-#endif
+
if (unlikely(im->ino_num >= sbi->max_orphans))
err = -ENOSPC;
else
@@ -572,12 +597,7 @@
{
struct inode *inode;
struct node_info ni;
- int err = f2fs_acquire_orphan_inode(sbi);
-
- if (err)
- goto err_out;
-
- __add_ino_entry(sbi, ino, 0, ORPHAN_INO);
+ int err;
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode)) {
@@ -600,14 +620,15 @@
/* truncate all the data during iput */
iput(inode);
- f2fs_get_node_info(sbi, ino, &ni);
+ err = f2fs_get_node_info(sbi, ino, &ni);
+ if (err)
+ goto err_out;
/* ENOMEM was fully retried in f2fs_evict_inode. */
if (ni.blk_addr != NULL_ADDR) {
err = -EIO;
goto err_out;
}
- __remove_ino_entry(sbi, ino, ORPHAN_INO);
return 0;
err_out:
@@ -639,7 +660,10 @@
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= MS_ACTIVE;
- /* Turn on quotas so that they are updated correctly */
+ /*
+ * Turn on quotas which were not enabled for read-only mounts if
+ * filesystem has quota feature, so that they are updated correctly.
+ */
quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY);
#endif
@@ -649,9 +673,15 @@
f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
for (i = 0; i < orphan_blocks; i++) {
- struct page *page = f2fs_get_meta_page(sbi, start_blk + i);
+ struct page *page;
struct f2fs_orphan_block *orphan_blk;
+ page = f2fs_get_meta_page(sbi, start_blk + i);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto out;
+ }
+
orphan_blk = (struct f2fs_orphan_block *)page_address(page);
for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
@@ -742,10 +772,14 @@
__u32 crc = 0;
*cp_page = f2fs_get_meta_page(sbi, cp_addr);
+ if (IS_ERR(*cp_page))
+ return PTR_ERR(*cp_page);
+
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
if (crc_offset > (blk_size - sizeof(__le32))) {
+ f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid crc_offset: %zu", crc_offset);
return -EINVAL;
@@ -753,6 +787,7 @@
crc = cur_cp_crc(*cp_block);
if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
+ f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
return -EINVAL;
}
@@ -772,14 +807,22 @@
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_1, version);
if (err)
- goto invalid_cp1;
+ return NULL;
+
+ if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
+ sbi->blocks_per_seg) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "invalid cp_pack_total_block_count:%u",
+ le32_to_cpu(cp_block->cp_pack_total_block_count));
+ goto invalid_cp;
+ }
pre_version = *version;
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_2, version);
if (err)
- goto invalid_cp2;
+ goto invalid_cp;
cur_version = *version;
if (cur_version == pre_version) {
@@ -787,9 +830,8 @@
f2fs_put_page(cp_page_2, 1);
return cp_page_1;
}
-invalid_cp2:
f2fs_put_page(cp_page_2, 1);
-invalid_cp1:
+invalid_cp:
f2fs_put_page(cp_page_1, 1);
return NULL;
}
@@ -838,15 +880,15 @@
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
- /* Sanity checking of checkpoint */
- if (f2fs_sanity_check_ckpt(sbi))
- goto free_fail_no_cp;
-
if (cur_page == cp1)
sbi->cur_cp_pack = 1;
else
sbi->cur_cp_pack = 2;
+ /* Sanity checking of checkpoint */
+ if (f2fs_sanity_check_ckpt(sbi))
+ goto free_fail_no_cp;
+
if (cp_blks <= 1)
goto done;
@@ -859,6 +901,8 @@
unsigned char *ckpt = (unsigned char *)sbi->ckpt;
cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
+ if (IS_ERR(cur_page))
+ goto free_fail_no_cp;
sit_bitmap_ptr = page_address(cur_page);
memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
f2fs_put_page(cur_page, 1);
@@ -980,12 +1024,10 @@
iput(inode);
/* We need to give cpu to another writers. */
- if (ino == cur_ino) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ if (ino == cur_ino)
cond_resched();
- } else {
+ else
ino = cur_ino;
- }
} else {
/*
* We should submit bio, since it exists several
@@ -1119,7 +1161,7 @@
f2fs_unlock_all(sbi);
}
-static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
DEFINE_WAIT(wait);
@@ -1129,6 +1171,9 @@
if (!get_pages(sbi, F2FS_WB_CP_DATA))
break;
+ if (unlikely(f2fs_cp_error(sbi)))
+ break;
+
io_schedule_timeout(5*HZ);
}
finish_wait(&sbi->cp_wait, &wait);
@@ -1202,8 +1247,12 @@
/* writeout cp pack 2 page */
err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
- f2fs_bug_on(sbi, err);
+ if (unlikely(err && f2fs_cp_error(sbi))) {
+ f2fs_put_page(page, 1);
+ return;
+ }
+ f2fs_bug_on(sbi, err);
f2fs_put_page(page, 0);
/* submit checkpoint (with barrier if NOBARRIER is not set) */
@@ -1229,7 +1278,7 @@
while (get_pages(sbi, F2FS_DIRTY_META)) {
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ break;
}
/*
@@ -1309,7 +1358,7 @@
f2fs_sync_meta_pages(sbi, META, LONG_MAX,
FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ break;
}
}
@@ -1348,10 +1397,7 @@
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
/* wait for previous submitted meta pages writeback */
- wait_on_all_pages_writeback(sbi);
-
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ f2fs_wait_on_all_pages_writeback(sbi);
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
@@ -1360,12 +1406,19 @@
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
- wait_on_all_pages_writeback(sbi);
+ f2fs_wait_on_all_pages_writeback(sbi);
+
+ /*
+ * invalidate intermediate page cache borrowed from meta inode
+ * which are used for migration of encrypted inode's blocks.
+ */
+ if (f2fs_sb_has_encrypt(sbi->sb))
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
f2fs_release_ino_entry(sbi, false);
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ f2fs_reset_fsync_node_info(sbi);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
@@ -1381,7 +1434,7 @@
f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));
- return 0;
+ return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0;
}
/*
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f7f2c13..6570c75 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -126,12 +126,10 @@
static void f2fs_read_end_io(struct bio *bio)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
f2fs_show_injection_info(FAULT_IO);
bio->bi_error = -EIO;
}
-#endif
if (f2fs_bio_post_read_required(bio)) {
struct bio_post_read_ctx *ctx = bio->bi_private;
@@ -177,6 +175,8 @@
page->index != nid_of_node(page));
dec_page_count(sbi, type);
+ if (f2fs_in_warm_node_list(sbi, page))
+ f2fs_del_fsync_node_entry(sbi, page);
clear_cold_data(page);
end_page_writeback(page);
}
@@ -263,7 +263,7 @@
if (type != DATA && type != NODE)
goto submit_io;
- if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug)
+ if (test_opt(sbi, LFS) && current->plug)
blk_finish_plug(current->plug);
start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
@@ -441,7 +441,10 @@
fio->encrypted_page : fio->page;
struct inode *inode = fio->page->mapping->host;
- verify_block_addr(fio, fio->new_blkaddr);
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ return -EFAULT;
+
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
@@ -493,7 +496,7 @@
spin_unlock(&io->io_lock);
}
- if (is_valid_blkaddr(fio->old_blkaddr))
+ if (__is_valid_data_blkaddr(fio->old_blkaddr))
verify_block_addr(fio, fio->old_blkaddr);
verify_block_addr(fio, fio->new_blkaddr);
@@ -554,19 +557,22 @@
}
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
- unsigned nr_pages)
+ unsigned nr_pages, unsigned op_flag)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
struct bio_post_read_ctx *ctx;
unsigned int post_read_steps = 0;
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ return ERR_PTR(-EFAULT);
+
bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
if (!bio)
return ERR_PTR(-ENOMEM);
f2fs_target_device(sbi, blkaddr, bio);
bio->bi_end_io = f2fs_read_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
if (f2fs_encrypted_file(inode) &&
!fscrypt_using_hardware_encryption(inode))
@@ -592,7 +598,7 @@
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
block_t blkaddr)
{
- struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1);
+ struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -893,6 +899,7 @@
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_summary sum;
struct node_info ni;
+ block_t old_blkaddr;
pgoff_t fofs;
blkcnt_t count = 1;
int err;
@@ -900,6 +907,10 @@
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
+ err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ if (err)
+ return err;
+
dn->data_blkaddr = datablock_addr(dn->inode,
dn->node_page, dn->ofs_in_node);
if (dn->data_blkaddr == NEW_ADDR)
@@ -909,11 +920,13 @@
return err;
alloc:
- f2fs_get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
-
- f2fs_allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
+ old_blkaddr = dn->data_blkaddr;
+ f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
&sum, seg_type, NULL, false);
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ old_blkaddr, old_blkaddr);
f2fs_set_data_blkaddr(dn);
/* update i_size */
@@ -1069,7 +1082,13 @@
next_block:
blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
- if (!is_valid_blkaddr(blkaddr)) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
+ err = -EFAULT;
+ goto sync_out;
+ }
+
+ if (!is_valid_data_blkaddr(sbi, blkaddr)) {
if (create) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -1306,7 +1325,11 @@
if (!page)
return -ENOMEM;
- f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
+ }
phys = (__u64)blk_to_logical(inode, ni.blk_addr);
offset = offsetof(struct f2fs_inode, i_addr) +
@@ -1333,7 +1356,11 @@
if (!page)
return -ENOMEM;
- f2fs_get_node_info(sbi, xnid, &ni);
+ err = f2fs_get_node_info(sbi, xnid, &ni);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
+ }
phys = (__u64)blk_to_logical(inode, ni.blk_addr);
len = inode->i_sb->s_blocksize;
@@ -1445,10 +1472,15 @@
/*
* This function was originally taken from fs/mpage.c, and customized for f2fs.
* Major change was from block_size == page_size in f2fs by default.
+ *
+ * Note that the aops->readpages() function is ONLY used for read-ahead. If
+ * this function ever deviates from doing just read-ahead, it should either
+ * use ->readpage() or do the necessary surgery to decouple ->readpages()
+ * from read-ahead.
*/
static int f2fs_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
- unsigned nr_pages)
+ unsigned nr_pages, bool is_readahead)
{
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
@@ -1521,6 +1553,10 @@
SetPageUptodate(page);
goto confused;
}
+
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
+ DATA_GENERIC))
+ goto set_error_page;
} else {
zero_user_segment(page, 0, PAGE_SIZE);
if (!PageUptodate(page))
@@ -1548,7 +1584,8 @@
}
if (bio == NULL) {
- bio = f2fs_grab_read_bio(inode, block_nr, nr_pages);
+ bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
+ is_readahead ? REQ_RAHEAD : 0);
if (IS_ERR(bio)) {
bio = NULL;
goto set_error_page;
@@ -1593,7 +1630,7 @@
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
- ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
+ ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
return ret;
}
@@ -1610,12 +1647,13 @@
if (f2fs_has_inline_data(inode))
return 0;
- return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
+ return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
}
static int encrypt_one_page(struct f2fs_io_info *fio)
{
struct inode *inode = fio->page->mapping->host;
+ struct page *mpage;
gfp_t gfp_flags = GFP_NOFS;
if (!f2fs_encrypted_file(inode))
@@ -1630,17 +1668,25 @@
fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
PAGE_SIZE, 0, fio->page->index, gfp_flags);
- if (!IS_ERR(fio->encrypted_page))
- return 0;
-
- /* flush pending IOs and wait for a while in the ENOMEM case */
- if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
- f2fs_flush_merged_writes(fio->sbi);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- gfp_flags |= __GFP_NOFAIL;
- goto retry_encrypt;
+ if (IS_ERR(fio->encrypted_page)) {
+ /* flush pending IOs and wait for a while in the ENOMEM case */
+ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
+ f2fs_flush_merged_writes(fio->sbi);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ gfp_flags |= __GFP_NOFAIL;
+ goto retry_encrypt;
+ }
+ return PTR_ERR(fio->encrypted_page);
}
- return PTR_ERR(fio->encrypted_page);
+
+ mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
+ if (mpage) {
+ if (PageUptodate(mpage))
+ memcpy(page_address(mpage),
+ page_address(fio->encrypted_page), PAGE_SIZE);
+ f2fs_put_page(mpage, 1);
+ }
+ return 0;
}
static inline bool check_inplace_update_policy(struct inode *inode,
@@ -1724,6 +1770,7 @@
struct inode *inode = page->mapping->host;
struct dnode_of_data dn;
struct extent_info ei = {0,0,0};
+ struct node_info ni;
bool ipu_force = false;
int err = 0;
@@ -1732,11 +1779,13 @@
f2fs_lookup_extent_cache(inode, page->index, &ei)) {
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
- if (is_valid_blkaddr(fio->old_blkaddr)) {
- ipu_force = true;
- fio->need_lock = LOCK_DONE;
- goto got_it;
- }
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+ DATA_GENERIC))
+ return -EFAULT;
+
+ ipu_force = true;
+ fio->need_lock = LOCK_DONE;
+ goto got_it;
}
/* Deadlock due to between page->lock and f2fs_lock_op */
@@ -1755,11 +1804,17 @@
goto out_writepage;
}
got_it:
+ if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
+ !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+ DATA_GENERIC)) {
+ err = -EFAULT;
+ goto out_writepage;
+ }
/*
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (ipu_force || (is_valid_blkaddr(fio->old_blkaddr) &&
+ if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
need_inplace_update(fio))) {
err = encrypt_one_page(fio);
if (err)
@@ -1784,6 +1839,12 @@
fio->need_lock = LOCK_REQ;
}
+ err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
+ if (err)
+ goto out_writepage;
+
+ fio->version = ni.version;
+
err = encrypt_one_page(fio);
if (err)
goto out_writepage;
@@ -2207,10 +2268,14 @@
loff_t i_size = i_size_read(inode);
if (to > i_size) {
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
+
truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true);
+
up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -2315,8 +2380,9 @@
}
trace_f2fs_write_begin(inode, pos, len, flags);
- if (f2fs_is_atomic_file(inode) &&
- !f2fs_available_free_memory(sbi, INMEM_PAGES)) {
+ if ((f2fs_is_atomic_file(inode) &&
+ !f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
+ is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
err = -ENOMEM;
drop_atomic = true;
goto fail;
@@ -2441,14 +2507,20 @@
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
loff_t offset)
{
- unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
+ unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
+ unsigned blkbits = i_blkbits;
+ unsigned blocksize_mask = (1 << blkbits) - 1;
+ unsigned long align = offset | iov_iter_alignment(iter);
+ struct block_device *bdev = inode->i_sb->s_bdev;
- if (offset & blocksize_mask)
- return -EINVAL;
-
- if (iov_iter_alignment(iter) & blocksize_mask)
- return -EINVAL;
-
+ if (align & blocksize_mask) {
+ if (bdev)
+ blkbits = blksize_bits(bdev_logical_block_size(bdev));
+ blocksize_mask = (1 << blkbits) - 1;
+ if (align & blocksize_mask)
+ return -EINVAL;
+ return 1;
+ }
return 0;
}
@@ -2466,7 +2538,7 @@
err = check_direct_IO(inode, iter, offset);
if (err)
- return err;
+ return err < 0 ? err : 0;
if (f2fs_force_buffered_io(inode, rw))
return 0;
@@ -2588,6 +2660,10 @@
if (!PageUptodate(page))
SetPageUptodate(page);
+ /* don't remain PG_checked flag which was set during GC */
+ if (is_cold_data(page))
+ clear_cold_data(page);
+
if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
f2fs_register_inmem_page(inode, page);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 2d65e77..214a968 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -215,7 +215,8 @@
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
- si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
+ si->base_mem += NM_I(sbi)->nat_blocks *
+ f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK);
si->base_mem += NM_I(sbi)->nat_blocks / 8;
si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 63c0732..56cc274 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -517,12 +517,11 @@
}
start:
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
f2fs_show_injection_info(FAULT_DIR_DEPTH);
return -ENOSPC;
}
-#endif
+
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b696bc8..5bcbdce 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -43,7 +43,6 @@
} while (0)
#endif
-#ifdef CONFIG_F2FS_FAULT_INJECTION
enum {
FAULT_KMALLOC,
FAULT_KVMALLOC,
@@ -58,16 +57,20 @@
FAULT_TRUNCATE,
FAULT_IO,
FAULT_CHECKPOINT,
+ FAULT_DISCARD,
FAULT_MAX,
};
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1)
+
struct f2fs_fault_info {
atomic_t inject_ops;
unsigned int inject_rate;
unsigned int inject_type;
};
-extern char *fault_name[FAULT_MAX];
+extern char *f2fs_fault_name[FAULT_MAX];
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
#endif
@@ -180,7 +183,6 @@
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
-#define DEF_MAX_DISCARD_LEN 512 /* Max. 2MB per discard */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
@@ -196,7 +198,7 @@
};
/*
- * For CP/NAT/SIT/SSA readahead
+ * indicate meta/data type
*/
enum {
META_CP,
@@ -204,6 +206,8 @@
META_SIT,
META_SSA,
META_POR,
+ DATA_GENERIC,
+ META_GENERIC,
};
/* for the list of ino */
@@ -228,6 +232,12 @@
struct inode *inode; /* vfs inode pointer */
};
+struct fsync_node_entry {
+ struct list_head list; /* list head */
+ struct page *page; /* warm node page pointer */
+ unsigned int seq_id; /* sequence id */
+};
+
/* for the bitmap indicate blocks to be discarded */
struct discard_entry {
struct list_head list; /* list head */
@@ -244,9 +254,10 @@
(MAX_PLIST_NUM - 1) : (blk_num - 1))
enum {
- D_PREP,
- D_SUBMIT,
- D_DONE,
+ D_PREP, /* initial */
+ D_PARTIAL, /* partially submitted */
+ D_SUBMIT, /* all submitted */
+ D_DONE, /* finished */
};
struct discard_info {
@@ -271,7 +282,10 @@
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
+ unsigned char issuing; /* issuing discard */
int error; /* bio error */
+ spinlock_t lock; /* for state/bio_ref updating */
+ unsigned short bio_ref; /* bio reference count */
};
enum {
@@ -291,6 +305,7 @@
unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */
bool io_aware; /* issue discard in idle time */
bool sync; /* submit discard with REQ_SYNC flag */
+ bool ordered; /* issue discard by lba order */
unsigned int granularity; /* discard granularity */
};
@@ -307,10 +322,12 @@
unsigned int max_discards; /* max. discards to be issued */
unsigned int discard_granularity; /* discard granularity */
unsigned int undiscard_blks; /* # of undiscard blocks */
+ unsigned int next_pos; /* next discard position */
atomic_t issued_discard; /* # of issued discard */
atomic_t issing_discard; /* # of issing discard */
atomic_t discard_cmd_cnt; /* # of cached cmd count */
struct rb_root root; /* root of discard rb-tree */
+ bool rbtree_check; /* config for consistence check */
};
/* for the list of fsync inodes, used only during recovery */
@@ -507,13 +524,12 @@
*/
};
+#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
+
#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
-/* vector size for gang look-up from extent cache that consists of radix tree */
-#define EXT_TREE_VEC_SIZE 64
-
/* for in-memory extent cache entry */
#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
@@ -599,6 +615,8 @@
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40 /* reserved */
+#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
+
#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT)
@@ -697,22 +715,22 @@
}
static inline bool __is_discard_mergeable(struct discard_info *back,
- struct discard_info *front)
+ struct discard_info *front, unsigned int max_len)
{
return (back->lstart + back->len == front->lstart) &&
- (back->len + front->len < DEF_MAX_DISCARD_LEN);
+ (back->len + front->len <= max_len);
}
static inline bool __is_discard_back_mergeable(struct discard_info *cur,
- struct discard_info *back)
+ struct discard_info *back, unsigned int max_len)
{
- return __is_discard_mergeable(back, cur);
+ return __is_discard_mergeable(back, cur, max_len);
}
static inline bool __is_discard_front_mergeable(struct discard_info *cur,
- struct discard_info *front)
+ struct discard_info *front, unsigned int max_len)
{
- return __is_discard_mergeable(cur, front);
+ return __is_discard_mergeable(cur, front, max_len);
}
static inline bool __is_extent_mergeable(struct extent_info *back,
@@ -767,6 +785,7 @@
struct radix_tree_root nat_set_root;/* root of the nat set cache */
struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
struct list_head nat_entries; /* cached nat entry list (clean) */
+ spinlock_t nat_list_lock; /* protect clean nat entry list */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
unsigned int nat_blocks; /* # of nat blocks */
@@ -1015,6 +1034,7 @@
bool retry; /* need to reallocate block address */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
+ unsigned char version; /* version of the node */
};
#define is_read_io(rw) ((rw) == READ)
@@ -1066,6 +1086,7 @@
SBI_POR_DOING, /* recovery is doing or not */
SBI_NEED_SB_WRITE, /* need to recover superblock */
SBI_NEED_CP, /* need to checkpoint */
+ SBI_IS_SHUTDOWN, /* shutdown by ioctl */
};
enum {
@@ -1149,6 +1170,11 @@
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
+ spinlock_t fsync_node_lock; /* for node entry lock */
+ struct list_head fsync_node_list; /* node list head */
+ unsigned int fsync_seg_id; /* sequence id */
+ unsigned int fsync_node_num; /* number of node entries */
+
/* for orphan inode, use 0'th array */
unsigned int max_orphans; /* max orphan inodes */
@@ -1216,6 +1242,7 @@
unsigned int gc_mode; /* current GC state */
/* for skip statistic */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
+ unsigned long long skipped_gc_rwsem; /* FG_GC only */
/* threshold for gc trials on pinned files */
u64 gc_pin_file_threshold;
@@ -1280,7 +1307,7 @@
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(type) \
printk("%sF2FS-fs : inject %s in %s of %pF\n", \
- KERN_INFO, fault_name[type], \
+ KERN_INFO, f2fs_fault_name[type], \
__func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
@@ -1299,6 +1326,12 @@
}
return false;
}
+#else
+#define f2fs_show_injection_info(type) do { } while (0)
+static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
+{
+ return false;
+}
#endif
/* For write statistics. Suppose sector size is 512 bytes,
@@ -1327,7 +1360,7 @@
struct request_list *rl = &q->root_rl;
if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
- return 0;
+ return false;
return f2fs_time_over(sbi, REQ_TIME);
}
@@ -1651,13 +1684,12 @@
if (ret)
return ret;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
release = *count;
goto enospc;
}
-#endif
+
/*
* let's increase this in prior to actual block count change in order
* for f2fs_sync_file to avoid data races when deciding checkpoint.
@@ -1681,18 +1713,20 @@
sbi->total_valid_block_count -= diff;
if (!*count) {
spin_unlock(&sbi->stat_lock);
- percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
goto enospc;
}
}
spin_unlock(&sbi->stat_lock);
- if (unlikely(release))
+ if (unlikely(release)) {
+ percpu_counter_sub(&sbi->alloc_valid_block_count, release);
dquot_release_reservation_block(inode, release);
+ }
f2fs_i_blocks_write(inode, *count, true, true);
return 0;
enospc:
+ percpu_counter_sub(&sbi->alloc_valid_block_count, release);
dquot_release_reservation_block(inode, release);
return -ENOSPC;
}
@@ -1864,12 +1898,10 @@
return ret;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
goto enospc;
}
-#endif
spin_lock(&sbi->stat_lock);
@@ -1954,17 +1986,23 @@
static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
pgoff_t index, bool for_write)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- struct page *page = find_lock_page(mapping, index);
+ struct page *page;
- if (page)
- return page;
+ if (IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) {
+ if (!for_write)
+ page = find_get_page_flags(mapping, index,
+ FGP_LOCK | FGP_ACCESSED);
+ else
+ page = find_lock_page(mapping, index);
+ if (page)
+ return page;
- if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
- f2fs_show_injection_info(FAULT_PAGE_ALLOC);
- return NULL;
+ if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
+ f2fs_show_injection_info(FAULT_PAGE_ALLOC);
+ return NULL;
+ }
}
-#endif
+
if (!for_write)
return grab_cache_page(mapping, index);
return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
@@ -1974,12 +2012,11 @@
struct address_space *mapping, pgoff_t index,
int fgp_flags, gfp_t gfp_mask)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) {
f2fs_show_injection_info(FAULT_PAGE_GET);
return NULL;
}
-#endif
+
return pagecache_get_page(mapping, index, fgp_flags, gfp_mask);
}
@@ -2044,12 +2081,11 @@
bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
return bio;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
f2fs_show_injection_info(FAULT_ALLOC_BIO);
return NULL;
}
-#endif
+
return bio_alloc(GFP_KERNEL, npages);
}
@@ -2584,12 +2620,11 @@
static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_KMALLOC)) {
f2fs_show_injection_info(FAULT_KMALLOC);
return NULL;
}
-#endif
+
return kmalloc(size, flags);
}
@@ -2632,12 +2667,11 @@
static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_KVMALLOC)) {
f2fs_show_injection_info(FAULT_KVMALLOC);
return NULL;
}
-#endif
+
return kvmalloc(size, flags);
}
@@ -2696,13 +2730,39 @@
spin_unlock(&sbi->iostat_lock);
}
-static inline bool is_valid_blkaddr(block_t blkaddr)
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
+ (!is_read_io(fio->op) || fio->is_meta))
+
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "invalid blkaddr: %u, type: %d, run fsck to fix.",
+ blkaddr, type);
+ f2fs_bug_on(sbi, 1);
+ }
+}
+
+static inline bool __is_valid_data_blkaddr(block_t blkaddr)
{
if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
return false;
return true;
}
+static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
+{
+ if (!__is_valid_data_blkaddr(blkaddr))
+ return false;
+ verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
+ return true;
+}
+
/*
* file.c
*/
@@ -2817,16 +2877,21 @@
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type);
+bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi);
+void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi);
int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
-void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
+int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
struct node_info *ni);
pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
int f2fs_truncate_xattr_node(struct inode *inode);
-int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
+int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
+ unsigned int seq_id);
int f2fs_remove_inode_page(struct inode *inode);
struct page *f2fs_new_inode_page(struct inode *inode);
struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs);
@@ -2835,11 +2900,12 @@
struct page *f2fs_get_node_page_ra(struct page *parent, int start);
void f2fs_move_node_page(struct page *node_page, int gc_type);
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
- struct writeback_control *wbc, bool atomic);
+ struct writeback_control *wbc, bool atomic,
+ unsigned int *seq_id);
int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type);
-void f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
+int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
@@ -2847,7 +2913,7 @@
void f2fs_recover_inline_xattr(struct inode *inode, struct page *page);
int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
-void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
+int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
@@ -2925,9 +2991,10 @@
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
-bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
- block_t blkaddr, int type);
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
@@ -2951,6 +3018,7 @@
void f2fs_update_dirty_page(struct inode *inode, struct page *page);
void f2fs_remove_dirty_inode(struct inode *inode);
int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi);
int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi);
int __init f2fs_create_checkpoint_caches(void);
@@ -3389,7 +3457,7 @@
return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode));
#else
- return 0;
+ return false;
#endif
}
@@ -3411,4 +3479,11 @@
fscrypt_using_hardware_encryption(inode));
}
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
+ unsigned int type);
+#else
+#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0)
+#endif
+
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 18f415a..4636b01 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -215,6 +215,7 @@
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
+ unsigned int seq_id = 0;
if (unlikely(f2fs_readonly(inode->i_sb)))
return 0;
@@ -277,7 +278,7 @@
}
sync_nodes:
atomic_inc(&sbi->wb_sync_req[NODE]);
- ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic);
+ ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
atomic_dec(&sbi->wb_sync_req[NODE]);
if (ret)
goto out;
@@ -303,7 +304,7 @@
* given fsync mark.
*/
if (!atomic) {
- ret = f2fs_wait_on_node_pages_writeback(sbi, ino);
+ ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
if (ret)
goto out;
}
@@ -352,13 +353,13 @@
return pgofs;
}
-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
- int whence)
+static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
+ pgoff_t dirty, pgoff_t pgofs, int whence)
{
switch (whence) {
case SEEK_DATA:
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- is_valid_blkaddr(blkaddr))
+ is_valid_data_blkaddr(sbi, blkaddr))
return true;
break;
case SEEK_HOLE:
@@ -422,7 +423,15 @@
blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
- if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+ blkaddr, DATA_GENERIC)) {
+ f2fs_put_dnode(&dn);
+ goto fail;
+ }
+
+ if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
+ pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
}
@@ -515,6 +524,11 @@
dn->data_blkaddr = NULL_ADDR;
f2fs_set_data_blkaddr(dn);
+
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ continue;
+
f2fs_invalidate_blocks(sbi, blkaddr);
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
@@ -656,12 +670,11 @@
trace_f2fs_truncate(inode);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
f2fs_show_injection_info(FAULT_TRUNCATE);
return -EIO;
}
-#endif
+
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
@@ -785,22 +798,26 @@
}
if (attr->ia_valid & ATTR_SIZE) {
- if (attr->ia_size <= i_size_read(inode)) {
- down_write(&F2FS_I(inode)->i_mmap_sem);
- truncate_setsize(inode, attr->ia_size);
- err = f2fs_truncate(inode);
- up_write(&F2FS_I(inode)->i_mmap_sem);
- if (err)
- return err;
- } else {
- /*
- * do not trim all blocks after i_size if target size is
- * larger than i_size.
- */
- down_write(&F2FS_I(inode)->i_mmap_sem);
- truncate_setsize(inode, attr->ia_size);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ bool to_smaller = (attr->ia_size <= i_size_read(inode));
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ truncate_setsize(inode, attr->ia_size);
+
+ if (to_smaller)
+ err = f2fs_truncate(inode);
+ /*
+ * do not trim all blocks after i_size if target size is
+ * larger than i_size.
+ */
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+
+ if (err)
+ return err;
+
+ if (!to_smaller) {
/* should convert inline inode here */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
@@ -947,14 +964,19 @@
blk_start = (loff_t)pg_start << PAGE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_SHIFT;
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
+
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
f2fs_lock_op(sbi);
ret = f2fs_truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi);
+
up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -1057,7 +1079,12 @@
if (ret)
return ret;
- f2fs_get_node_info(sbi, dn.nid, &ni);
+ ret = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (ret) {
+ f2fs_put_dnode(&dn);
+ return ret;
+ }
+
ilen = min((pgoff_t)
ADDRS_PER_PAGE(dn.node_page, dst_inode) -
dn.ofs_in_node, len - i);
@@ -1164,25 +1191,33 @@
return ret;
}
-static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
+static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ pgoff_t start = offset >> PAGE_SHIFT;
+ pgoff_t end = (offset + len) >> PAGE_SHIFT;
int ret;
f2fs_balance_fs(sbi, true);
+
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
f2fs_lock_op(sbi);
-
f2fs_drop_extent_tree(inode);
-
+ truncate_pagecache(inode, offset);
ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
f2fs_unlock_op(sbi);
+
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
- pgoff_t pg_start, pg_end;
loff_t new_size;
int ret;
@@ -1197,25 +1232,17 @@
if (ret)
return ret;
- pg_start = offset >> PAGE_SHIFT;
- pg_end = (offset + len) >> PAGE_SHIFT;
-
- /* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-
- down_write(&F2FS_I(inode)->i_mmap_sem);
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
- goto out_unlock;
+ return ret;
- truncate_pagecache(inode, offset);
-
- ret = f2fs_do_collapse(inode, pg_start, pg_end);
+ ret = f2fs_do_collapse(inode, offset, len);
if (ret)
- goto out_unlock;
+ return ret;
/* write out all moved pages, if possible */
+ down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
@@ -1223,11 +1250,9 @@
truncate_pagecache(inode, new_size);
ret = f2fs_truncate_blocks(inode, new_size, true);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
-out_unlock:
- up_write(&F2FS_I(inode)->i_mmap_sem);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
@@ -1293,12 +1318,9 @@
if (ret)
return ret;
- down_write(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret)
- goto out_sem;
-
- truncate_pagecache_range(inode, offset, offset + len - 1);
+ return ret;
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1310,7 +1332,7 @@
ret = fill_zero(inode, pg_start, off_start,
off_end - off_start);
if (ret)
- goto out_sem;
+ return ret;
new_size = max_t(loff_t, new_size, offset + len);
} else {
@@ -1318,7 +1340,7 @@
ret = fill_zero(inode, pg_start++, off_start,
PAGE_SIZE - off_start);
if (ret)
- goto out_sem;
+ return ret;
new_size = max_t(loff_t, new_size,
(loff_t)pg_start << PAGE_SHIFT);
@@ -1329,12 +1351,21 @@
unsigned int end_offset;
pgoff_t end;
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ truncate_pagecache_range(inode,
+ (loff_t)index << PAGE_SHIFT,
+ ((loff_t)pg_end << PAGE_SHIFT) - 1);
+
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
if (ret) {
f2fs_unlock_op(sbi);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
@@ -1343,7 +1374,10 @@
ret = f2fs_do_zero_range(&dn, index, end);
f2fs_put_dnode(&dn);
+
f2fs_unlock_op(sbi);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_balance_fs(sbi, dn.node_changed);
@@ -1371,9 +1405,6 @@
else
f2fs_i_size_write(inode, new_size);
}
-out_sem:
- up_write(&F2FS_I(inode)->i_mmap_sem);
-
return ret;
}
@@ -1402,26 +1433,27 @@
f2fs_balance_fs(sbi, true);
- /* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-
down_write(&F2FS_I(inode)->i_mmap_sem);
ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (ret)
- goto out;
+ return ret;
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
- goto out;
-
- truncate_pagecache(inode, offset);
+ return ret;
pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
delta = pg_end - pg_start;
idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+ truncate_pagecache(inode, offset);
+
while (!ret && idx > pg_start) {
nr = idx - pg_start;
if (nr > delta)
@@ -1435,16 +1467,17 @@
idx + delta, nr, false);
f2fs_unlock_op(sbi);
}
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/* write out all moved pages, if possible */
+ down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
-out:
- up_write(&F2FS_I(inode)->i_mmap_sem);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
@@ -1600,7 +1633,7 @@
struct f2fs_inode_info *fi = F2FS_I(inode);
unsigned int flags = fi->i_flags;
- if (file_is_encrypt(inode))
+ if (f2fs_encrypted_inode(inode))
flags |= F2FS_ENCRYPT_FL;
if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
flags |= F2FS_INLINE_DATA_FL;
@@ -1684,15 +1717,18 @@
inode_lock(inode);
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_is_atomic_file(inode)) {
+ if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
+ ret = -EINVAL;
goto out;
+ }
ret = f2fs_convert_inline_inode(inode);
if (ret)
goto out;
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+
if (!get_dirty_pages(inode))
goto skip_flush;
@@ -1700,18 +1736,20 @@
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
- if (ret)
+ if (ret) {
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
+ }
skip_flush:
set_inode_flag(inode, FI_ATOMIC_FILE);
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
- f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
F2FS_I(inode)->inmem_task = current;
stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
out:
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1729,9 +1767,9 @@
if (ret)
return ret;
- inode_lock(inode);
+ f2fs_balance_fs(F2FS_I_SB(inode), true);
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ inode_lock(inode);
if (f2fs_is_volatile_file(inode)) {
ret = -EINVAL;
@@ -1757,7 +1795,6 @@
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
ret = -EINVAL;
}
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1849,6 +1886,8 @@
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
+ clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
+
inode_unlock(inode);
mnt_drop_write_file(filp);
@@ -1885,6 +1924,7 @@
}
if (sb) {
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
thaw_bdev(sb->s_bdev, sb);
}
break;
@@ -1894,13 +1934,16 @@
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NOSYNC:
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_METAFLUSH:
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
default:
ret = -EINVAL;
@@ -2103,7 +2146,7 @@
return ret;
}
-static int f2fs_ioc_f2fs_write_checkpoint(struct file *filp, unsigned long arg)
+static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -2347,15 +2390,10 @@
}
inode_lock(src);
- down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
if (src != dst) {
ret = -EBUSY;
if (!inode_trylock(dst))
goto out;
- if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) {
- inode_unlock(dst);
- goto out;
- }
}
ret = -EINVAL;
@@ -2400,6 +2438,14 @@
goto out_unlock;
f2fs_balance_fs(sbi, true);
+
+ down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+ if (src != dst) {
+ ret = -EBUSY;
+ if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
+ goto out_src;
+ }
+
f2fs_lock_op(sbi);
ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
pos_out >> F2FS_BLKSIZE_BITS,
@@ -2412,13 +2458,15 @@
f2fs_i_size_write(dst, dst_osize);
}
f2fs_unlock_op(sbi);
-out_unlock:
- if (src != dst) {
+
+ if (src != dst)
up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
- inode_unlock(dst);
- }
-out:
+out_src:
up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+out_unlock:
+ if (src != dst)
+ inode_unlock(dst);
+out:
inode_unlock(src);
return ret;
}
@@ -2590,7 +2638,7 @@
if (!pin) {
clear_inode_flag(inode, FI_PIN_FILE);
- F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = 1;
+ f2fs_i_gc_failures_write(inode, 0);
goto done;
}
@@ -2696,7 +2744,7 @@
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
return f2fs_ioc_gc_range(filp, arg);
case F2FS_IOC_WRITE_CHECKPOINT:
- return f2fs_ioc_f2fs_write_checkpoint(filp, arg);
+ return f2fs_ioc_write_checkpoint(filp, arg);
case F2FS_IOC_DEFRAGMENT:
return f2fs_ioc_defragment(filp, arg);
case F2FS_IOC_MOVE_RANGE:
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 9be5b50..c6322ef 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -53,12 +53,10 @@
continue;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
-#endif
if (!sb_start_write_trylock(sbi->sb))
continue;
@@ -517,7 +515,11 @@
continue;
}
- f2fs_get_node_info(sbi, nid, &ni);
+ if (f2fs_get_node_info(sbi, nid, &ni)) {
+ f2fs_put_page(node_page, 1);
+ continue;
+ }
+
if (ni.blk_addr != start_addr + off) {
f2fs_put_page(node_page, 1);
continue;
@@ -576,7 +578,10 @@
if (IS_ERR(node_page))
return false;
- f2fs_get_node_info(sbi, nid, dni);
+ if (f2fs_get_node_info(sbi, nid, dni)) {
+ f2fs_put_page(node_page, 1);
+ return false;
+ }
if (sum->version != dni->version) {
f2fs_msg(sbi->sb, KERN_WARNING,
@@ -594,6 +599,72 @@
return true;
}
+static int ra_data_block(struct inode *inode, pgoff_t index)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct address_space *mapping = inode->i_mapping;
+ struct dnode_of_data dn;
+ struct page *page;
+ struct extent_info ei = {0, 0, 0};
+ struct f2fs_io_info fio = {
+ .sbi = sbi,
+ .ino = inode->i_ino,
+ .type = DATA,
+ .temp = COLD,
+ .op = REQ_OP_READ,
+ .op_flags = 0,
+ .encrypted_page = NULL,
+ .in_list = false,
+ .retry = false,
+ };
+ int err;
+
+ page = f2fs_grab_cache_page(mapping, index, true);
+ if (!page)
+ return -ENOMEM;
+
+ if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ dn.data_blkaddr = ei.blk + index - ei.fofs;
+ goto got_it;
+ }
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (err)
+ goto put_page;
+ f2fs_put_dnode(&dn);
+
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
+ DATA_GENERIC))) {
+ err = -EFAULT;
+ goto put_page;
+ }
+got_it:
+ /* read page */
+ fio.page = page;
+ fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
+
+ fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
+ dn.data_blkaddr,
+ FGP_LOCK | FGP_CREAT, GFP_NOFS);
+ if (!fio.encrypted_page) {
+ err = -ENOMEM;
+ goto put_page;
+ }
+
+ err = f2fs_submit_page_bio(&fio);
+ if (err)
+ goto put_encrypted_page;
+ f2fs_put_page(fio.encrypted_page, 0);
+ f2fs_put_page(page, 1);
+ return 0;
+put_encrypted_page:
+ f2fs_put_page(fio.encrypted_page, 1);
+put_page:
+ f2fs_put_page(page, 1);
+ return err;
+}
+
/*
* Move data block via META_MAPPING while keeping locked data page.
* This can be used to move blocks, aka LBAs, directly on disk.
@@ -615,7 +686,7 @@
struct dnode_of_data dn;
struct f2fs_summary sum;
struct node_info ni;
- struct page *page;
+ struct page *page, *mpage;
block_t newaddr;
int err;
bool lfs_mode = test_opt(fio.sbi, LFS);
@@ -655,7 +726,10 @@
*/
f2fs_wait_on_page_writeback(page, DATA, true);
- f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ if (err)
+ goto put_out;
+
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
/* read page */
@@ -675,6 +749,23 @@
goto recover_block;
}
+ mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
+ fio.old_blkaddr, FGP_LOCK, GFP_NOFS);
+ if (mpage) {
+ bool updated = false;
+
+ if (PageUptodate(mpage)) {
+ memcpy(page_address(fio.encrypted_page),
+ page_address(mpage), PAGE_SIZE);
+ updated = true;
+ }
+ f2fs_put_page(mpage, 1);
+ invalidate_mapping_pages(META_MAPPING(fio.sbi),
+ fio.old_blkaddr, fio.old_blkaddr);
+ if (updated)
+ goto write_page;
+ }
+
err = f2fs_submit_page_bio(&fio);
if (err)
goto put_page_out;
@@ -691,6 +782,7 @@
goto put_page_out;
}
+write_page:
set_page_dirty(fio.encrypted_page);
f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
@@ -865,22 +957,30 @@
if (IS_ERR(inode) || is_bad_inode(inode))
continue;
- /* if inode uses special I/O path, let's go phase 3 */
+ if (!down_write_trylock(
+ &F2FS_I(inode)->i_gc_rwsem[WRITE])) {
+ iput(inode);
+ sbi->skipped_gc_rwsem++;
+ continue;
+ }
+
+ start_bidx = f2fs_start_bidx_of_node(nofs, inode) +
+ ofs_in_node;
+
if (f2fs_post_read_required(inode)) {
+ int err = ra_data_block(inode, start_bidx);
+
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ if (err) {
+ iput(inode);
+ continue;
+ }
add_gc_inode(gc_list, inode);
continue;
}
- if (!down_write_trylock(
- &F2FS_I(inode)->i_gc_rwsem[WRITE])) {
- iput(inode);
- continue;
- }
-
- start_bidx = f2fs_start_bidx_of_node(nofs, inode);
data_page = f2fs_get_read_data_page(inode,
- start_bidx + ofs_in_node, REQ_RAHEAD,
- true);
+ start_bidx, REQ_RAHEAD, true);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (IS_ERR(data_page)) {
iput(inode);
@@ -903,6 +1003,7 @@
continue;
if (!down_write_trylock(
&fi->i_gc_rwsem[WRITE])) {
+ sbi->skipped_gc_rwsem++;
up_write(&fi->i_gc_rwsem[READ]);
continue;
}
@@ -1040,6 +1141,7 @@
.iroot = RADIX_TREE_INIT(GFP_NOFS),
};
unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC];
+ unsigned long long first_skipped;
unsigned int skipped_round = 0, round = 0;
trace_f2fs_gc_begin(sbi->sb, sync, background,
@@ -1052,6 +1154,8 @@
prefree_segments(sbi));
cpc.reason = __get_cp_reason(sbi);
+ sbi->skipped_gc_rwsem = 0;
+ first_skipped = last_skipped;
gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
ret = -EINVAL;
@@ -1093,7 +1197,8 @@
total_freed += seg_freed;
if (gc_type == FG_GC) {
- if (sbi->skipped_atomic_files[FG_GC] > last_skipped)
+ if (sbi->skipped_atomic_files[FG_GC] > last_skipped ||
+ sbi->skipped_gc_rwsem)
skipped_round++;
last_skipped = sbi->skipped_atomic_files[FG_GC];
round++;
@@ -1102,15 +1207,23 @@
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
- if (!sync) {
- if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
- if (skipped_round > MAX_SKIP_ATOMIC_COUNT &&
- skipped_round * 2 >= round)
- f2fs_drop_inmem_pages_all(sbi, true);
+ if (sync)
+ goto stop;
+
+ if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
+ if (skipped_round <= MAX_SKIP_GC_COUNT ||
+ skipped_round * 2 < round) {
segno = NULL_SEGNO;
goto gc_more;
}
+ if (first_skipped < last_skipped &&
+ (last_skipped - first_skipped) >
+ sbi->skipped_gc_rwsem) {
+ f2fs_drop_inmem_pages_all(sbi, true);
+ segno = NULL_SEGNO;
+ goto gc_more;
+ }
if (gc_type == FG_GC)
ret = f2fs_write_checkpoint(sbi, &cpc);
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index dd4cde8..df71d26 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -139,6 +139,7 @@
.encrypted_page = NULL,
.io_type = FS_DATA_IO,
};
+ struct node_info ni;
int dirty, err;
if (!f2fs_exist_data(dn->inode))
@@ -148,6 +149,14 @@
if (err)
return err;
+ err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
+ if (err) {
+ f2fs_put_dnode(dn);
+ return err;
+ }
+
+ fio.version = ni.version;
+
if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
f2fs_put_dnode(dn);
set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
@@ -516,6 +525,7 @@
return 0;
recover:
lock_page(ipage);
+ f2fs_wait_on_page_writeback(ipage, NODE, true);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
f2fs_i_depth_write(dir, 0);
f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
@@ -707,7 +717,10 @@
ilen = start + len;
ilen -= start;
- f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+ err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+ if (err)
+ goto out;
+
byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
byteaddr += (char *)inline_data_addr(inode, ipage) -
(char *)F2FS_INODE(ipage);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 30a7773..959df22 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -68,13 +68,16 @@
}
}
-static bool __written_first_block(struct f2fs_inode *ri)
+static int __written_first_block(struct f2fs_sb_info *sbi,
+ struct f2fs_inode *ri)
{
block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
- if (is_valid_blkaddr(addr))
- return true;
- return false;
+ if (!__is_valid_data_blkaddr(addr))
+ return 1;
+ if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC))
+ return -EFAULT;
+ return 0;
}
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -121,7 +124,7 @@
if (!f2fs_sb_has_inode_chksum(sbi->sb))
return false;
- if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR))
+ if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
return false;
if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize),
@@ -159,8 +162,15 @@
struct f2fs_inode *ri;
__u32 provided, calculated;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)))
+ return true;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (!f2fs_enable_inode_chksum(sbi, page))
+#else
if (!f2fs_enable_inode_chksum(sbi, page) ||
PageDirty(page) || PageWriteback(page))
+#endif
return true;
ri = &F2FS_NODE(page)->i;
@@ -185,9 +195,31 @@
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
}
-static bool sanity_check_inode(struct inode *inode)
+static bool sanity_check_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ unsigned long long iblocks;
+
+ iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
+ if (!iblocks) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
+ "run fsck to fix.",
+ __func__, inode->i_ino, iblocks);
+ return false;
+ }
+
+ if (ino_of_node(node_page) != nid_of_node(node_page)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode footer i_ino=%lx, ino,nid: "
+ "[%u, %u] run fsck to fix.",
+ __func__, inode->i_ino,
+ ino_of_node(node_page), nid_of_node(node_page));
+ return false;
+ }
if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)
&& !f2fs_has_extra_attr(inode)) {
@@ -197,6 +229,64 @@
__func__, inode->i_ino);
return false;
}
+
+ if (f2fs_has_extra_attr(inode) &&
+ !f2fs_sb_has_extra_attr(sbi->sb)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) is with extra_attr, "
+ "but extra_attr feature is off",
+ __func__, inode->i_ino);
+ return false;
+ }
+
+ if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
+ fi->i_extra_isize % sizeof(__le32)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
+ "max: %zu",
+ __func__, inode->i_ino, fi->i_extra_isize,
+ F2FS_TOTAL_EXTRA_ATTR_SIZE);
+ return false;
+ }
+
+ if (F2FS_I(inode)->extent_tree) {
+ struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
+
+ if (ei->len &&
+ (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
+ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
+ DATA_GENERIC))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) extent info [%u, %u, %u] "
+ "is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+ ei->blk, ei->fofs, ei->len);
+ return false;
+ }
+ }
+
+ if (f2fs_has_inline_data(inode) &&
+ (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx, mode=%u) should not have "
+ "inline_data, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
+ return false;
+ }
+
+ if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx, mode=%u) should not have "
+ "inline_dentry, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
+ return false;
+ }
+
return true;
}
@@ -207,6 +297,7 @@
struct page *node_page;
struct f2fs_inode *ri;
projid_t i_projid;
+ int err;
/* Check if ino is within scope */
if (f2fs_check_nid_range(sbi, inode->i_ino))
@@ -268,6 +359,11 @@
fi->i_inline_xattr_size = 0;
}
+ if (!sanity_check_inode(inode, node_page)) {
+ f2fs_put_page(node_page, 1);
+ return -EINVAL;
+ }
+
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
@@ -275,8 +371,15 @@
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
- if (__written_first_block(ri))
- set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ if (S_ISREG(inode->i_mode)) {
+ err = __written_first_block(sbi, ri);
+ if (err < 0) {
+ f2fs_put_page(node_page, 1);
+ return err;
+ }
+ if (!err)
+ set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ }
if (!f2fs_need_inode_block_update(sbi, inode->i_ino))
fi->last_disk_size = inode->i_size;
@@ -330,10 +433,6 @@
ret = do_read_inode(inode);
if (ret)
goto bad_inode;
- if (!sanity_check_inode(inode)) {
- ret = -EINVAL;
- goto bad_inode;
- }
make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_node_aops;
@@ -474,6 +573,10 @@
F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
+#endif
}
void f2fs_update_inode_page(struct inode *inode)
@@ -558,12 +661,11 @@
if (F2FS_HAS_BLOCKS(inode))
err = f2fs_truncate(inode);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
err = -EIO;
}
-#endif
+
if (!err) {
f2fs_lock_op(sbi);
err = f2fs_remove_inode_page(inode);
@@ -626,6 +728,7 @@
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct node_info ni;
+ int err;
/*
* clear nlink of inode in order to release resource of inode
@@ -648,10 +751,16 @@
* so we can prevent losing this orphan when encoutering checkpoint
* and following suddenly power-off.
*/
- f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ if (err) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "May loss orphan inode, run fsck to fix.");
+ goto out;
+ }
if (ni.blk_addr != NULL_ADDR) {
- int err = f2fs_acquire_orphan_inode(sbi);
+ err = f2fs_acquire_orphan_inode(sbi);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
@@ -664,6 +773,7 @@
set_inode_flag(inode, FI_FREE_NID);
}
+out:
f2fs_unlock_op(sbi);
/* iput will drop the inode object */
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index c53760e..56593b3 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -246,7 +246,7 @@
return -EINVAL;
if (hot) {
- strncpy(extlist[count], name, strlen(name));
+ memcpy(extlist[count], name, strlen(name));
sbi->raw_super->hot_ext_count = hot_count + 1;
} else {
char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];
@@ -254,7 +254,7 @@
memcpy(buf, &extlist[cold_count],
F2FS_EXTENSION_LEN * hot_count);
memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN);
- strncpy(extlist[cold_count], name, strlen(name));
+ memcpy(extlist[cold_count], name, strlen(name));
memcpy(&extlist[cold_count + 1], buf,
F2FS_EXTENSION_LEN * hot_count);
sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index aab2dd2..f213a53 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -28,6 +28,7 @@
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
static struct kmem_cache *nat_entry_set_slab;
+static struct kmem_cache *fsync_node_entry_slab;
/*
* Check whether the given nid is within node id range.
@@ -112,25 +113,22 @@
static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
- pgoff_t index = current_nat_addr(sbi, nid);
- return f2fs_get_meta_page(sbi, index);
+ return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
}
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
struct page *src_page;
struct page *dst_page;
- pgoff_t src_off;
pgoff_t dst_off;
void *src_addr;
void *dst_addr;
struct f2fs_nm_info *nm_i = NM_I(sbi);
- src_off = current_nat_addr(sbi, nid);
- dst_off = next_nat_addr(sbi, src_off);
+ dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid));
/* get current nat block page with lock */
- src_page = f2fs_get_meta_page(sbi, src_off);
+ src_page = get_current_nat_page(sbi, nid);
dst_page = f2fs_grab_meta_page(sbi, dst_off);
f2fs_bug_on(sbi, PageDirty(src_page));
@@ -176,14 +174,30 @@
if (raw_ne)
node_info_from_raw_nat(&ne->ni, raw_ne);
+
+ spin_lock(&nm_i->nat_list_lock);
list_add_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+
nm_i->nat_cnt++;
return ne;
}
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
- return radix_tree_lookup(&nm_i->nat_root, n);
+ struct nat_entry *ne;
+
+ ne = radix_tree_lookup(&nm_i->nat_root, n);
+
+ /* for recent accessed nat entry, move it to tail of lru list */
+ if (ne && !get_nat_flag(ne, IS_DIRTY)) {
+ spin_lock(&nm_i->nat_list_lock);
+ if (!list_empty(&ne->list))
+ list_move_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+ }
+
+ return ne;
}
static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
@@ -194,7 +208,6 @@
static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
- list_del(&e->list);
radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
nm_i->nat_cnt--;
__free_nat_entry(e);
@@ -245,16 +258,21 @@
nm_i->dirty_nat_cnt++;
set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
+ spin_lock(&nm_i->nat_list_lock);
if (new_ne)
list_del_init(&ne->list);
else
list_move_tail(&ne->list, &head->entry_list);
+ spin_unlock(&nm_i->nat_list_lock);
}
static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
struct nat_entry_set *set, struct nat_entry *ne)
{
+ spin_lock(&nm_i->nat_list_lock);
list_move_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+
set_nat_flag(ne, IS_DIRTY, false);
set->entry_cnt--;
nm_i->dirty_nat_cnt--;
@@ -267,6 +285,72 @@
start, nr);
}
+bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page)
+{
+ return NODE_MAPPING(sbi) == page->mapping &&
+ IS_DNODE(page) && is_cold_node(page);
+}
+
+void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi)
+{
+ spin_lock_init(&sbi->fsync_node_lock);
+ INIT_LIST_HEAD(&sbi->fsync_node_list);
+ sbi->fsync_seg_id = 0;
+ sbi->fsync_node_num = 0;
+}
+
+static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi,
+ struct page *page)
+{
+ struct fsync_node_entry *fn;
+ unsigned long flags;
+ unsigned int seq_id;
+
+ fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS);
+
+ get_page(page);
+ fn->page = page;
+ INIT_LIST_HEAD(&fn->list);
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ list_add_tail(&fn->list, &sbi->fsync_node_list);
+ fn->seq_id = sbi->fsync_seg_id++;
+ seq_id = fn->seq_id;
+ sbi->fsync_node_num++;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+
+ return seq_id;
+}
+
+void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page)
+{
+ struct fsync_node_entry *fn;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ list_for_each_entry(fn, &sbi->fsync_node_list, list) {
+ if (fn->page == page) {
+ list_del(&fn->list);
+ sbi->fsync_node_num--;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ kmem_cache_free(fsync_node_entry_slab, fn);
+ put_page(page);
+ return;
+ }
+ }
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ f2fs_bug_on(sbi, 1);
+}
+
+void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ sbi->fsync_seg_id = 0;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+}
+
int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -371,7 +455,7 @@
new_blkaddr == NULL_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) &&
+ f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
new_blkaddr == NEW_ADDR);
/* increment version no as node is removed */
@@ -382,7 +466,7 @@
/* change address */
nat_set_blkaddr(e, new_blkaddr);
- if (!is_valid_blkaddr(new_blkaddr))
+ if (!is_valid_data_blkaddr(sbi, new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
@@ -405,13 +489,25 @@
if (!down_write_trylock(&nm_i->nat_tree_lock))
return 0;
- while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
+ spin_lock(&nm_i->nat_list_lock);
+ while (nr_shrink) {
struct nat_entry *ne;
+
+ if (list_empty(&nm_i->nat_entries))
+ break;
+
ne = list_first_entry(&nm_i->nat_entries,
struct nat_entry, list);
+ list_del(&ne->list);
+ spin_unlock(&nm_i->nat_list_lock);
+
__del_from_nat_cache(nm_i, ne);
nr_shrink--;
+
+ spin_lock(&nm_i->nat_list_lock);
}
+ spin_unlock(&nm_i->nat_list_lock);
+
up_write(&nm_i->nat_tree_lock);
return nr - nr_shrink;
}
@@ -419,7 +515,7 @@
/*
* This function always returns success
*/
-void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
+int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
struct node_info *ni)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -443,7 +539,7 @@
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
up_read(&nm_i->nat_tree_lock);
- return;
+ return 0;
}
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
@@ -466,6 +562,9 @@
up_read(&nm_i->nat_tree_lock);
page = f2fs_get_meta_page(sbi, index);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
nat_blk = (struct f2fs_nat_block *)page_address(page);
ne = nat_blk->entries[nid - start_nid];
node_info_from_raw_nat(ni, &ne);
@@ -473,6 +572,7 @@
cache:
/* cache nat entry */
cache_nat_entry(sbi, nid, &ne);
+ return 0;
}
/*
@@ -722,12 +822,15 @@
return err;
}
-static void truncate_node(struct dnode_of_data *dn)
+static int truncate_node(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info ni;
+ int err;
- f2fs_get_node_info(sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ if (err)
+ return err;
/* Deallocate node address */
f2fs_invalidate_blocks(sbi, ni.blk_addr);
@@ -750,11 +853,14 @@
dn->node_page = NULL;
trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
+
+ return 0;
}
static int truncate_dnode(struct dnode_of_data *dn)
{
struct page *page;
+ int err;
if (dn->nid == 0)
return 1;
@@ -770,7 +876,10 @@
dn->node_page = page;
dn->ofs_in_node = 0;
f2fs_truncate_data_blocks(dn);
- truncate_node(dn);
+ err = truncate_node(dn);
+ if (err)
+ return err;
+
return 1;
}
@@ -835,7 +944,9 @@
if (!ofs) {
/* remove current indirect node */
dn->node_page = page;
- truncate_node(dn);
+ ret = truncate_node(dn);
+ if (ret)
+ goto out_err;
freed++;
} else {
f2fs_put_page(page, 1);
@@ -893,7 +1004,9 @@
if (offset[idx + 1] == 0) {
dn->node_page = pages[idx];
dn->nid = nid[idx];
- truncate_node(dn);
+ err = truncate_node(dn);
+ if (err)
+ goto fail;
} else {
f2fs_put_page(pages[idx], 1);
}
@@ -1014,6 +1127,7 @@
nid_t nid = F2FS_I(inode)->i_xattr_nid;
struct dnode_of_data dn;
struct page *npage;
+ int err;
if (!nid)
return 0;
@@ -1022,10 +1136,15 @@
if (IS_ERR(npage))
return PTR_ERR(npage);
+ set_new_dnode(&dn, inode, NULL, npage, nid);
+ err = truncate_node(&dn);
+ if (err) {
+ f2fs_put_page(npage, 1);
+ return err;
+ }
+
f2fs_i_xnid_write(inode, 0);
- set_new_dnode(&dn, inode, NULL, npage, nid);
- truncate_node(&dn);
return 0;
}
@@ -1055,11 +1174,19 @@
f2fs_truncate_data_blocks_range(&dn, 1);
/* 0 is possible, after f2fs_new_inode() has failed */
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
+ f2fs_put_dnode(&dn);
+ return -EIO;
+ }
f2fs_bug_on(F2FS_I_SB(inode),
inode->i_blocks != 0 && inode->i_blocks != 8);
/* will put inode & node pages */
- truncate_node(&dn);
+ err = truncate_node(&dn);
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
+ }
return 0;
}
@@ -1092,7 +1219,11 @@
goto fail;
#ifdef CONFIG_F2FS_CHECK_FS
- f2fs_get_node_info(sbi, dn->nid, &new_ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
+ if (err) {
+ dec_valid_node_count(sbi, dn->inode, !ofs);
+ goto fail;
+ }
f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
#endif
new_ni.nid = dn->nid;
@@ -1140,13 +1271,21 @@
.page = page,
.encrypted_page = NULL,
};
+ int err;
- if (PageUptodate(page))
+ if (PageUptodate(page)) {
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
+#endif
return LOCKED_PAGE;
+ }
- f2fs_get_node_info(sbi, page->index, &ni);
+ err = f2fs_get_node_info(sbi, page->index, &ni);
+ if (err)
+ return err;
- if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ if (unlikely(ni.blk_addr == NULL_ADDR) ||
+ is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
ClearPageUptodate(page);
return -ENOENT;
}
@@ -1348,7 +1487,7 @@
static int __write_node_page(struct page *page, bool atomic, bool *submitted,
struct writeback_control *wbc, bool do_balance,
- enum iostat_type io_type)
+ enum iostat_type io_type, unsigned int *seq_id)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
nid_t nid;
@@ -1365,6 +1504,7 @@
.io_type = io_type,
.io_wbc = wbc,
};
+ unsigned int seq;
trace_f2fs_writepage(page, NODE);
@@ -1374,10 +1514,17 @@
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ IS_DNODE(page) && is_cold_node(page))
+ goto redirty_out;
+
/* get old block addr of this node page */
nid = nid_of_node(page);
f2fs_bug_on(sbi, page->index != nid);
+ if (f2fs_get_node_info(sbi, nid, &ni))
+ goto redirty_out;
+
if (wbc->for_reclaim) {
if (!down_read_trylock(&sbi->node_write))
goto redirty_out;
@@ -1385,8 +1532,6 @@
down_read(&sbi->node_write);
}
- f2fs_get_node_info(sbi, nid, &ni);
-
/* This page is already truncated */
if (unlikely(ni.blk_addr == NULL_ADDR)) {
ClearPageUptodate(page);
@@ -1396,11 +1541,22 @@
return 0;
}
+ if (__is_valid_data_blkaddr(ni.blk_addr) &&
+ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC))
+ goto redirty_out;
+
if (atomic && !test_opt(sbi, NOBARRIER))
fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
set_page_writeback(page);
ClearPageError(page);
+
+ if (f2fs_in_warm_node_list(sbi, page)) {
+ seq = f2fs_add_fsync_node_entry(sbi, page);
+ if (seq_id)
+ *seq_id = seq;
+ }
+
fio.old_blkaddr = ni.blk_addr;
f2fs_do_write_node_page(nid, &fio);
set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
@@ -1448,7 +1604,7 @@
goto out_page;
if (__write_node_page(node_page, false, NULL,
- &wbc, false, FS_GC_NODE_IO))
+ &wbc, false, FS_GC_NODE_IO, NULL))
unlock_page(node_page);
goto release_page;
} else {
@@ -1465,11 +1621,13 @@
static int f2fs_write_node_page(struct page *page,
struct writeback_control *wbc)
{
- return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO);
+ return __write_node_page(page, false, NULL, wbc, false,
+ FS_NODE_IO, NULL);
}
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
- struct writeback_control *wbc, bool atomic)
+ struct writeback_control *wbc, bool atomic,
+ unsigned int *seq_id)
{
pgoff_t index;
pgoff_t last_idx = ULONG_MAX;
@@ -1550,7 +1708,7 @@
ret = __write_node_page(page, atomic &&
page == last_page,
&submitted, wbc, true,
- FS_NODE_IO);
+ FS_NODE_IO, seq_id);
if (ret) {
unlock_page(page);
f2fs_put_page(last_page, 0);
@@ -1667,7 +1825,7 @@
set_dentry_mark(page, 0);
ret = __write_node_page(page, false, &submitted,
- wbc, do_balance, io_type);
+ wbc, do_balance, io_type, NULL);
if (ret)
unlock_page(page);
else if (submitted)
@@ -1686,10 +1844,12 @@
}
if (step < 2) {
+ if (wbc->sync_mode == WB_SYNC_NONE && step == 1)
+ goto out;
step++;
goto next_step;
}
-
+out:
if (nwritten)
f2fs_submit_merged_write(sbi, NODE);
@@ -1698,35 +1858,46 @@
return ret;
}
-int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
+int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
+ unsigned int seq_id)
{
- pgoff_t index = 0;
- struct pagevec pvec;
+ struct fsync_node_entry *fn;
+ struct page *page;
+ struct list_head *head = &sbi->fsync_node_list;
+ unsigned long flags;
+ unsigned int cur_seq_id = 0;
int ret2, ret = 0;
- int nr_pages;
- pagevec_init(&pvec, 0);
-
- while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_WRITEBACK))) {
- int i;
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- if (ino && ino_of_node(page) == ino) {
- f2fs_wait_on_page_writeback(page, NODE, true);
- if (TestClearPageError(page))
- ret = -EIO;
- }
+ while (seq_id && cur_seq_id < seq_id) {
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ if (list_empty(head)) {
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ break;
}
- pagevec_release(&pvec);
- cond_resched();
+ fn = list_first_entry(head, struct fsync_node_entry, list);
+ if (fn->seq_id > seq_id) {
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ break;
+ }
+ cur_seq_id = fn->seq_id;
+ page = fn->page;
+ get_page(page);
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+
+ f2fs_wait_on_page_writeback(page, NODE, true);
+ if (TestClearPageError(page))
+ ret = -EIO;
+
+ put_page(page);
+
+ if (ret)
+ break;
}
ret2 = filemap_check_errors(NODE_MAPPING(sbi));
if (!ret)
ret = ret2;
+
return ret;
}
@@ -1776,6 +1947,10 @@
if (!PageUptodate(page))
SetPageUptodate(page);
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (IS_INODE(page))
+ f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+#endif
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
@@ -1970,7 +2145,7 @@
kmem_cache_free(free_nid_slab, i);
}
-static void scan_nat_page(struct f2fs_sb_info *sbi,
+static int scan_nat_page(struct f2fs_sb_info *sbi,
struct page *nat_page, nid_t start_nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1988,7 +2163,10 @@
break;
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
- f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
+
+ if (blk_addr == NEW_ADDR)
+ return -EINVAL;
+
if (blk_addr == NULL_ADDR) {
add_free_nid(sbi, start_nid, true, true);
} else {
@@ -1997,6 +2175,8 @@
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
}
+
+ return 0;
}
static void scan_curseg_cache(struct f2fs_sb_info *sbi)
@@ -2052,11 +2232,11 @@
up_read(&nm_i->nat_tree_lock);
}
-static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
+static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- int i = 0;
+ int i = 0, ret;
nid_t nid = nm_i->next_scan_nid;
if (unlikely(nid >= nm_i->max_nid))
@@ -2064,17 +2244,17 @@
/* Enough entries */
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
- return;
+ return 0;
if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS))
- return;
+ return 0;
if (!mount) {
/* try to find free nids in free_nid_bitmap */
scan_free_nid_bits(sbi);
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
- return;
+ return 0;
}
/* readahead nat pages to be scanned */
@@ -2088,8 +2268,16 @@
nm_i->nat_block_bitmap)) {
struct page *page = get_current_nat_page(sbi, nid);
- scan_nat_page(sbi, page, nid);
+ ret = scan_nat_page(sbi, page, nid);
f2fs_put_page(page, 1);
+
+ if (ret) {
+ up_read(&nm_i->nat_tree_lock);
+ f2fs_bug_on(sbi, !mount);
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "NAT is corrupt, run fsck to fix it");
+ return -EINVAL;
+ }
}
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
@@ -2110,13 +2298,19 @@
f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
+
+ return 0;
}
-void f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
+int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
+ int ret;
+
mutex_lock(&NM_I(sbi)->build_lock);
- __f2fs_build_free_nids(sbi, sync, mount);
+ ret = __f2fs_build_free_nids(sbi, sync, mount);
mutex_unlock(&NM_I(sbi)->build_lock);
+
+ return ret;
}
/*
@@ -2129,12 +2323,11 @@
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i = NULL;
retry:
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
f2fs_show_injection_info(FAULT_ALLOC_NID);
return false;
}
-#endif
+
spin_lock(&nm_i->nid_list_lock);
if (unlikely(nm_i->available_nids == 0)) {
@@ -2279,12 +2472,16 @@
struct dnode_of_data dn;
struct node_info ni;
struct page *xpage;
+ int err;
if (!prev_xnid)
goto recover_xnid;
/* 1: invalidate the previous xattr nid */
- f2fs_get_node_info(sbi, prev_xnid, &ni);
+ err = f2fs_get_node_info(sbi, prev_xnid, &ni);
+ if (err)
+ return err;
+
f2fs_invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, inode, false);
set_node_addr(sbi, &ni, NULL_ADDR, false);
@@ -2319,8 +2516,11 @@
nid_t ino = ino_of_node(page);
struct node_info old_ni, new_ni;
struct page *ipage;
+ int err;
- f2fs_get_node_info(sbi, ino, &old_ni);
+ err = f2fs_get_node_info(sbi, ino, &old_ni);
+ if (err)
+ return err;
if (unlikely(old_ni.blk_addr != NULL_ADDR))
return -EINVAL;
@@ -2374,7 +2574,7 @@
return 0;
}
-void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
+int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum)
{
struct f2fs_node *rn;
@@ -2396,6 +2596,9 @@
for (idx = addr; idx < addr + nrpages; idx++) {
struct page *page = f2fs_get_tmp_page(sbi, idx);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
rn = F2FS_NODE(page);
sum_entry->nid = rn->footer.nid;
sum_entry->version = 0;
@@ -2407,6 +2610,7 @@
invalidate_mapping_pages(META_MAPPING(sbi), addr,
addr + nrpages);
}
+ return 0;
}
static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
@@ -2584,6 +2788,13 @@
nid_t set_idx = 0;
LIST_HEAD(sets);
+ /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
+ if (enabled_nat_bits(sbi, cpc)) {
+ down_write(&nm_i->nat_tree_lock);
+ remove_nats_in_journal(sbi);
+ up_write(&nm_i->nat_tree_lock);
+ }
+
if (!nm_i->dirty_nat_cnt)
return;
@@ -2636,7 +2847,13 @@
nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++) {
- struct page *page = f2fs_get_meta_page(sbi, nat_bits_addr++);
+ struct page *page;
+
+ page = f2fs_get_meta_page(sbi, nat_bits_addr++);
+ if (IS_ERR(page)) {
+ disable_nat_bits(sbi, true);
+ return PTR_ERR(page);
+ }
memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
page_address(page), F2FS_BLKSIZE);
@@ -2720,6 +2937,7 @@
INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
+ spin_lock_init(&nm_i->nat_list_lock);
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->nid_list_lock);
@@ -2764,8 +2982,8 @@
for (i = 0; i < nm_i->nat_blocks; i++) {
nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
- NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL);
- if (!nm_i->free_nid_bitmap)
+ f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
+ if (!nm_i->free_nid_bitmap[i])
return -ENOMEM;
}
@@ -2803,8 +3021,7 @@
/* load free nid status from nat_bits table */
load_free_nid_bitmap(sbi);
- f2fs_build_free_nids(sbi, true, true);
- return 0;
+ return f2fs_build_free_nids(sbi, true, true);
}
void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
@@ -2839,8 +3056,13 @@
unsigned idx;
nid = nat_get_nid(natvec[found - 1]) + 1;
- for (idx = 0; idx < found; idx++)
+ for (idx = 0; idx < found; idx++) {
+ spin_lock(&nm_i->nat_list_lock);
+ list_del(&natvec[idx]->list);
+ spin_unlock(&nm_i->nat_list_lock);
+
__del_from_nat_cache(nm_i, natvec[idx]);
+ }
}
f2fs_bug_on(sbi, nm_i->nat_cnt);
@@ -2895,8 +3117,15 @@
sizeof(struct nat_entry_set));
if (!nat_entry_set_slab)
goto destroy_free_nid;
+
+ fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry",
+ sizeof(struct fsync_node_entry));
+ if (!fsync_node_entry_slab)
+ goto destroy_nat_entry_set;
return 0;
+destroy_nat_entry_set:
+ kmem_cache_destroy(nat_entry_set_slab);
destroy_free_nid:
kmem_cache_destroy(free_nid_slab);
destroy_nat_entry:
@@ -2907,6 +3136,7 @@
void f2fs_destroy_node_manager_caches(void)
{
+ kmem_cache_destroy(fsync_node_entry_slab);
kmem_cache_destroy(nat_entry_set_slab);
kmem_cache_destroy(free_nid_slab);
kmem_cache_destroy(nat_entry_slab);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index b95e49e..0f4db7a 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -135,6 +135,11 @@
return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
}
+static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
+{
+ return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8;
+}
+
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */
@@ -444,6 +449,10 @@
else
flag &= ~(0x1 << type);
rn->footer.flag = cpu_to_le32(flag);
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+#endif
}
#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT)
#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index daf81d4..501bb0f 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -241,8 +241,8 @@
struct page *page = NULL;
block_t blkaddr;
unsigned int loop_cnt = 0;
- unsigned int free_blocks = sbi->user_block_count -
- valid_user_blocks(sbi);
+ unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
+ valid_user_blocks(sbi);
int err = 0;
/* get node pages in the current segment */
@@ -252,10 +252,14 @@
while (1) {
struct fsync_inode_entry *entry;
- if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
page = f2fs_get_tmp_page(sbi, blkaddr);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ break;
+ }
if (!is_recoverable_dnode(page))
break;
@@ -471,7 +475,10 @@
f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
- f2fs_get_node_info(sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (err)
+ goto err;
+
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
@@ -507,14 +514,13 @@
}
/* dest is valid block, try to recover from src to dest */
- if (f2fs_is_valid_meta_blkaddr(sbi, dest, META_POR)) {
+ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
err = f2fs_reserve_new_block(&dn);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- while (err)
+ while (err &&
+ IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION))
err = f2fs_reserve_new_block(&dn);
-#endif
/* We should not get -ENOSPC */
f2fs_bug_on(sbi, err);
if (err)
@@ -568,12 +574,16 @@
while (1) {
struct fsync_inode_entry *entry;
- if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
f2fs_ra_meta_pages_cond(sbi, blkaddr);
page = f2fs_get_tmp_page(sbi, blkaddr);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ break;
+ }
if (!is_recoverable_dnode(page)) {
f2fs_put_page(page, 1);
@@ -628,7 +638,8 @@
#endif
if (s_flags & MS_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "recover fsync data on readonly fs");
sbi->sb->s_flags &= ~MS_RDONLY;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 52e85a0..1da9a3c 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -250,7 +250,13 @@
err = -EAGAIN;
goto next;
}
- f2fs_get_node_info(sbi, dn.nid, &ni);
+
+ err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
+ }
+
if (cur->old_addr == NEW_ADDR) {
f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
f2fs_update_data_blkaddr(&dn, NEW_ADDR);
@@ -439,8 +445,10 @@
int err;
f2fs_balance_fs(sbi, true);
- f2fs_lock_op(sbi);
+ down_write(&fi->i_gc_rwsem[WRITE]);
+
+ f2fs_lock_op(sbi);
set_inode_flag(inode, FI_ATOMIC_COMMIT);
mutex_lock(&fi->inmem_lock);
@@ -455,6 +463,8 @@
clear_inode_flag(inode, FI_ATOMIC_COMMIT);
f2fs_unlock_op(sbi);
+ up_write(&fi->i_gc_rwsem[WRITE]);
+
return err;
}
@@ -464,12 +474,10 @@
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
-#endif
/* balance_fs_bg is able to be pending */
if (need && excess_cached_nats(sbi))
@@ -503,7 +511,8 @@
else
f2fs_build_free_nids(sbi, false, false);
- if (!is_idle(sbi) && !excess_dirty_nats(sbi))
+ if (!is_idle(sbi) &&
+ (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
/* checkpoint is the only way to shrink partial cached entries */
@@ -511,6 +520,7 @@
!f2fs_available_free_memory(sbi, INO_ENTRIES) ||
excess_prefree_segs(sbi) ||
excess_dirty_nats(sbi) ||
+ excess_dirty_nodes(sbi) ||
f2fs_time_over(sbi, CP_TIME)) {
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
@@ -831,9 +841,12 @@
dc->len = len;
dc->ref = 0;
dc->state = D_PREP;
+ dc->issuing = 0;
dc->error = 0;
init_completion(&dc->wait);
list_add_tail(&dc->list, pend_list);
+ spin_lock_init(&dc->lock);
+ dc->bio_ref = 0;
atomic_inc(&dcc->discard_cmd_cnt);
dcc->undiscard_blks += len;
@@ -860,7 +873,7 @@
struct discard_cmd *dc)
{
if (dc->state == D_DONE)
- atomic_dec(&dcc->issing_discard);
+ atomic_sub(dc->issuing, &dcc->issing_discard);
list_del(&dc->list);
rb_erase(&dc->rb_node, &dcc->root);
@@ -875,9 +888,17 @@
struct discard_cmd *dc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ unsigned long flags;
trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
+ spin_lock_irqsave(&dc->lock, flags);
+ if (dc->bio_ref) {
+ spin_unlock_irqrestore(&dc->lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&dc->lock, flags);
+
f2fs_bug_on(sbi, dc->ref);
if (dc->error == -EOPNOTSUPP)
@@ -893,10 +914,17 @@
static void f2fs_submit_discard_endio(struct bio *bio)
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
+ unsigned long flags;
dc->error = bio->bi_error;
- dc->state = D_DONE;
- complete_all(&dc->wait);
+
+ spin_lock_irqsave(&dc->lock, flags);
+ dc->bio_ref--;
+ if (!dc->bio_ref && dc->state == D_SUBMIT) {
+ dc->state = D_DONE;
+ complete_all(&dc->wait);
+ }
+ spin_unlock_irqrestore(&dc->lock, flags);
bio_put(bio);
}
@@ -934,6 +962,7 @@
/* common policy */
dpolicy->type = discard_type;
dpolicy->sync = true;
+ dpolicy->ordered = false;
dpolicy->granularity = granularity;
dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
@@ -945,6 +974,7 @@
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
dpolicy->io_aware = true;
dpolicy->sync = false;
+ dpolicy->ordered = true;
if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
dpolicy->granularity = 1;
dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
@@ -962,48 +992,115 @@
}
}
-
+static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
+ struct block_device *bdev, block_t lstart,
+ block_t start, block_t len);
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
-static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
+static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
- struct discard_cmd *dc)
+ struct discard_cmd *dc,
+ unsigned int *issued)
{
+ struct block_device *bdev = dc->bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_discard_blocks =
+ SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
&(dcc->fstrim_list) : &(dcc->wait_list);
- struct bio *bio = NULL;
int flag = dpolicy->sync ? REQ_SYNC : 0;
+ block_t lstart, start, len, total_len;
+ int err = 0;
if (dc->state != D_PREP)
- return;
+ return 0;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
- return;
+ return 0;
- trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len);
+ trace_f2fs_issue_discard(bdev, dc->start, dc->len);
- dc->error = __blkdev_issue_discard(dc->bdev,
- SECTOR_FROM_BLOCK(dc->start),
- SECTOR_FROM_BLOCK(dc->len),
- GFP_NOFS, 0, &bio);
- if (!dc->error) {
- /* should keep before submission to avoid D_DONE right away */
- dc->state = D_SUBMIT;
- atomic_inc(&dcc->issued_discard);
- atomic_inc(&dcc->issing_discard);
- if (bio) {
- bio->bi_private = dc;
- bio->bi_end_io = f2fs_submit_discard_endio;
- bio->bi_opf |= flag;
- submit_bio(bio);
- list_move_tail(&dc->list, wait_list);
- __check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
+ lstart = dc->lstart;
+ start = dc->start;
+ len = dc->len;
+ total_len = len;
- f2fs_update_iostat(sbi, FS_DISCARD, 1);
+ dc->len = 0;
+
+ while (total_len && *issued < dpolicy->max_requests && !err) {
+ struct bio *bio = NULL;
+ unsigned long flags;
+ bool last = true;
+
+ if (len > max_discard_blocks) {
+ len = max_discard_blocks;
+ last = false;
}
- } else {
- __remove_discard_cmd(sbi, dc);
+
+ (*issued)++;
+ if (*issued == dpolicy->max_requests)
+ last = true;
+
+ dc->len += len;
+
+ if (time_to_inject(sbi, FAULT_DISCARD)) {
+ f2fs_show_injection_info(FAULT_DISCARD);
+ err = -EIO;
+ goto submit;
+ }
+ err = __blkdev_issue_discard(bdev,
+ SECTOR_FROM_BLOCK(start),
+ SECTOR_FROM_BLOCK(len),
+ GFP_NOFS, 0, &bio);
+submit:
+ if (err) {
+ spin_lock_irqsave(&dc->lock, flags);
+ if (dc->state == D_PARTIAL)
+ dc->state = D_SUBMIT;
+ spin_unlock_irqrestore(&dc->lock, flags);
+
+ break;
+ }
+
+ f2fs_bug_on(sbi, !bio);
+
+ /*
+ * should keep before submission to avoid D_DONE
+ * right away
+ */
+ spin_lock_irqsave(&dc->lock, flags);
+ if (last)
+ dc->state = D_SUBMIT;
+ else
+ dc->state = D_PARTIAL;
+ dc->bio_ref++;
+ spin_unlock_irqrestore(&dc->lock, flags);
+
+ atomic_inc(&dcc->issing_discard);
+ dc->issuing++;
+ list_move_tail(&dc->list, wait_list);
+
+ /* sanity check on discard range */
+ __check_sit_bitmap(sbi, start, start + len);
+
+ bio->bi_private = dc;
+ bio->bi_end_io = f2fs_submit_discard_endio;
+ bio->bi_opf |= flag;
+ submit_bio(bio);
+
+ atomic_inc(&dcc->issued_discard);
+
+ f2fs_update_iostat(sbi, FS_DISCARD, 1);
+
+ lstart += len;
+ start += len;
+ total_len -= len;
+ len = total_len;
}
+
+ if (!err && len)
+ __update_discard_tree_range(sbi, bdev, lstart, start, len);
+ return err;
}
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
@@ -1084,10 +1181,11 @@
struct discard_cmd *dc;
struct discard_info di = {0};
struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_discard_blocks =
+ SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
block_t end = lstart + len;
- mutex_lock(&dcc->cmd_lock);
-
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, lstart,
(struct rb_entry **)&prev_dc,
@@ -1127,7 +1225,8 @@
if (prev_dc && prev_dc->state == D_PREP &&
prev_dc->bdev == bdev &&
- __is_discard_back_mergeable(&di, &prev_dc->di)) {
+ __is_discard_back_mergeable(&di, &prev_dc->di,
+ max_discard_blocks)) {
prev_dc->di.len += di.len;
dcc->undiscard_blks += di.len;
__relocate_discard_cmd(dcc, prev_dc);
@@ -1138,7 +1237,8 @@
if (next_dc && next_dc->state == D_PREP &&
next_dc->bdev == bdev &&
- __is_discard_front_mergeable(&di, &next_dc->di)) {
+ __is_discard_front_mergeable(&di, &next_dc->di,
+ max_discard_blocks)) {
next_dc->di.lstart = di.lstart;
next_dc->di.len += di.len;
next_dc->di.start = di.start;
@@ -1161,8 +1261,6 @@
node = rb_next(&prev_dc->rb_node);
next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
}
-
- mutex_unlock(&dcc->cmd_lock);
}
static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
@@ -1177,10 +1275,72 @@
blkstart -= FDEV(devi).start_blk;
}
+ mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
+ mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
return 0;
}
+static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+ struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct discard_cmd *dc;
+ struct blk_plug plug;
+ unsigned int pos = dcc->next_pos;
+ unsigned int issued = 0;
+ bool io_interrupted = false;
+
+ mutex_lock(&dcc->cmd_lock);
+ dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
+ NULL, pos,
+ (struct rb_entry **)&prev_dc,
+ (struct rb_entry **)&next_dc,
+ &insert_p, &insert_parent, true);
+ if (!dc)
+ dc = next_dc;
+
+ blk_start_plug(&plug);
+
+ while (dc) {
+ struct rb_node *node;
+ int err = 0;
+
+ if (dc->state != D_PREP)
+ goto next;
+
+ if (dpolicy->io_aware && !is_idle(sbi)) {
+ io_interrupted = true;
+ break;
+ }
+
+ dcc->next_pos = dc->lstart + dc->len;
+ err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+
+ if (issued >= dpolicy->max_requests)
+ break;
+next:
+ node = rb_next(&dc->rb_node);
+ if (err)
+ __remove_discard_cmd(sbi, dc);
+ dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+ }
+
+ blk_finish_plug(&plug);
+
+ if (!dc)
+ dcc->next_pos = 0;
+
+ mutex_unlock(&dcc->cmd_lock);
+
+ if (!issued && io_interrupted)
+ issued = -1;
+
+ return issued;
+}
+
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
@@ -1188,19 +1348,24 @@
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
- int i, iter = 0, issued = 0;
+ int i, issued = 0;
bool io_interrupted = false;
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
if (i + 1 < dpolicy->granularity)
break;
+
+ if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
+ return __issue_discard_cmd_orderly(sbi, dpolicy);
+
pend_list = &dcc->pend_list[i];
mutex_lock(&dcc->cmd_lock);
if (list_empty(pend_list))
goto next;
- f2fs_bug_on(sbi,
- !f2fs_check_rb_tree_consistence(sbi, &dcc->root));
+ if (unlikely(dcc->rbtree_check))
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &dcc->root));
blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
@@ -1208,20 +1373,19 @@
if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
!is_idle(sbi)) {
io_interrupted = true;
- goto skip;
+ break;
}
- __submit_discard_cmd(sbi, dpolicy, dc);
- issued++;
-skip:
- if (++iter >= dpolicy->max_requests)
+ __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+
+ if (issued >= dpolicy->max_requests)
break;
}
blk_finish_plug(&plug);
next:
mutex_unlock(&dcc->cmd_lock);
- if (iter >= dpolicy->max_requests)
+ if (issued >= dpolicy->max_requests || io_interrupted)
break;
}
@@ -1319,21 +1483,22 @@
return trimmed;
}
-static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
+static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
struct discard_policy dp;
+ unsigned int discard_blks;
- if (dpolicy) {
- __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
- return;
- }
+ if (dpolicy)
+ return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
/* wait all */
__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
- __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+ discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
- __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+ discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+
+ return discard_blks;
}
/* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1386,6 +1551,8 @@
/* just to make sure there is no pending discard commands */
__wait_all_discard_cmd(sbi, NULL);
+
+ f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
return dropped;
}
@@ -1649,21 +1816,30 @@
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
+ bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
mutex_lock(&dirty_i->seglist_lock);
while (1) {
int i;
+
+ if (need_align && end != -1)
+ end--;
start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
if (start >= MAIN_SEGS(sbi))
break;
end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
start + 1);
- for (i = start; i < end; i++)
- clear_bit(i, prefree_map);
+ if (need_align) {
+ start = rounddown(start, sbi->segs_per_sec);
+ end = roundup(end, sbi->segs_per_sec);
+ }
- dirty_i->nr_dirty[PRE] -= end - start;
+ for (i = start; i < end; i++) {
+ if (test_and_clear_bit(i, prefree_map))
+ dirty_i->nr_dirty[PRE]--;
+ }
if (!test_opt(sbi, DISCARD))
continue;
@@ -1757,7 +1933,9 @@
dcc->nr_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
dcc->undiscard_blks = 0;
+ dcc->next_pos = 0;
dcc->root = RB_ROOT;
+ dcc->rbtree_check = false;
init_waitqueue_head(&dcc->discard_wait_queue);
SM_I(sbi)->dcc_info = dcc;
@@ -1907,6 +2085,8 @@
if (addr == NEW_ADDR)
return;
+ invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
+
/* add it into sit main buffer */
down_write(&sit_i->sentry_lock);
@@ -1925,7 +2105,7 @@
struct seg_entry *se;
bool is_cp = false;
- if (!is_valid_blkaddr(blkaddr))
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
return true;
down_read(&sit_i->sentry_lock);
@@ -1989,7 +2169,7 @@
*/
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
- return f2fs_get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
+ return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
}
void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
@@ -2372,7 +2552,7 @@
return has_candidate;
}
-static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
unsigned int start, unsigned int end)
{
@@ -2382,12 +2562,15 @@
struct discard_cmd *dc;
struct blk_plug plug;
int issued;
+ unsigned int trimmed = 0;
next:
issued = 0;
mutex_lock(&dcc->cmd_lock);
- f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root));
+ if (unlikely(dcc->rbtree_check))
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &dcc->root));
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, start,
@@ -2401,6 +2584,7 @@
while (dc && dc->lstart <= end) {
struct rb_node *node;
+ int err = 0;
if (dc->len < dpolicy->granularity)
goto skip;
@@ -2410,19 +2594,24 @@
goto skip;
}
- __submit_discard_cmd(sbi, dpolicy, dc);
+ err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
- if (++issued >= dpolicy->max_requests) {
+ if (issued >= dpolicy->max_requests) {
start = dc->lstart + dc->len;
+ if (err)
+ __remove_discard_cmd(sbi, dc);
+
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
- __wait_all_discard_cmd(sbi, NULL);
+ trimmed += __wait_all_discard_cmd(sbi, NULL);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto next;
}
skip:
node = rb_next(&dc->rb_node);
+ if (err)
+ __remove_discard_cmd(sbi, dc);
dc = rb_entry_safe(node, struct discard_cmd, rb_node);
if (fatal_signal_pending(current))
@@ -2431,6 +2620,8 @@
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
+
+ return trimmed;
}
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
@@ -2443,12 +2634,13 @@
struct discard_policy dpolicy;
unsigned long long trimmed = 0;
int err = 0;
+ bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
- if (end <= MAIN_BLKADDR(sbi))
- return -EINVAL;
+ if (end < MAIN_BLKADDR(sbi))
+ goto out;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
f2fs_msg(sbi->sb, KERN_WARNING,
@@ -2460,6 +2652,10 @@
start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
GET_SEGNO(sbi, end);
+ if (need_align) {
+ start_segno = rounddown(start_segno, sbi->segs_per_sec);
+ end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
+ }
cpc.reason = CP_DISCARD;
cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
@@ -2475,24 +2671,27 @@
if (err)
goto out;
- start_block = START_BLOCK(sbi, start_segno);
- end_block = START_BLOCK(sbi, end_segno + 1);
-
- __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
- __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
-
/*
* We filed discard candidates, but actually we don't need to wait for
* all of them, since they'll be issued in idle time along with runtime
* discard option. User configuration looks like using runtime discard
* or periodic fstrim instead of it.
*/
- if (!test_opt(sbi, DISCARD)) {
- trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
+ if (test_opt(sbi, DISCARD))
+ goto out;
+
+ start_block = START_BLOCK(sbi, start_segno);
+ end_block = START_BLOCK(sbi, end_segno + 1);
+
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
+ trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
start_block, end_block);
- range->len = F2FS_BLK_TO_BYTES(trimmed);
- }
+
+ trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
+ start_block, end_block);
out:
+ if (!err)
+ range->len = F2FS_BLK_TO_BYTES(trimmed);
return err;
}
@@ -2645,8 +2844,8 @@
return CURSEG_COLD_DATA;
if (file_is_hot(inode) ||
is_inode_flag_set(inode, FI_HOT_DATA) ||
- is_inode_flag_set(inode, FI_ATOMIC_FILE) ||
- is_inode_flag_set(inode, FI_VOLATILE_FILE))
+ f2fs_is_atomic_file(inode) ||
+ f2fs_is_volatile_file(inode))
return CURSEG_HOT_DATA;
/* f2fs_rw_hint_to_seg_type(inode->i_write_hint); */
return CURSEG_WARM_DATA;
@@ -2788,6 +2987,9 @@
reallocate:
f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
&fio->new_blkaddr, sum, type, fio, true);
+ if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
+ invalidate_mapping_pages(META_MAPPING(fio->sbi),
+ fio->old_blkaddr, fio->old_blkaddr);
/* writeout dirty page into bdev */
f2fs_submit_page_write(fio);
@@ -2843,11 +3045,9 @@
{
struct f2fs_sb_info *sbi = fio->sbi;
struct f2fs_summary sum;
- struct node_info ni;
f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
- f2fs_get_node_info(sbi, dn->nid, &ni);
- set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
+ set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
do_write_page(&sum, fio);
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
@@ -2944,8 +3144,11 @@
if (!recover_curseg || recover_newaddr)
update_sit_entry(sbi, new_blkaddr, 1);
- if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ old_blkaddr, old_blkaddr);
update_sit_entry(sbi, old_blkaddr, -1);
+ }
locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
@@ -2999,7 +3202,7 @@
{
struct page *cpage;
- if (!is_valid_blkaddr(blkaddr))
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
return;
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
@@ -3009,7 +3212,7 @@
}
}
-static void read_compacted_summaries(struct f2fs_sb_info *sbi)
+static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *seg_i;
@@ -3021,6 +3224,8 @@
start = start_sum_block(sbi);
page = f2fs_get_meta_page(sbi, start++);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
kaddr = (unsigned char *)page_address(page);
/* Step 1: restore nat cache */
@@ -3061,11 +3266,14 @@
page = NULL;
page = f2fs_get_meta_page(sbi, start++);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
kaddr = (unsigned char *)page_address(page);
offset = 0;
}
}
f2fs_put_page(page, 1);
+ return 0;
}
static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
@@ -3077,6 +3285,7 @@
unsigned short blk_off;
unsigned int segno = 0;
block_t blk_addr = 0;
+ int err = 0;
/* get segment number and block addr */
if (IS_DATASEG(type)) {
@@ -3100,6 +3309,8 @@
}
new = f2fs_get_meta_page(sbi, blk_addr);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
sum = (struct f2fs_summary_block *)page_address(new);
if (IS_NODESEG(type)) {
@@ -3111,7 +3322,9 @@
ns->ofs_in_node = 0;
}
} else {
- f2fs_restore_node_summary(sbi, segno, sum);
+ err = f2fs_restore_node_summary(sbi, segno, sum);
+ if (err)
+ goto out;
}
}
@@ -3131,8 +3344,9 @@
curseg->alloc_type = ckpt->alloc_type[type];
curseg->next_blkoff = blk_off;
mutex_unlock(&curseg->curseg_mutex);
+out:
f2fs_put_page(new, 1);
- return 0;
+ return err;
}
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
@@ -3150,7 +3364,9 @@
META_CP, true);
/* restore for compacted data summary */
- read_compacted_summaries(sbi);
+ err = read_compacted_summaries(sbi);
+ if (err)
+ return err;
type = CURSEG_HOT_NODE;
}
@@ -3281,7 +3497,7 @@
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
unsigned int segno)
{
- return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
+ return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
}
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 38c549d..b3d9e31 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -85,7 +85,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
- ((!is_valid_blkaddr(blk_addr)) ? \
+ ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
@@ -215,7 +215,7 @@
#define IS_DUMMY_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
-#define MAX_SKIP_ATOMIC_COUNT 16
+#define MAX_SKIP_GC_COUNT 16
struct inmem_pages {
struct list_head list;
@@ -648,13 +648,10 @@
{
struct f2fs_sb_info *sbi = fio->sbi;
- if (PAGE_TYPE_OF_BIO(fio->type) == META &&
- (!is_read_io(fio->op) || fio->is_meta))
- BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
- blk_addr >= MAIN_BLKADDR(sbi));
+ if (__is_meta_io(fio))
+ verify_blkaddr(sbi, blk_addr, META_GENERIC);
else
- BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
- blk_addr >= MAX_BLKADDR(sbi));
+ verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
}
/*
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 343a5cf..bba0cd4 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -41,7 +41,7 @@
#ifdef CONFIG_F2FS_FAULT_INJECTION
-char *fault_name[FAULT_MAX] = {
+char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
[FAULT_KVMALLOC] = "kvmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
@@ -55,20 +55,24 @@
[FAULT_TRUNCATE] = "truncate fail",
[FAULT_IO] = "IO error",
[FAULT_CHECKPOINT] = "checkpoint error",
+ [FAULT_DISCARD] = "discard error",
};
-static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
- unsigned int rate)
+void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
+ unsigned int type)
{
struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info;
if (rate) {
atomic_set(&ffi->inject_ops, 0);
ffi->inject_rate = rate;
- ffi->inject_type = (1 << FAULT_MAX) - 1;
- } else {
- memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
+
+ if (type)
+ ffi->inject_type = type;
+
+ if (!rate && !type)
+ memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
#endif
@@ -113,6 +117,7 @@
Opt_mode,
Opt_io_size_bits,
Opt_fault_injection,
+ Opt_fault_type,
Opt_lazytime,
Opt_nolazytime,
Opt_quota,
@@ -170,6 +175,7 @@
{Opt_mode, "mode=%s"},
{Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
+ {Opt_fault_type, "fault_type=%u"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
{Opt_quota, "quota"},
@@ -347,12 +353,6 @@
"QUOTA feature is enabled, so ignore jquota_fmt");
F2FS_OPTION(sbi).s_jquota_fmt = 0;
}
- if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "Filesystem with quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
- return -1;
- }
return 0;
}
#endif
@@ -606,7 +606,18 @@
if (args->from && match_int(args, &arg))
return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_build_fault_attr(sbi, arg);
+ f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE);
+ set_opt(sbi, FAULT_INJECTION);
+#else
+ f2fs_msg(sb, KERN_INFO,
+ "FAULT_INJECTION was not selected");
+#endif
+ break;
+ case Opt_fault_type:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ f2fs_build_fault_attr(sbi, 0, arg);
set_opt(sbi, FAULT_INJECTION);
#else
f2fs_msg(sb, KERN_INFO,
@@ -775,6 +786,19 @@
#ifdef CONFIG_QUOTA
if (f2fs_check_quota_options(sbi))
return -EINVAL;
+#else
+ if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "Filesystem with quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return -EINVAL;
+ }
+ if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+ f2fs_msg(sb, KERN_ERR,
+ "Filesystem with project quota feature cannot be "
+ "mounted RDWR without CONFIG_QUOTA");
+ return -EINVAL;
+ }
#endif
if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
@@ -1030,6 +1054,10 @@
/* our cp_error case, we can wait for any writeback page */
f2fs_flush_merged_writes(sbi);
+ f2fs_wait_on_all_pages_writeback(sbi);
+
+ f2fs_bug_on(sbi, sbi->fsync_node_num);
+
iput(sbi->node_inode);
iput(sbi->meta_inode);
@@ -1310,9 +1338,12 @@
if (F2FS_IO_SIZE_BITS(sbi))
seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (test_opt(sbi, FAULT_INJECTION))
+ if (test_opt(sbi, FAULT_INJECTION)) {
seq_printf(seq, ",fault_injection=%u",
F2FS_OPTION(sbi).fault_info.inject_rate);
+ seq_printf(seq, ",fault_type=%u",
+ F2FS_OPTION(sbi).fault_info.inject_type);
+ }
#endif
#ifdef CONFIG_QUOTA
if (test_opt(sbi, QUOTA))
@@ -1357,7 +1388,8 @@
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
F2FS_OPTION(sbi).test_dummy_encryption = false;
- sbi->readdir_ra = 1;
+ F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
+ F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_XATTR);
@@ -1367,12 +1399,12 @@
set_opt(sbi, NOHEAP);
sbi->sb->s_flags |= MS_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
- if (f2fs_sb_has_blkzoned(sbi->sb)) {
- set_opt_mode(sbi, F2FS_MOUNT_LFS);
+ if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)))
set_opt(sbi, DISCARD);
- } else {
+ if (f2fs_sb_has_blkzoned(sbi->sb))
+ set_opt_mode(sbi, F2FS_MOUNT_LFS);
+ else
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
- }
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
@@ -1381,9 +1413,7 @@
set_opt(sbi, POSIX_ACL);
#endif
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_build_fault_attr(sbi, 0);
-#endif
+ f2fs_build_fault_attr(sbi, 0, 0);
}
#ifdef CONFIG_QUOTA
@@ -2231,9 +2261,9 @@
return 1;
}
- if (secs_per_zone > total_sections) {
+ if (secs_per_zone > total_sections || !secs_per_zone) {
f2fs_msg(sb, KERN_INFO,
- "Wrong secs_per_zone (%u > %u)",
+ "Wrong secs_per_zone / total_sections (%u, %u)",
secs_per_zone, total_sections);
return 1;
}
@@ -2287,6 +2317,9 @@
unsigned int sit_segs, nat_segs;
unsigned int sit_bitmap_size, nat_bitmap_size;
unsigned int log_blocks_per_seg;
+ unsigned int segment_count_main;
+ unsigned int cp_pack_start_sum, cp_payload;
+ block_t user_block_count;
int i;
total = le32_to_cpu(raw_super->segment_count);
@@ -2311,6 +2344,16 @@
return 1;
}
+ user_block_count = le64_to_cpu(ckpt->user_block_count);
+ segment_count_main = le32_to_cpu(raw_super->segment_count_main);
+ log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+ if (!user_block_count || user_block_count >=
+ segment_count_main << log_blocks_per_seg) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong user_block_count: %u", user_block_count);
+ return 1;
+ }
+
main_segs = le32_to_cpu(raw_super->segment_count_main);
blocks_per_seg = sbi->blocks_per_seg;
@@ -2327,7 +2370,6 @@
sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
- log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
@@ -2337,6 +2379,17 @@
return 1;
}
+ cp_pack_start_sum = __start_sum_addr(sbi);
+ cp_payload = __cp_payload(sbi);
+ if (cp_pack_start_sum < cp_payload + 1 ||
+ cp_pack_start_sum > blocks_per_seg - 1 -
+ NR_CURSEG_TYPE) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong cp_pack_start_sum: %u",
+ cp_pack_start_sum);
+ return 1;
+ }
+
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
@@ -2674,6 +2727,8 @@
sm_i->dcc_info->discard_granularity = 1;
sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
}
+
+ sbi->readdir_ra = 1;
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
@@ -2723,9 +2778,6 @@
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;
- F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
- F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
-
/* precompute checksum seed for metadata */
if (f2fs_sb_has_inode_chksum(sb))
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
@@ -2889,6 +2941,8 @@
f2fs_init_ino_entry_info(sbi);
+ f2fs_init_fsync_node_info(sbi);
+
/* setup f2fs internal modules */
err = f2fs_build_segment_manager(sbi);
if (err) {
@@ -2935,10 +2989,11 @@
err = PTR_ERR(root);
goto free_stats;
}
- if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+ if (!S_ISDIR(root->i_mode) || !root->i_blocks ||
+ !root->i_size || !root->i_nlink) {
iput(root);
err = -EINVAL;
- goto free_node_inode;
+ goto free_stats;
}
sb->s_root = d_make_root(root); /* allocate root dentry */
@@ -2952,10 +3007,7 @@
goto free_root_inode;
#ifdef CONFIG_QUOTA
- /*
- * Turn on quotas which were not enabled for read-only mounts if
- * filesystem has quota feature, so that they are updated correctly.
- */
+ /* Enable quota usage during mount */
if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
err = f2fs_enable_quotas(sb);
if (err) {
@@ -3113,9 +3165,19 @@
static void kill_f2fs_super(struct super_block *sb)
{
if (sb->s_root) {
- set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
- f2fs_stop_gc_thread(F2FS_SB(sb));
- f2fs_stop_discard_thread(F2FS_SB(sb));
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ set_sbi_flag(sbi, SBI_IS_CLOSE);
+ f2fs_stop_gc_thread(sbi);
+ f2fs_stop_discard_thread(sbi);
+
+ if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+ !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ struct cp_control cpc = {
+ .reason = CP_UMOUNT,
+ };
+ f2fs_write_checkpoint(sbi, &cpc);
+ }
}
kill_block_super(sb);
}
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index e1b1b31..30fd016 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <linux/compiler.h>
#include <linux/proc_fs.h>
#include <linux/f2fs_fs.h>
#include <linux/seq_file.h>
@@ -252,6 +253,7 @@
if (t >= 1) {
sbi->gc_mode = GC_URGENT;
if (sbi->gc_thread) {
+ sbi->gc_thread->gc_wake = 1;
wake_up_interruptible_all(
&sbi->gc_thread->gc_wait_queue_head);
wake_up_discard_thread(sbi, true);
@@ -286,8 +288,10 @@
bool gc_entry = (!strcmp(a->attr.name, "gc_urgent") ||
a->struct_type == GC_THREAD);
- if (gc_entry)
- down_read(&sbi->sb->s_umount);
+ if (gc_entry) {
+ if (!down_read_trylock(&sbi->sb->s_umount))
+ return -EAGAIN;
+ }
ret = __sbi_store(a, sbi, buf, count);
if (gc_entry)
up_read(&sbi->sb->s_umount);
@@ -518,7 +522,8 @@
.kset = &f2fs_kset,
};
-static int segment_info_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused segment_info_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -545,7 +550,8 @@
return 0;
}
-static int segment_bits_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -569,7 +575,8 @@
return 0;
}
-static int iostat_info_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -611,6 +618,28 @@
return 0;
}
+static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
+ void *offset)
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ int i;
+
+ seq_puts(seq, "format: victim_secmap bitmaps\n");
+
+ for (i = 0; i < MAIN_SECS(sbi); i++) {
+ if ((i % 10) == 0)
+ seq_printf(seq, "%-10d", i);
+ seq_printf(seq, "%d", test_bit(i, dirty_i->victim_secmap) ? 1 : 0);
+ if ((i % 10) == 9 || i == (MAIN_SECS(sbi) - 1))
+ seq_putc(seq, '\n');
+ else
+ seq_putc(seq, ' ');
+ }
+ return 0;
+}
+
#define F2FS_PROC_FILE_DEF(_name) \
static int _name##_open_fs(struct inode *inode, struct file *file) \
{ \
@@ -627,6 +656,7 @@
F2FS_PROC_FILE_DEF(segment_info);
F2FS_PROC_FILE_DEF(segment_bits);
F2FS_PROC_FILE_DEF(iostat_info);
+F2FS_PROC_FILE_DEF(victim_bits);
int __init f2fs_init_sysfs(void)
{
@@ -677,6 +707,8 @@
&f2fs_seq_segment_bits_fops, sb);
proc_create_data("iostat_info", S_IRUGO, sbi->s_proc,
&f2fs_seq_iostat_info_fops, sb);
+ proc_create_data("victim_bits", S_IRUGO, sbi->s_proc,
+ &f2fs_seq_victim_bits_fops, sb);
}
return 0;
}
@@ -687,6 +719,7 @@
remove_proc_entry("iostat_info", sbi->s_proc);
remove_proc_entry("segment_info", sbi->s_proc);
remove_proc_entry("segment_bits", sbi->s_proc);
+ remove_proc_entry("victim_bits", sbi->s_proc);
remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
}
kobject_del(&sbi->s_kobj);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 7082718..77a010e 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -37,9 +37,6 @@
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
@@ -62,9 +59,6 @@
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
@@ -100,12 +94,22 @@
const char *name, const void *value,
size_t size, int flags)
{
+ unsigned char old_advise = F2FS_I(inode)->i_advise;
+ unsigned char new_advise;
+
if (!inode_owner_or_capable(inode))
return -EPERM;
if (value == NULL)
return -EINVAL;
- F2FS_I(inode)->i_advise |= *(char *)value;
+ new_advise = *(char *)value;
+ if (new_advise & ~FADVISE_MODIFIABLE_BITS)
+ return -EINVAL;
+
+ new_advise = new_advise & FADVISE_MODIFIABLE_BITS;
+ new_advise |= old_advise & ~FADVISE_MODIFIABLE_BITS;
+
+ F2FS_I(inode)->i_advise = new_advise;
f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 8f040f8..25c8b32 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -341,6 +341,7 @@
* for this bh as it's not marked locally
* uptodate. */
status = -EIO;
+ clear_buffer_needs_validate(bh);
put_bh(bh);
bhs[i] = NULL;
continue;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index aa5db8b..f70f8ac 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -304,11 +304,6 @@
* For NAT entries
*/
#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
-#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8)
-#define NAT_ENTRY_BITMAP_SIZE_ALIGNED \
- ((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) / \
- BITS_PER_LONG * BITS_PER_LONG)
-
struct f2fs_nat_entry {
__u8 version; /* latest version of cached nat entry */
diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index 316694d..008f466 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -87,7 +87,7 @@
* According to specification 102 622 chapter 4.4 Pipes,
* the pipe identifier is 7 bits long.
*/
-#define NFC_HCI_MAX_PIPES 127
+#define NFC_HCI_MAX_PIPES 128
struct nfc_hci_init_data {
u8 gate_count;
struct nfc_hci_gate gates[NFC_HCI_MAX_CUSTOM_GATES];
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0bb5046..3d01957 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11166,7 +11166,8 @@
* - A task which has been woken up by try_to_wake_up() and
* waiting for actually being woken up by sched_ttwu_pending().
*/
- if (!se->sum_exec_runtime || p->state == TASK_WAKING)
+ if (!se->sum_exec_runtime ||
+ (p->state == TASK_WAKING && p->sched_remote_wakeup))
return true;
return false;
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index a8fab0c..92fcb92 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -791,7 +791,7 @@
u64 prefer_idle)
{
struct schedtune *st = css_st(css);
- st->prefer_idle = prefer_idle;
+ st->prefer_idle = !!prefer_idle;
return 0;
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dc29b60..f316e90 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1504,6 +1504,8 @@
tmp_iter_page = first_page;
do {
+ cond_resched();
+
to_remove_page = tmp_iter_page;
rb_inc_page(cpu_buffer, &tmp_iter_page);
diff --git a/mm/shmem.c b/mm/shmem.c
index 61a39aa..290c5b8 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2160,6 +2160,8 @@
mpol_shared_policy_init(&info->policy, NULL);
break;
}
+
+ lockdep_annotate_inode_mutex_key(inode);
} else
shmem_free_inode(sb);
return inode;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 340a3db..2cfbe3f 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1147,6 +1147,12 @@
lladdr = neigh->ha;
}
+ /* Update confirmed timestamp for neighbour entry after we
+ * received ARP packet even if it doesn't change IP to MAC binding.
+ */
+ if (new & NUD_CONNECTED)
+ neigh->confirmed = jiffies;
+
/* If entry was valid and address is not changed,
do not change entry state, if new one is STALE.
*/
@@ -1168,15 +1174,12 @@
}
}
- /* Update timestamps only once we know we will make a change to the
+ /* Update timestamp only once we know we will make a change to the
* neighbour entry. Otherwise we risk to move the locktime window with
* noop updates and ignore relevant ARP updates.
*/
- if (new != old || lladdr != neigh->ha) {
- if (new & NUD_CONNECTED)
- neigh->confirmed = jiffies;
+ if (new != old || lladdr != neigh->ha)
neigh->updated = jiffies;
- }
if (new != old) {
neigh_del_timer(neigh);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 90c91a7..275ef13 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1315,6 +1315,7 @@
if (encap)
skb_reset_inner_headers(skb);
skb->network_header = (u8 *)iph - skb->head;
+ skb_reset_mac_len(skb);
} while ((skb = skb->next));
out:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 885cc39..789bbcb 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1740,6 +1740,28 @@
inet_compute_pseudo);
}
+/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
+ * return code conversion for ip layer consumption
+ */
+static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
+ struct udphdr *uh)
+{
+ int ret;
+
+ if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
+ skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+ inet_compute_pseudo);
+
+ ret = udp_queue_rcv_skb(sk, skb);
+
+ /* a return value > 0 means to resubmit the input, but
+ * it wants the return to be -protocol, or 0
+ */
+ if (ret > 0)
+ return -ret;
+ return 0;
+}
+
/*
* All we need to do is get the socket, and then do a checksum.
*/
@@ -1786,14 +1808,9 @@
if (unlikely(sk->sk_rx_dst != dst))
udp_sk_rx_dst_set(sk, dst);
- ret = udp_queue_rcv_skb(sk, skb);
+ ret = udp_unicast_rcv_skb(sk, skb, uh);
sock_put(sk);
- /* a return value > 0 means to resubmit the input, but
- * it wants the return to be -protocol, or 0
- */
- if (ret > 0)
- return -ret;
- return 0;
+ return ret;
}
if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
@@ -1801,22 +1818,8 @@
saddr, daddr, udptable, proto);
sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
- if (sk) {
- int ret;
-
- if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
- skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
- inet_compute_pseudo);
-
- ret = udp_queue_rcv_skb(sk, skb);
-
- /* a return value > 0 means to resubmit the input, but
- * it wants the return to be -protocol, or 0
- */
- if (ret > 0)
- return -ret;
- return 0;
- }
+ if (sk)
+ return udp_unicast_rcv_skb(sk, skb, uh);
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 3cdf4dc..7c539de 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -113,6 +113,7 @@
payload_len = skb->len - nhoff - sizeof(*ipv6h);
ipv6h->payload_len = htons(payload_len);
skb->network_header = (u8 *)ipv6h - skb->head;
+ skb_reset_mac_len(skb);
if (udpfrag) {
int err = ip6_find_1stfragopt(skb, &prevhdr);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index def627f..46f8e7c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -210,12 +210,10 @@
kfree_skb(skb);
return -ENOBUFS;
}
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
consume_skb(skb);
skb = skb2;
- /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
- * it is safe to call in our context (socket lock not held)
- */
- skb_set_owner_w(skb, (struct sock *)sk);
}
if (opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
diff --git a/sound/firewire/bebob/bebob.c b/sound/firewire/bebob/bebob.c
index 3469ac14..d0dfa82 100644
--- a/sound/firewire/bebob/bebob.c
+++ b/sound/firewire/bebob/bebob.c
@@ -263,6 +263,8 @@
error:
mutex_unlock(&devices_mutex);
snd_bebob_stream_destroy_duplex(bebob);
+ kfree(bebob->maudio_special_quirk);
+ bebob->maudio_special_quirk = NULL;
snd_card_free(bebob->card);
dev_info(&bebob->unit->device,
"Sound card registration failed: %d\n", err);
diff --git a/sound/firewire/bebob/bebob_maudio.c b/sound/firewire/bebob/bebob_maudio.c
index 07e5abd..6dbf047 100644
--- a/sound/firewire/bebob/bebob_maudio.c
+++ b/sound/firewire/bebob/bebob_maudio.c
@@ -96,17 +96,13 @@
struct fw_device *device = fw_parent_device(unit);
int err, rcode;
u64 date;
- __le32 cues[3] = {
- cpu_to_le32(MAUDIO_BOOTLOADER_CUE1),
- cpu_to_le32(MAUDIO_BOOTLOADER_CUE2),
- cpu_to_le32(MAUDIO_BOOTLOADER_CUE3)
- };
+ __le32 *cues;
/* check date of software used to build */
err = snd_bebob_read_block(unit, INFO_OFFSET_SW_DATE,
&date, sizeof(u64));
if (err < 0)
- goto end;
+ return err;
/*
* firmware version 5058 or later has date later than "20070401", but
* 'date' is not null-terminated.
@@ -114,20 +110,28 @@
if (date < 0x3230303730343031LL) {
dev_err(&unit->device,
"Use firmware version 5058 or later\n");
- err = -ENOSYS;
- goto end;
+ return -ENXIO;
}
+ cues = kmalloc_array(3, sizeof(*cues), GFP_KERNEL);
+ if (!cues)
+ return -ENOMEM;
+
+ cues[0] = cpu_to_le32(MAUDIO_BOOTLOADER_CUE1);
+ cues[1] = cpu_to_le32(MAUDIO_BOOTLOADER_CUE2);
+ cues[2] = cpu_to_le32(MAUDIO_BOOTLOADER_CUE3);
+
rcode = fw_run_transaction(device->card, TCODE_WRITE_BLOCK_REQUEST,
device->node_id, device->generation,
device->max_speed, BEBOB_ADDR_REG_REQ,
- cues, sizeof(cues));
+ cues, 3 * sizeof(*cues));
+ kfree(cues);
if (rcode != RCODE_COMPLETE) {
dev_err(&unit->device,
"Failed to send a cue to load firmware\n");
err = -EIO;
}
-end:
+
return err;
}
@@ -290,10 +294,6 @@
bebob->midi_output_ports = 2;
}
end:
- if (err < 0) {
- kfree(params);
- bebob->maudio_special_quirk = NULL;
- }
mutex_unlock(&bebob->mutex);
return err;
}
diff --git a/sound/firewire/digi00x/digi00x.c b/sound/firewire/digi00x/digi00x.c
index 1f5e1d2..ef68999 100644
--- a/sound/firewire/digi00x/digi00x.c
+++ b/sound/firewire/digi00x/digi00x.c
@@ -49,6 +49,7 @@
fw_unit_put(dg00x->unit);
mutex_destroy(&dg00x->mutex);
+ kfree(dg00x);
}
static void dg00x_card_free(struct snd_card *card)
diff --git a/sound/firewire/fireworks/fireworks.c b/sound/firewire/fireworks/fireworks.c
index 71a0613..f2d0733 100644
--- a/sound/firewire/fireworks/fireworks.c
+++ b/sound/firewire/fireworks/fireworks.c
@@ -301,6 +301,8 @@
snd_efw_transaction_remove_instance(efw);
snd_efw_stream_destroy_duplex(efw);
snd_card_free(efw->card);
+ kfree(efw->resp_buf);
+ efw->resp_buf = NULL;
dev_info(&efw->unit->device,
"Sound card registration failed: %d\n", err);
}
diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c
index 474b06d..696b6cf 100644
--- a/sound/firewire/oxfw/oxfw.c
+++ b/sound/firewire/oxfw/oxfw.c
@@ -135,6 +135,7 @@
kfree(oxfw->spec);
mutex_destroy(&oxfw->mutex);
+ kfree(oxfw);
}
/*
@@ -212,6 +213,7 @@
static void do_registration(struct work_struct *work)
{
struct snd_oxfw *oxfw = container_of(work, struct snd_oxfw, dwork.work);
+ int i;
int err;
if (oxfw->registered)
@@ -274,7 +276,15 @@
snd_oxfw_stream_destroy_simplex(oxfw, &oxfw->rx_stream);
if (oxfw->has_output)
snd_oxfw_stream_destroy_simplex(oxfw, &oxfw->tx_stream);
+ for (i = 0; i < SND_OXFW_STREAM_FORMAT_ENTRIES; ++i) {
+ kfree(oxfw->tx_stream_formats[i]);
+ oxfw->tx_stream_formats[i] = NULL;
+ kfree(oxfw->rx_stream_formats[i]);
+ oxfw->rx_stream_formats[i] = NULL;
+ }
snd_card_free(oxfw->card);
+ kfree(oxfw->spec);
+ oxfw->spec = NULL;
dev_info(&oxfw->unit->device,
"Sound card registration failed: %d\n", err);
}
diff --git a/sound/firewire/tascam/tascam.c b/sound/firewire/tascam/tascam.c
index 9dc93a7..4c967ac 100644
--- a/sound/firewire/tascam/tascam.c
+++ b/sound/firewire/tascam/tascam.c
@@ -93,6 +93,7 @@
fw_unit_put(tscm->unit);
mutex_destroy(&tscm->mutex);
+ kfree(tscm);
}
static void tscm_card_free(struct snd_card *card)
diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c
index 56fc47b..50b216f 100644
--- a/sound/pci/emu10k1/emufx.c
+++ b/sound/pci/emu10k1/emufx.c
@@ -2520,7 +2520,7 @@
emu->support_tlv = 1;
return put_user(SNDRV_EMU10K1_VERSION, (int __user *)argp);
case SNDRV_EMU10K1_IOCTL_INFO:
- info = kmalloc(sizeof(*info), GFP_KERNEL);
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
snd_emu10k1_fx8010_info(emu, info);
diff --git a/sound/soc/codecs/cs4265.c b/sound/soc/codecs/cs4265.c
index fd966bb..6e8eb1f 100644
--- a/sound/soc/codecs/cs4265.c
+++ b/sound/soc/codecs/cs4265.c
@@ -157,8 +157,8 @@
SOC_SINGLE("Validity Bit Control Switch", CS4265_SPDIF_CTL2,
3, 1, 0),
SOC_ENUM("SPDIF Mono/Stereo", spdif_mono_stereo_enum),
- SOC_SINGLE("MMTLR Data Switch", 0,
- 1, 1, 0),
+ SOC_SINGLE("MMTLR Data Switch", CS4265_SPDIF_CTL2,
+ 0, 1, 0),
SOC_ENUM("Mono Channel Select", spdif_mono_select_enum),
SND_SOC_BYTES("C Data Buffer", CS4265_C_DATA_BUFF, 24),
};