Merge android-4.9.177 (e6d81da) into msm-4.9
* refs/heads/tmp-e6d81da:
Linux 4.9.177
powerpc/booke64: set RI in default MSR
powerpc/lib: fix book3s/32 boot failure due to code patching
drivers/virt/fsl_hypervisor.c: prevent integer overflow in ioctl
drivers/virt/fsl_hypervisor.c: dereferencing error pointers in ioctl
bonding: fix arp_validate toggling in active-backup mode
ipv4: Fix raw socket lookup for local traffic
vrf: sit mtu should not be updated when vrf netdev is the link
vlan: disable SIOCSHWTSTAMP in container
packet: Fix error path in packet_init
net: ucc_geth - fix Oops when changing number of buffers in the ring
fib_rules: return 0 directly if an exactly same rule exists when NLM_F_EXCL not supplied
bridge: Fix error path for kobject_init_and_add()
powerpc/64s: Include cpu header
x86/vdso: Pass --eh-frame-hdr to the linker
x86/vdso: Drop implicit common-page-size linker flag
x86: vdso: Use $LD instead of $CC to link
Revert "x86: vdso: Use $LD instead of $CC to link"
Revert "x86/vdso: Drop implicit common-page-size linker flag"
Don't jump to compute_result state from check_result state
rtlwifi: rtl8723ae: Fix missing break in switch statement
ALSA: pcm: remove SNDRV_PCM_IOCTL1_INFO internal command
cw1200: fix missing unlock on error in cw1200_hw_scan()
Input: synaptics-rmi4 - fix possible double free
spi: ST ST95HF NFC: declare missing of table
spi: Micrel eth switch: declare missing of table
gpu: ipu-v3: dp: fix CSC handling
selftests/net: correct the return value for run_netsocktests
drm/sun4i: Set device driver data at bind time for use in unbind
s390: ctcm: fix ctcm_new_device error return code
MIPS: perf: ath79: Fix perfcount IRQ assignment
ipvs: do not schedule icmp errors from tunnels
selftests: netfilter: check icmp pkttoobig errors are set as related
init: initialize jump labels before command line option parsing
tools lib traceevent: Fix missing equality check for strcmp
KVM: x86: avoid misreporting level-triggered irqs as edge-triggered in tracing
x86/reboot, efi: Use EFI reboot for Acer TravelMate X514-51T
mISDN: Check address length before reading address family
s390/3270: fix lockdep false positive on view->lock
mac80211: fix unaligned access in mesh table hash function
s390/dasd: Fix capacity calculation for large volumes
libnvdimm/btt: Fix a kmemdup failure check
HID: input: add mapping for "Toggle Display" key
HID: input: add mapping for keyboard Brightness Up/Down/Toggle keys
HID: input: add mapping for Expose/Overview key
libnvdimm/namespace: Fix a potential NULL pointer dereference
iio: adc: xilinx: fix potential use-after-free on remove
USB: serial: fix unthrottle races
platform/x86: sony-laptop: Fix unintentional fall-through
bpf: convert htab map to hlist_nulls
bpf: fix struct htab_elem layout
netfilter: compat: initialize all fields in xt_init
ANDROID: cuttlefish_defconfig: Disable DEVTMPFS
ANDROID: Move from clang r349610 to r353983c.
f2fs: fix to avoid accessing xattr across the boundary
f2fs: fix to avoid potential race on sbi->unusable_block_count access/update
f2fs: add tracepoint for f2fs_filemap_fault()
f2fs: introduce DATA_GENERIC_ENHANCE
f2fs: fix to handle error in f2fs_disable_checkpoint()
f2fs: remove redundant check in f2fs_file_write_iter()
f2fs: fix to be aware of readonly device in write_checkpoint()
f2fs: fix to skip recovery on readonly device
f2fs: fix to consider multiple device for readonly check
f2fs: relocate chksum_offset for large_nat_bitmap feature
f2fs: allow unfixed f2fs_checkpoint.checksum_offset
f2fs: Replace spaces with tab
f2fs: insert space before the open parenthesis '('
f2fs: allow address pointer number of dnode aligning to specified size
f2fs: introduce f2fs_read_single_page() for cleanup
f2fs: mark is_extension_exist() inline
f2fs: fix to set FI_UPDATE_WRITE correctly
f2fs: fix to avoid panic in f2fs_inplace_write_data()
f2fs: fix to do sanity check on valid block count of segment
f2fs: fix to do sanity check on valid node/block count
f2fs: fix to avoid panic in do_recover_data()
f2fs: fix to do sanity check on free nid
f2fs: fix to do checksum even if inode page is uptodate
f2fs: fix to avoid panic in f2fs_remove_inode_page()
f2fs: fix to clear dirty inode in error path of f2fs_iget()
f2fs: remove new blank line of f2fs kernel message
f2fs: fix wrong __is_meta_io() macro
f2fs: fix to avoid panic in dec_valid_node_count()
f2fs: fix to avoid panic in dec_valid_block_count()
f2fs: fix to use inline space only if inline_xattr is enable
f2fs: fix to retrieve inline xattr space
f2fs: fix error path of recovery
f2fs: fix to avoid deadloop in foreground GC
f2fs: data: fix warning Using plain integer as NULL pointer
f2fs: add tracepoint for f2fs_file_write_iter()
f2fs: add comment for conditional compilation statement
f2fs: fix potential recursive call when enabling data_flush
f2fs: improve discard handling with multi-device volumes
f2fs: Reduce zoned block device memory usage
f2fs: Fix use of number of devices
Conflicts:
fs/f2fs/data.c
sound/core/pcm_native.c
Change-Id: Iafbaff7b4142f60cb77963ba398f7d6dfc712c82
Signed-off-by: jianzhou <jianzhou@codeaurora.org>
diff --git a/Makefile b/Makefile
index 373bfcf..8be3130 100755
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 9
-SUBLEVEL = 176
+SUBLEVEL = 177
EXTRAVERSION =
NAME = Roaring Lionus
diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig
index b87deb9..3c1e253 100644
--- a/arch/arm64/configs/cuttlefish_defconfig
+++ b/arch/arm64/configs/cuttlefish_defconfig
@@ -196,7 +196,6 @@
# CONFIG_MAC80211_RC_MINSTREL is not set
CONFIG_RFKILL=y
# CONFIG_UEVENT_HELPER is not set
-CONFIG_DEVTMPFS=y
# CONFIG_ALLOW_DEV_COREDUMP is not set
CONFIG_DEBUG_DEVRES=y
CONFIG_OF_UNITTEST=y
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 26a058d..c7c31e2 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -183,12 +183,6 @@
return ath79_sys_type;
}
-int get_c0_perfcount_int(void)
-{
- return ATH79_MISC_IRQ(5);
-}
-EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
-
unsigned int get_c0_compare_int(void)
{
return CP0_LEGACY_COMPARE_IRQ;
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 737e012..319ed53 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -41,7 +41,7 @@
#if defined(CONFIG_PPC_BOOK3E_64)
#define MSR_64BIT MSR_CM
-#define MSR_ (MSR_ME | MSR_CE)
+#define MSR_ (MSR_ME | MSR_RI | MSR_CE)
#define MSR_KERNEL (MSR_ | MSR_64BIT)
#define MSR_USER32 (MSR_ | MSR_PR | MSR_EE)
#define MSR_USER64 (MSR_USER32 | MSR_64BIT)
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 30542e8..f4a98d9 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -4,6 +4,7 @@
//
// Copyright 2018, Michael Ellerman, IBM Corporation.
+#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/debugfs.h>
#include <linux/device.h>
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 14535ad..c312955 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -23,7 +23,7 @@
int err;
/* Make sure we aren't patching a freed init section */
- if (init_mem_is_free && init_section_contains(addr, 4)) {
+ if (*PTRRELOC(&init_mem_is_free) && init_section_contains(addr, 4)) {
pr_debug("Skipping init section patching addr: 0x%px\n", addr);
return 0;
}
diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig
index 4d7796d..df87d30 100644
--- a/arch/x86/configs/x86_64_cuttlefish_defconfig
+++ b/arch/x86/configs/x86_64_cuttlefish_defconfig
@@ -215,7 +215,6 @@
CONFIG_MAC80211=y
CONFIG_RFKILL=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
CONFIG_DEBUG_DEVRES=y
CONFIG_OF=y
CONFIG_OF_UNITTEST=y
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 756dc94..0d3ebdf 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -167,7 +167,8 @@
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
- $(call ld-option, --build-id) -Bsymbolic
+ $(call ld-option, --build-id) $(call ld-option, --eh-frame-hdr) \
+ -Bsymbolic
GCOV_PROFILE := n
#
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4a12362..c55b11f 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -82,6 +82,19 @@
return 0;
}
+/*
+ * Some machines don't handle the default ACPI reboot method and
+ * require the EFI reboot method:
+ */
+static int __init set_efi_reboot(const struct dmi_system_id *d)
+{
+ if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) {
+ reboot_type = BOOT_EFI;
+ pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident);
+ }
+ return 0;
+}
+
void __noreturn machine_real_restart(unsigned int type)
{
local_irq_disable();
@@ -167,6 +180,14 @@
DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
},
},
+ { /* Handle reboot issue on Acer TravelMate X514-51T */
+ .callback = set_efi_reboot,
+ .ident = "Acer TravelMate X514-51T",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"),
+ },
+ },
/* Apple */
{ /* Handle problems with rebooting on Apple MacBook5 */
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0a6cc67..ea618b7 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -434,13 +434,13 @@
);
TRACE_EVENT(kvm_apic_accept_irq,
- TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec),
+ TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec),
TP_ARGS(apicid, dm, tm, vec),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( __u16, dm )
- __field( __u8, tm )
+ __field( __u16, tm )
__field( __u8, vec )
),
diff --git a/build.config.cuttlefish.aarch64 b/build.config.cuttlefish.aarch64
index 7bf19df..428186ff 100644
--- a/build.config.cuttlefish.aarch64
+++ b/build.config.cuttlefish.aarch64
@@ -6,7 +6,7 @@
EXTRA_CMDS=''
KERNEL_DIR=common
POST_DEFCONFIG_CMDS="check_defconfig"
-CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r349610/bin
+CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r353983c/bin
LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin
FILES="
arch/arm64/boot/Image.gz
diff --git a/build.config.cuttlefish.x86_64 b/build.config.cuttlefish.x86_64
index a25afa0..7a79036 100644
--- a/build.config.cuttlefish.x86_64
+++ b/build.config.cuttlefish.x86_64
@@ -6,7 +6,7 @@
EXTRA_CMDS=''
KERNEL_DIR=common
POST_DEFCONFIG_CMDS="check_defconfig"
-CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r349610/bin
+CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r353983c/bin
LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin
FILES="
arch/x86/boot/bzImage
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 97828fa..d58991b 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -137,6 +137,8 @@
ret = -ENOMEM;
goto free_drm;
}
+
+ dev_set_drvdata(dev, drm);
drm->dev_private = drv;
drm_vblank_init(drm, 1);
diff --git a/drivers/gpu/ipu-v3/ipu-dp.c b/drivers/gpu/ipu-v3/ipu-dp.c
index 98686ed..33de3a1 100644
--- a/drivers/gpu/ipu-v3/ipu-dp.c
+++ b/drivers/gpu/ipu-v3/ipu-dp.c
@@ -195,7 +195,8 @@
ipu_dp_csc_init(flow, flow->foreground.in_cs, flow->out_cs,
DP_COM_CONF_CSC_DEF_BOTH);
} else {
- if (flow->foreground.in_cs == flow->out_cs)
+ if (flow->foreground.in_cs == IPUV3_COLORSPACE_UNKNOWN ||
+ flow->foreground.in_cs == flow->out_cs)
/*
* foreground identical to output, apply color
* conversion on background
@@ -261,6 +262,8 @@
struct ipu_dp_priv *priv = flow->priv;
u32 reg, csc;
+ dp->in_cs = IPUV3_COLORSPACE_UNKNOWN;
+
if (!dp->foreground)
return;
@@ -268,8 +271,9 @@
reg = readl(flow->base + DP_COM_CONF);
csc = reg & DP_COM_CONF_CSC_DEF_MASK;
- if (csc == DP_COM_CONF_CSC_DEF_FG)
- reg &= ~DP_COM_CONF_CSC_DEF_MASK;
+ reg &= ~DP_COM_CONF_CSC_DEF_MASK;
+ if (csc == DP_COM_CONF_CSC_DEF_BOTH || csc == DP_COM_CONF_CSC_DEF_BG)
+ reg |= DP_COM_CONF_CSC_DEF_BG;
reg &= ~DP_COM_CONF_FG_EN;
writel(reg, flow->base + DP_COM_CONF);
@@ -350,6 +354,8 @@
mutex_init(&priv->mutex);
for (i = 0; i < IPUV3_NUM_FLOWS; i++) {
+ priv->flow[i].background.in_cs = IPUV3_COLORSPACE_UNKNOWN;
+ priv->flow[i].foreground.in_cs = IPUV3_COLORSPACE_UNKNOWN;
priv->flow[i].foreground.foreground = true;
priv->flow[i].base = priv->base + ipu_dp_flow_base[i];
priv->flow[i].priv = priv;
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index fc7ada2..9f7b1cf 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -607,6 +607,14 @@
break;
}
+ if ((usage->hid & 0xf0) == 0xb0) { /* SC - Display */
+ switch (usage->hid & 0xf) {
+ case 0x05: map_key_clear(KEY_SWITCHVIDEOMODE); break;
+ default: goto ignore;
+ }
+ break;
+ }
+
/*
* Some lazy vendors declare 255 usages for System Control,
* leading to the creation of ABS_X|Y axis and too many others.
@@ -802,6 +810,10 @@
case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break;
case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break;
+ case 0x079: map_key_clear(KEY_KBDILLUMUP); break;
+ case 0x07a: map_key_clear(KEY_KBDILLUMDOWN); break;
+ case 0x07c: map_key_clear(KEY_KBDILLUMTOGGLE); break;
+
case 0x082: map_key_clear(KEY_VIDEO_NEXT); break;
case 0x083: map_key_clear(KEY_LAST); break;
case 0x084: map_key_clear(KEY_ENTER); break;
@@ -932,6 +944,8 @@
case 0x2cb: map_key_clear(KEY_KBDINPUTASSIST_ACCEPT); break;
case 0x2cc: map_key_clear(KEY_KBDINPUTASSIST_CANCEL); break;
+ case 0x29f: map_key_clear(KEY_SCALE); break;
+
default: map_key_clear(KEY_UNKNOWN);
}
break;
diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c
index 56cf590..143894a 100644
--- a/drivers/iio/adc/xilinx-xadc-core.c
+++ b/drivers/iio/adc/xilinx-xadc-core.c
@@ -1299,7 +1299,7 @@
}
free_irq(irq, indio_dev);
clk_disable_unprepare(xadc->clk);
- cancel_delayed_work(&xadc->zynq_unmask_work);
+ cancel_delayed_work_sync(&xadc->zynq_unmask_work);
kfree(xadc->data);
kfree(indio_dev->channels);
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index 4a88312..65038dc 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -772,7 +772,7 @@
error = rmi_register_function(fn);
if (error)
- goto err_put_fn;
+ return error;
if (pdt->function_number == 0x01)
data->f01_container = fn;
@@ -780,10 +780,6 @@
list_add_tail(&fn->node, &data->function_list);
return RMI_SCAN_CONTINUE;
-
-err_put_fn:
- put_device(&fn->dev);
- return error;
}
int rmi_driver_suspend(struct rmi_device *rmi_dev)
diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c
index aa72907..0390603 100644
--- a/drivers/irqchip/irq-ath79-misc.c
+++ b/drivers/irqchip/irq-ath79-misc.c
@@ -22,6 +22,15 @@
#define AR71XX_RESET_REG_MISC_INT_ENABLE 4
#define ATH79_MISC_IRQ_COUNT 32
+#define ATH79_MISC_PERF_IRQ 5
+
+static int ath79_perfcount_irq;
+
+int get_c0_perfcount_int(void)
+{
+ return ath79_perfcount_irq;
+}
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
static void ath79_misc_irq_handler(struct irq_desc *desc)
{
@@ -113,6 +122,8 @@
{
void __iomem *base = domain->host_data;
+ ath79_perfcount_irq = irq_create_mapping(domain, ATH79_MISC_PERF_IRQ);
+
/* Disable and clear all interrupts */
__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 99e5f97..f96b8f2 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -712,10 +712,10 @@
struct sock *sk = sock->sk;
int err = 0;
- if (!maddr || maddr->family != AF_ISDN)
+ if (addr_len < sizeof(struct sockaddr_mISDN))
return -EINVAL;
- if (addr_len < sizeof(struct sockaddr_mISDN))
+ if (!maddr || maddr->family != AF_ISDN)
return -EINVAL;
lock_sock(sk);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c3815fa..0698063 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3914,26 +3914,15 @@
case check_state_check_result:
sh->check_state = check_state_idle;
+ if (s->failed > 1)
+ break;
/* handle a successful check operation, if parity is correct
* we are done. Otherwise update the mismatch count and repair
* parity if !MD_RECOVERY_CHECK
*/
if (sh->ops.zero_sum_result == 0) {
- /* both parities are correct */
- if (!s->failed)
- set_bit(STRIPE_INSYNC, &sh->state);
- else {
- /* in contrast to the raid5 case we can validate
- * parity, but still have a failure to write
- * back
- */
- sh->check_state = check_state_compute_result;
- /* Returning at this point means that we may go
- * off and bring p and/or q uptodate again so
- * we make sure to check zero_sum_result again
- * to verify if p or q need writeback
- */
- }
+ /* Any parity checked was correct */
+ set_bit(STRIPE_INSYNC, &sh->state);
} else {
atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 473da3b..258cb39 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1065,13 +1065,6 @@
{
netdev_info(bond->dev, "Setting arp_validate to %s (%llu)\n",
newval->string, newval->value);
-
- if (bond->dev->flags & IFF_UP) {
- if (!newval->value)
- bond->recv_probe = NULL;
- else if (bond->params.arp_interval)
- bond->recv_probe = bond_arp_rcv;
- }
bond->params.arp_validate = newval->value;
return 0;
diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
index 812a968..6aa4b50 100644
--- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
+++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
@@ -250,14 +250,12 @@
return -EINVAL;
}
+ if (netif_running(netdev))
+ return -EBUSY;
+
ug_info->bdRingLenRx[queue] = ring->rx_pending;
ug_info->bdRingLenTx[queue] = ring->tx_pending;
- if (netif_running(netdev)) {
- /* FIXME: restart automatically */
- netdev_info(netdev, "Please re-open the interface\n");
- }
-
return ret;
}
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index 1e2d4f1..45df036 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -162,6 +162,14 @@
};
MODULE_DEVICE_TABLE(spi, ks8995_id);
+static const struct of_device_id ks8895_spi_of_match[] = {
+ { .compatible = "micrel,ks8995" },
+ { .compatible = "micrel,ksz8864" },
+ { .compatible = "micrel,ksz8795" },
+ { },
+ };
+MODULE_DEVICE_TABLE(of, ks8895_spi_of_match);
+
static inline u8 get_chip_id(u8 val)
{
return (val >> ID1_CHIPID_S) & ID1_CHIPID_M;
@@ -529,6 +537,7 @@
static struct spi_driver ks8995_driver = {
.driver = {
.name = "spi-ks8995",
+ .of_match_table = of_match_ptr(ks8895_spi_of_match),
},
.probe = ks8995_probe,
.remove = ks8995_remove,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
index f8be0bd..0741280 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
@@ -1703,6 +1703,7 @@
rtlhal->oem_id = RT_CID_819X_LENOVO;
break;
}
+ break;
case 0x1025:
rtlhal->oem_id = RT_CID_819X_ACER;
break;
diff --git a/drivers/net/wireless/st/cw1200/scan.c b/drivers/net/wireless/st/cw1200/scan.c
index c5492d7..b35f470 100644
--- a/drivers/net/wireless/st/cw1200/scan.c
+++ b/drivers/net/wireless/st/cw1200/scan.c
@@ -84,8 +84,11 @@
frame.skb = ieee80211_probereq_get(hw, priv->vif->addr, NULL, 0,
req->ie_len);
- if (!frame.skb)
+ if (!frame.skb) {
+ mutex_unlock(&priv->conf_mutex);
+ up(&priv->scan.lock);
return -ENOMEM;
+ }
if (req->ie_len)
memcpy(skb_put(frame.skb, req->ie_len), req->ie, req->ie_len);
diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c
index c2840e4..850e755 100644
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -1074,6 +1074,12 @@
};
MODULE_DEVICE_TABLE(spi, st95hf_id);
+static const struct of_device_id st95hf_spi_of_match[] = {
+ { .compatible = "st,st95hf" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, st95hf_spi_of_match);
+
static int st95hf_probe(struct spi_device *nfc_spi_dev)
{
int ret;
@@ -1260,6 +1266,7 @@
.driver = {
.name = "st95hf",
.owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(st95hf_spi_of_match),
},
.id_table = st95hf_id,
.probe = st95hf_probe,
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 97dd292..5d2c766 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -190,14 +190,15 @@
return NULL;
nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL);
- if (nd_btt->id < 0) {
- kfree(nd_btt);
- return NULL;
- }
+ if (nd_btt->id < 0)
+ goto out_nd_btt;
nd_btt->lbasize = lbasize;
- if (uuid)
+ if (uuid) {
uuid = kmemdup(uuid, 16, GFP_KERNEL);
+ if (!uuid)
+ goto out_put_id;
+ }
nd_btt->uuid = uuid;
dev = &nd_btt->dev;
dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id);
@@ -212,6 +213,13 @@
return NULL;
}
return dev;
+
+out_put_id:
+ ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
+
+out_nd_btt:
+ kfree(nd_btt);
+ return NULL;
}
struct device *nd_btt_create(struct nd_region *nd_region)
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 9bc5f55..cf4a90b 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -2028,9 +2028,12 @@
if (!nsblk->uuid)
goto blk_err;
memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
- if (name[0])
+ if (name[0]) {
nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
GFP_KERNEL);
+ if (!nsblk->alt_name)
+ goto blk_err;
+ }
res = nsblk_add_resource(nd_region, ndd, nsblk,
__le64_to_cpu(nd_label->dpa));
if (!res)
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index c890a49..f655586 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -4422,14 +4422,16 @@
}
return AE_OK;
}
- default:
- dprintk("Resource %d isn't an IRQ nor an IO port\n",
- resource->type);
case ACPI_RESOURCE_TYPE_END_TAG:
return AE_OK;
+
+ default:
+ dprintk("Resource %d isn't an IRQ nor an IO port\n",
+ resource->type);
+ return AE_CTRL_TERMINATE;
+
}
- return AE_CTRL_TERMINATE;
}
static int sony_pic_possible_resources(struct acpi_device *device)
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 11c6335..9d77220 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -2054,14 +2054,14 @@
blk_per_trk = recs_per_track(&private->rdc_data, 0, block->bp_block);
raw:
- block->blocks = (private->real_cyl *
+ block->blocks = ((unsigned long) private->real_cyl *
private->rdc_data.trk_per_cyl *
blk_per_trk);
dev_info(&device->cdev->dev,
- "DASD with %d KB/block, %d KB total size, %d KB/track, "
+ "DASD with %u KB/block, %lu KB total size, %u KB/track, "
"%s\n", (block->bp_block >> 10),
- ((private->real_cyl *
+ (((unsigned long) private->real_cyl *
private->rdc_data.trk_per_cyl *
blk_per_trk * (block->bp_block >> 9)) >> 1),
((blk_per_trk * block->bp_block) >> 10),
diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c
index 285b400..5d5e78a 100644
--- a/drivers/s390/char/con3270.c
+++ b/drivers/s390/char/con3270.c
@@ -628,7 +628,7 @@
(void (*)(unsigned long)) con3270_read_tasklet,
(unsigned long) condev->read);
- raw3270_add_view(&condev->view, &con3270_fn, 1);
+ raw3270_add_view(&condev->view, &con3270_fn, 1, RAW3270_VIEW_LOCK_IRQ);
INIT_LIST_HEAD(&condev->freemem);
for (i = 0; i < CON3270_STRING_PAGES; i++) {
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index 85eca1c..04a6810 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -462,7 +462,8 @@
init_waitqueue_head(&fp->wait);
fp->fs_pid = get_pid(task_pid(current));
- rc = raw3270_add_view(&fp->view, &fs3270_fn, minor);
+ rc = raw3270_add_view(&fp->view, &fs3270_fn, minor,
+ RAW3270_VIEW_LOCK_BH);
if (rc) {
fs3270_free_view(&fp->view);
goto out;
diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c
index a2da898..1ebf632 100644
--- a/drivers/s390/char/raw3270.c
+++ b/drivers/s390/char/raw3270.c
@@ -919,7 +919,7 @@
* Add view to device with minor "minor".
*/
int
-raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor)
+raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor, int subclass)
{
unsigned long flags;
struct raw3270 *rp;
@@ -941,6 +941,7 @@
view->cols = rp->cols;
view->ascebc = rp->ascebc;
spin_lock_init(&view->lock);
+ lockdep_set_subclass(&view->lock, subclass);
list_add(&view->list, &rp->view_list);
rc = 0;
spin_unlock_irqrestore(get_ccwdev_lock(rp->cdev), flags);
diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h
index 56519cb..7577d7d 100644
--- a/drivers/s390/char/raw3270.h
+++ b/drivers/s390/char/raw3270.h
@@ -149,6 +149,8 @@
struct raw3270_view {
struct list_head list;
spinlock_t lock;
+#define RAW3270_VIEW_LOCK_IRQ 0
+#define RAW3270_VIEW_LOCK_BH 1
atomic_t ref_count;
struct raw3270 *dev;
struct raw3270_fn *fn;
@@ -157,7 +159,7 @@
unsigned char *ascebc; /* ascii -> ebcdic table */
};
-int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int);
+int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int, int);
int raw3270_activate_view(struct raw3270_view *);
void raw3270_del_view(struct raw3270_view *);
void raw3270_deactivate_view(struct raw3270_view *);
diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c
index 272cb6c..6dd6f9f 100644
--- a/drivers/s390/char/tty3270.c
+++ b/drivers/s390/char/tty3270.c
@@ -978,7 +978,8 @@
return PTR_ERR(tp);
rc = raw3270_add_view(&tp->view, &tty3270_fn,
- tty->index + RAW3270_FIRSTMINOR);
+ tty->index + RAW3270_FIRSTMINOR,
+ RAW3270_VIEW_LOCK_BH);
if (rc) {
tty3270_free_view(tp);
return rc;
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index ad17fc5..e22b9ac 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -1595,6 +1595,7 @@
if (priv->channel[direction] == NULL) {
if (direction == CTCM_WRITE)
channel_free(priv->channel[CTCM_READ]);
+ result = -ENODEV;
goto out_dev;
}
priv->channel[direction]->netdev = dev;
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 944de65..f1e74f5 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -350,6 +350,7 @@
struct usb_serial_port *port = urb->context;
unsigned char *data = urb->transfer_buffer;
unsigned long flags;
+ bool stopped = false;
int status = urb->status;
int i;
@@ -357,33 +358,51 @@
if (urb == port->read_urbs[i])
break;
}
- set_bit(i, &port->read_urbs_free);
dev_dbg(&port->dev, "%s - urb %d, len %d\n", __func__, i,
urb->actual_length);
switch (status) {
case 0:
+ usb_serial_debug_data(&port->dev, __func__, urb->actual_length,
+ data);
+ port->serial->type->process_read_urb(urb);
break;
case -ENOENT:
case -ECONNRESET:
case -ESHUTDOWN:
dev_dbg(&port->dev, "%s - urb stopped: %d\n",
__func__, status);
- return;
+ stopped = true;
+ break;
case -EPIPE:
dev_err(&port->dev, "%s - urb stopped: %d\n",
__func__, status);
- return;
+ stopped = true;
+ break;
default:
dev_dbg(&port->dev, "%s - nonzero urb status: %d\n",
__func__, status);
- goto resubmit;
+ break;
}
- usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data);
- port->serial->type->process_read_urb(urb);
+ /*
+ * Make sure URB processing is done before marking as free to avoid
+ * racing with unthrottle() on another CPU. Matches the barriers
+ * implied by the test_and_clear_bit() in
+ * usb_serial_generic_submit_read_urb().
+ */
+ smp_mb__before_atomic();
+ set_bit(i, &port->read_urbs_free);
+ /*
+ * Make sure URB is marked as free before checking the throttled flag
+ * to avoid racing with unthrottle() on another CPU. Matches the
+ * smp_mb() in unthrottle().
+ */
+ smp_mb__after_atomic();
-resubmit:
+ if (stopped)
+ return;
+
/* Throttle the device if requested by tty */
spin_lock_irqsave(&port->lock, flags);
port->throttled = port->throttle_req;
@@ -458,6 +477,12 @@
port->throttled = port->throttle_req = 0;
spin_unlock_irq(&port->lock);
+ /*
+ * Matches the smp_mb__after_atomic() in
+ * usb_serial_generic_read_bulk_callback().
+ */
+ smp_mb();
+
if (was_throttled)
usb_serial_generic_submit_read_urbs(port, GFP_KERNEL);
}
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
index 150ce2a..732e9ab 100644
--- a/drivers/virt/fsl_hypervisor.c
+++ b/drivers/virt/fsl_hypervisor.c
@@ -215,6 +215,9 @@
* hypervisor.
*/
lb_offset = param.local_vaddr & (PAGE_SIZE - 1);
+ if (param.count == 0 ||
+ param.count > U64_MAX - lb_offset - PAGE_SIZE + 1)
+ return -EINVAL;
num_pages = (param.count + lb_offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
/* Allocate the buffers we need */
@@ -334,8 +337,8 @@
struct fsl_hv_ioctl_prop param;
char __user *upath, *upropname;
void __user *upropval;
- char *path = NULL, *propname = NULL;
- void *propval = NULL;
+ char *path, *propname;
+ void *propval;
int ret = 0;
/* Get the parameters from the user. */
@@ -347,32 +350,30 @@
upropval = (void __user *)(uintptr_t)param.propval;
path = strndup_user(upath, FH_DTPROP_MAX_PATHLEN);
- if (IS_ERR(path)) {
- ret = PTR_ERR(path);
- goto out;
- }
+ if (IS_ERR(path))
+ return PTR_ERR(path);
propname = strndup_user(upropname, FH_DTPROP_MAX_PATHLEN);
if (IS_ERR(propname)) {
ret = PTR_ERR(propname);
- goto out;
+ goto err_free_path;
}
if (param.proplen > FH_DTPROP_MAX_PROPLEN) {
ret = -EINVAL;
- goto out;
+ goto err_free_propname;
}
propval = kmalloc(param.proplen, GFP_KERNEL);
if (!propval) {
ret = -ENOMEM;
- goto out;
+ goto err_free_propname;
}
if (set) {
if (copy_from_user(propval, upropval, param.proplen)) {
ret = -EFAULT;
- goto out;
+ goto err_free_propval;
}
param.ret = fh_partition_set_dtprop(param.handle,
@@ -391,7 +392,7 @@
if (copy_to_user(upropval, propval, param.proplen) ||
put_user(param.proplen, &p->proplen)) {
ret = -EFAULT;
- goto out;
+ goto err_free_propval;
}
}
}
@@ -399,10 +400,12 @@
if (put_user(param.ret, &p->ret))
ret = -EFAULT;
-out:
- kfree(path);
+err_free_propval:
kfree(propval);
+err_free_propname:
kfree(propname);
+err_free_path:
+ kfree(path);
return ret;
}
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index b32efb5..279adb4 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -285,7 +285,7 @@
/* assert(atomic_read(acl->a_refcount) == 1); */
FOREACH_ACL_ENTRY(pa, acl, pe) {
- switch(pa->e_tag) {
+ switch (pa->e_tag) {
case ACL_USER_OBJ:
pa->e_perm &= (mode >> 6) | ~S_IRWXO;
mode &= (pa->e_perm << 6) | ~S_IRWXU;
@@ -326,7 +326,7 @@
}
*mode_p = (*mode_p & ~S_IRWXUGO) | mode;
- return not_equiv;
+ return not_equiv;
}
static int f2fs_acl_create(struct inode *dir, umode_t *mode,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 5029480..d4c5310 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -66,7 +66,7 @@
.old_blkaddr = index,
.new_blkaddr = index,
.encrypted_page = NULL,
- .is_meta = is_meta,
+ .is_por = !is_meta,
};
int err;
@@ -130,6 +130,30 @@
return __get_meta_page(sbi, index, false);
}
+static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
+ int type)
+{
+ struct seg_entry *se;
+ unsigned int segno, offset;
+ bool exist;
+
+ if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ)
+ return true;
+
+ segno = GET_SEGNO(sbi, blkaddr);
+ offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+ se = get_seg_entry(sbi, segno);
+
+ exist = f2fs_test_bit(offset, se->cur_valid_map);
+ if (!exist && type == DATA_GENERIC_ENHANCE) {
+ f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
+ "blkaddr:%u, sit bitmap:%d", blkaddr, exist);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ WARN_ON(1);
+ }
+ return exist;
+}
+
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
@@ -151,15 +175,22 @@
return false;
break;
case META_POR:
- case DATA_GENERIC:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
- blkaddr < MAIN_BLKADDR(sbi))) {
- if (type == DATA_GENERIC) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "access invalid blkaddr:%u", blkaddr);
- WARN_ON(1);
- }
+ blkaddr < MAIN_BLKADDR(sbi)))
return false;
+ break;
+ case DATA_GENERIC:
+ case DATA_GENERIC_ENHANCE:
+ case DATA_GENERIC_ENHANCE_READ:
+ if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
+ blkaddr < MAIN_BLKADDR(sbi))) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "access invalid blkaddr:%u", blkaddr);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ WARN_ON(1);
+ return false;
+ } else {
+ return __is_bitmap_valid(sbi, blkaddr, type);
}
break;
case META_GENERIC:
@@ -189,7 +220,7 @@
.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
.encrypted_page = NULL,
.in_list = false,
- .is_meta = (type != META_POR),
+ .is_por = (type == META_POR),
};
struct blk_plug plug;
@@ -644,6 +675,12 @@
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
+ if (bdev_read_only(sbi->sb->s_bdev)) {
+ f2fs_msg(sbi->sb, KERN_INFO, "write access "
+ "unavailable, skipping orphan cleanup");
+ return 0;
+ }
+
if (s_flags & MS_RDONLY) {
f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
sbi->sb->s_flags &= ~MS_RDONLY;
@@ -758,13 +795,27 @@
}
}
+static __u32 f2fs_checkpoint_chksum(struct f2fs_sb_info *sbi,
+ struct f2fs_checkpoint *ckpt)
+{
+ unsigned int chksum_ofs = le32_to_cpu(ckpt->checksum_offset);
+ __u32 chksum;
+
+ chksum = f2fs_crc32(sbi, ckpt, chksum_ofs);
+ if (chksum_ofs < CP_CHKSUM_OFFSET) {
+ chksum_ofs += sizeof(chksum);
+ chksum = f2fs_chksum(sbi, chksum, (__u8 *)ckpt + chksum_ofs,
+ F2FS_BLKSIZE - chksum_ofs);
+ }
+ return chksum;
+}
+
static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
struct f2fs_checkpoint **cp_block, struct page **cp_page,
unsigned long long *version)
{
- unsigned long blk_size = sbi->blocksize;
size_t crc_offset = 0;
- __u32 crc = 0;
+ __u32 crc;
*cp_page = f2fs_get_meta_page(sbi, cp_addr);
if (IS_ERR(*cp_page))
@@ -773,15 +824,27 @@
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
- if (crc_offset > (blk_size - sizeof(__le32))) {
+ if (crc_offset < CP_MIN_CHKSUM_OFFSET ||
+ crc_offset > CP_CHKSUM_OFFSET) {
f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid crc_offset: %zu", crc_offset);
return -EINVAL;
}
- crc = cur_cp_crc(*cp_block);
- if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
+ if (__is_set_ckpt_flags(*cp_block, CP_LARGE_NAT_BITMAP_FLAG)) {
+ if (crc_offset != CP_MIN_CHKSUM_OFFSET) {
+ f2fs_put_page(*cp_page, 1);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "layout of large_nat_bitmap is deprecated, "
+ "run fsck to repair, chksum_offset: %zu",
+ crc_offset);
+ return -EINVAL;
+ }
+ }
+
+ crc = f2fs_checkpoint_chksum(sbi, *cp_block);
+ if (crc != cur_cp_crc(*cp_block)) {
f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
return -EINVAL;
@@ -1009,13 +1072,11 @@
if (inode) {
unsigned long cur_ino = inode->i_ino;
- if (is_dir)
- F2FS_I(inode)->cp_task = current;
+ F2FS_I(inode)->cp_task = current;
filemap_fdatawrite(inode->i_mapping);
- if (is_dir)
- F2FS_I(inode)->cp_task = NULL;
+ F2FS_I(inode)->cp_task = NULL;
iput(inode);
/* We need to give cpu to another writers. */
@@ -1391,7 +1452,7 @@
get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
- crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
+ crc32 = f2fs_checkpoint_chksum(sbi, ckpt);
*((__le32 *)((unsigned char *)ckpt +
le32_to_cpu(ckpt->checksum_offset)))
= cpu_to_le32(crc32);
@@ -1475,7 +1536,11 @@
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
+
+ spin_lock(&sbi->stat_lock);
sbi->unusable_block_count = 0;
+ spin_unlock(&sbi->stat_lock);
+
__set_cp_next_pack(sbi);
/*
@@ -1500,6 +1565,9 @@
unsigned long long ckpt_ver;
int err = 0;
+ if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi))
+ return -EROFS;
+
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
if (cpc->reason != CP_PAUSE)
return 0;
@@ -1516,10 +1584,6 @@
err = -EIO;
goto out;
}
- if (f2fs_readonly(sbi->sb)) {
- err = -EROFS;
- goto out;
- }
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index afcc11a..ae2e39f 100755
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -226,12 +226,14 @@
struct block_device *bdev = sbi->sb->s_bdev;
int i;
- for (i = 0; i < sbi->s_ndevs; i++) {
- if (FDEV(i).start_blk <= blk_addr &&
- FDEV(i).end_blk >= blk_addr) {
- blk_addr -= FDEV(i).start_blk;
- bdev = FDEV(i).bdev;
- break;
+ if (f2fs_is_multi_device(sbi)) {
+ for (i = 0; i < sbi->s_ndevs; i++) {
+ if (FDEV(i).start_blk <= blk_addr &&
+ FDEV(i).end_blk >= blk_addr) {
+ blk_addr -= FDEV(i).start_blk;
+ bdev = FDEV(i).bdev;
+ break;
+ }
}
}
if (bio) {
@@ -245,6 +247,9 @@
{
int i;
+ if (!f2fs_is_multi_device(sbi))
+ return 0;
+
for (i = 0; i < sbi->s_ndevs; i++)
if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
return i;
@@ -453,7 +458,7 @@
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
- __submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
+ __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
@@ -482,7 +487,8 @@
struct inode *inode = fio->page->mapping->host;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
- __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ fio->is_por ? META_POR : (__is_meta_io(fio) ?
+ META_GENERIC : DATA_GENERIC_ENHANCE)))
return -EFAULT;
trace_f2fs_submit_page_bio(page, fio);
@@ -542,9 +548,7 @@
spin_unlock(&io->io_lock);
}
- if (__is_valid_data_blkaddr(fio->old_blkaddr))
- verify_block_addr(fio, fio->old_blkaddr);
- verify_block_addr(fio, fio->new_blkaddr);
+ verify_fio_blkaddr(fio);
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
fio->op_flags |= fio->encrypted_page ? REQ_NOENCRYPT : 0;
@@ -615,9 +619,6 @@
struct bio_post_read_ctx *ctx;
unsigned int post_read_steps = 0;
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
- return ERR_PTR(-EFAULT);
-
bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
if (!bio)
return ERR_PTR(-ENOMEM);
@@ -649,8 +650,10 @@
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
block_t blkaddr)
{
- struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct bio *bio;
+ bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -665,8 +668,8 @@
return -EFAULT;
}
ClearPageError(page);
- inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
- __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
+ inc_page_count(sbi, F2FS_RD_DATA);
+ __f2fs_submit_read_bio(sbi, bio, DATA);
return 0;
}
@@ -794,6 +797,11 @@
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE_READ)) {
+ err = -EFAULT;
+ goto put_err;
+ }
goto got_it;
}
@@ -807,6 +815,13 @@
err = -ENOENT;
goto put_err;
}
+ if (dn.data_blkaddr != NEW_ADDR &&
+ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+ dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ err = -EFAULT;
+ goto put_err;
+ }
got_it:
if (PageUptodate(page)) {
unlock_page(page);
@@ -1149,12 +1164,12 @@
blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
if (__is_valid_data_blkaddr(blkaddr) &&
- !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
err = -EFAULT;
goto sync_out;
}
- if (is_valid_data_blkaddr(sbi, blkaddr)) {
+ if (__is_valid_data_blkaddr(blkaddr)) {
/* use out-place-update for driect IO under LFS mode */
if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
map->m_may_create) {
@@ -1564,6 +1579,130 @@
return ret;
}
+static int f2fs_read_single_page(struct inode *inode, struct page *page,
+ unsigned nr_pages,
+ struct f2fs_map_blocks *map,
+ struct bio **bio_ret,
+ sector_t *last_block_in_bio,
+ bool is_readahead)
+{
+ struct bio *bio = *bio_ret;
+ const unsigned blkbits = inode->i_blkbits;
+ const unsigned blocksize = 1 << blkbits;
+ sector_t block_in_file;
+ sector_t last_block;
+ sector_t last_block_in_file;
+ sector_t block_nr;
+ bool bio_encrypted;
+ u64 dun;
+ int ret = 0;
+
+ block_in_file = (sector_t)page->index;
+ last_block = block_in_file + nr_pages;
+ last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
+ blkbits;
+ if (last_block > last_block_in_file)
+ last_block = last_block_in_file;
+
+ /* just zeroing out page which is beyond EOF */
+ if (block_in_file >= last_block)
+ goto zero_out;
+ /*
+ * Map blocks using the previous result first.
+ */
+ if ((map->m_flags & F2FS_MAP_MAPPED) &&
+ block_in_file > map->m_lblk &&
+ block_in_file < (map->m_lblk + map->m_len))
+ goto got_it;
+
+ /*
+ * Then do more f2fs_map_blocks() calls until we are
+ * done with this page.
+ */
+ map->m_lblk = block_in_file;
+ map->m_len = last_block - block_in_file;
+
+ ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
+ if (ret)
+ goto out;
+got_it:
+ if ((map->m_flags & F2FS_MAP_MAPPED)) {
+ block_nr = map->m_pblk + block_in_file - map->m_lblk;
+ SetPageMappedToDisk(page);
+
+ if (!PageUptodate(page) && !cleancache_get_page(page)) {
+ SetPageUptodate(page);
+ goto confused;
+ }
+
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
+ DATA_GENERIC_ENHANCE_READ)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ } else {
+zero_out:
+ zero_user_segment(page, 0, PAGE_SIZE);
+ if (!PageUptodate(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+ goto out;
+ }
+
+ /*
+ * This page will go to BIO. Do we need to send this
+ * BIO off first?
+ */
+ if (bio && (*last_block_in_bio != block_nr - 1 ||
+ !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
+submit_and_realloc:
+ __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ bio = NULL;
+ }
+
+ dun = PG_DUN(inode, page);
+ bio_encrypted = f2fs_may_encrypt_bio(inode, NULL);
+ if (!fscrypt_mergeable_bio(bio, dun, bio_encrypted, 0)) {
+ __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ bio = NULL;
+ }
+
+ if (bio == NULL) {
+ bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
+ is_readahead ? REQ_RAHEAD : 0);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ bio = NULL;
+ goto out;
+ }
+ if (bio_encrypted)
+ fscrypt_set_ice_dun(inode, bio, dun);
+ }
+
+ /*
+ * If the page is under writeback, we need to wait for
+ * its completion to see the correct decrypted data.
+ */
+ f2fs_wait_on_block_writeback(inode, block_nr);
+
+ if (bio_add_page(bio, page, blocksize, 0) < blocksize)
+ goto submit_and_realloc;
+
+ inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
+ ClearPageError(page);
+ *last_block_in_bio = block_nr;
+ goto out;
+confused:
+ if (bio) {
+ __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ bio = NULL;
+ }
+ unlock_page(page);
+out:
+ *bio_ret = bio;
+ return ret;
+}
+
/*
* This function was originally taken from fs/mpage.c, and customized for f2fs.
* Major change was from block_size == page_size in f2fs by default.
@@ -1580,15 +1719,8 @@
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
struct inode *inode = mapping->host;
- const unsigned blkbits = inode->i_blkbits;
- const unsigned blocksize = 1 << blkbits;
- sector_t block_in_file;
- sector_t last_block;
- sector_t last_block_in_file;
- sector_t block_nr;
struct f2fs_map_blocks map;
- bool bio_encrypted;
- u64 dun;
+ int ret = 0;
map.m_pblk = 0;
map.m_lblk = 0;
@@ -1611,116 +1743,22 @@
goto next_page;
}
- block_in_file = (sector_t)page->index;
- last_block = block_in_file + nr_pages;
- last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
- blkbits;
- if (last_block > last_block_in_file)
- last_block = last_block_in_file;
-
- /* just zeroing out page which is beyond EOF */
- if (block_in_file >= last_block)
- goto zero_out;
- /*
- * Map blocks using the previous result first.
- */
- if ((map.m_flags & F2FS_MAP_MAPPED) &&
- block_in_file > map.m_lblk &&
- block_in_file < (map.m_lblk + map.m_len))
- goto got_it;
-
- /*
- * Then do more f2fs_map_blocks() calls until we are
- * done with this page.
- */
- map.m_lblk = block_in_file;
- map.m_len = last_block - block_in_file;
-
- if (f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT))
- goto set_error_page;
-got_it:
- if ((map.m_flags & F2FS_MAP_MAPPED)) {
- block_nr = map.m_pblk + block_in_file - map.m_lblk;
- SetPageMappedToDisk(page);
-
- if (!PageUptodate(page) && !cleancache_get_page(page)) {
- SetPageUptodate(page);
- goto confused;
- }
-
- if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
- DATA_GENERIC))
- goto set_error_page;
- } else {
-zero_out:
+ ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio,
+ &last_block_in_bio, is_readahead);
+ if (ret) {
+ SetPageError(page);
zero_user_segment(page, 0, PAGE_SIZE);
- if (!PageUptodate(page))
- SetPageUptodate(page);
unlock_page(page);
- goto next_page;
}
- /*
- * This page will go to BIO. Do we need to send this
- * BIO off first?
- */
- if (bio && (last_block_in_bio != block_nr - 1 ||
- !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
-submit_and_realloc:
- __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
- bio = NULL;
- }
-
- dun = PG_DUN(inode, page);
- bio_encrypted = f2fs_may_encrypt_bio(inode, NULL);
- if (!fscrypt_mergeable_bio(bio, dun, bio_encrypted, 0)) {
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
- bio = NULL;
- }
-
- if (bio == NULL) {
- bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
- is_readahead ? REQ_RAHEAD : 0);
- if (IS_ERR(bio)) {
- bio = NULL;
- goto set_error_page;
- }
- if (bio_encrypted)
- fscrypt_set_ice_dun(inode, bio, dun);
- }
-
- /*
- * If the page is under writeback, we need to wait for
- * its completion to see the correct decrypted data.
- */
- f2fs_wait_on_block_writeback(inode, block_nr);
-
- if (bio_add_page(bio, page, blocksize, 0) < blocksize)
- goto submit_and_realloc;
-
- inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
- ClearPageError(page);
- last_block_in_bio = block_nr;
- goto next_page;
-set_error_page:
- SetPageError(page);
- zero_user_segment(page, 0, PAGE_SIZE);
- unlock_page(page);
- goto next_page;
-confused:
- if (bio) {
- __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
- bio = NULL;
- }
- unlock_page(page);
next_page:
if (pages)
put_page(page);
}
BUG_ON(pages && !list_empty(pages));
if (bio)
- __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
- return 0;
+ __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ return pages ? 0 : ret;
}
static int f2fs_read_data_page(struct file *file, struct page *page)
@@ -1893,7 +1931,7 @@
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
- DATA_GENERIC))
+ DATA_GENERIC_ENHANCE))
return -EFAULT;
ipu_force = true;
@@ -1920,7 +1958,7 @@
got_it:
if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
- DATA_GENERIC)) {
+ DATA_GENERIC_ENHANCE)) {
err = -EFAULT;
goto out_writepage;
}
@@ -1928,7 +1966,8 @@
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
+ if (ipu_force ||
+ (__is_valid_data_blkaddr(fio->old_blkaddr) &&
need_inplace_update(fio))) {
err = encrypt_one_page(fio);
if (err)
@@ -1946,9 +1985,10 @@
true);
if (PageWriteback(page))
end_page_writeback(page);
+ } else {
+ set_inode_flag(inode, FI_UPDATE_WRITE);
}
trace_f2fs_do_write_data_page(fio->page, IPU);
- set_inode_flag(inode, FI_UPDATE_WRITE);
return err;
}
@@ -2110,7 +2150,8 @@
}
unlock_page(page);
- if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
+ if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
+ !F2FS_I(inode)->cp_task)
f2fs_balance_fs(sbi, need_balance_fs);
if (unlikely(f2fs_cp_error(sbi))) {
@@ -2581,6 +2622,11 @@
zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
} else {
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE_READ)) {
+ err = -EFAULT;
+ goto fail;
+ }
err = f2fs_submit_page_read(inode, page, blkaddr);
if (err)
goto fail;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4af0351..7f72bd0 100755
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -212,7 +212,14 @@
META_SSA,
META_MAX,
META_POR,
- DATA_GENERIC,
+ DATA_GENERIC, /* check range only */
+ DATA_GENERIC_ENHANCE, /* strong check on range and segment bitmap */
+ DATA_GENERIC_ENHANCE_READ, /*
+ * strong check on range and segment
+ * bitmap but no warning due to race
+ * condition of read on truncated area
+ * by extent_cache
+ */
META_GENERIC,
};
@@ -1040,7 +1047,7 @@
bool submitted; /* indicate IO submission */
int need_lock; /* indicate we need to lock cp_rwsem */
bool in_list; /* indicate fio is in io_list */
- bool is_meta; /* indicate borrow meta inode mapping or not */
+ bool is_por; /* indicate IO is from recovery or not */
bool retry; /* need to reallocate block address */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
@@ -1067,8 +1074,8 @@
block_t start_blk;
block_t end_blk;
#ifdef CONFIG_BLK_DEV_ZONED
- unsigned int nr_blkz; /* Total number of zones */
- u8 *blkz_type; /* Array of zones type */
+ unsigned int nr_blkz; /* Total number of zones */
+ unsigned long *blkz_seq; /* Bitmap indicating sequential zones */
#endif
};
@@ -1365,6 +1372,17 @@
}
#endif
+/*
+ * Test if the mounted volume is a multi-device volume.
+ * - For a single regular disk volume, sbi->s_ndevs is 0.
+ * - For a single zoned disk volume, sbi->s_ndevs is 1.
+ * - For a multi-device volume, sbi->s_ndevs is always 2 or more.
+ */
+static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi)
+{
+ return sbi->s_ndevs > 1;
+}
+
/* For write statistics. Suppose sector size is 512 bytes,
* and the return value is in kbytes. s is of struct f2fs_sb_info.
*/
@@ -1777,6 +1795,7 @@
return -ENOSPC;
}
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode,
block_t count)
@@ -1785,13 +1804,21 @@
spin_lock(&sbi->stat_lock);
f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
- f2fs_bug_on(sbi, inode->i_blocks < sectors);
sbi->total_valid_block_count -= (block_t)count;
if (sbi->reserved_blocks &&
sbi->current_reserved_blocks < sbi->reserved_blocks)
sbi->current_reserved_blocks = min(sbi->reserved_blocks,
sbi->current_reserved_blocks + count);
spin_unlock(&sbi->stat_lock);
+ if (unlikely(inode->i_blocks < sectors)) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks,
+ (unsigned long long)sectors);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return;
+ }
f2fs_i_blocks_write(inode, count, false, true);
}
@@ -1889,7 +1916,11 @@
if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) {
offset = (flag == SIT_BITMAP) ?
le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0;
- return &ckpt->sit_nat_version_bitmap + offset;
+ /*
+ * if large_nat_bitmap feature is enabled, leave checksum
+ * protection for all nat/sit bitmaps.
+ */
+ return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32);
}
if (__cp_payload(sbi) > 0) {
@@ -2008,7 +2039,6 @@
f2fs_bug_on(sbi, !sbi->total_valid_block_count);
f2fs_bug_on(sbi, !sbi->total_valid_node_count);
- f2fs_bug_on(sbi, !is_inode && !inode->i_blocks);
sbi->total_valid_node_count--;
sbi->total_valid_block_count--;
@@ -2018,10 +2048,19 @@
spin_unlock(&sbi->stat_lock);
- if (is_inode)
+ if (is_inode) {
dquot_free_inode(inode);
- else
+ } else {
+ if (unlikely(inode->i_blocks == 0)) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return;
+ }
f2fs_i_blocks_write(inode, 1, false, true);
+ }
}
static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
@@ -2543,9 +2582,18 @@
return is_inode_flag_set(inode, FI_INLINE_XATTR);
}
+#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a))
+
static inline unsigned int addrs_per_inode(struct inode *inode)
{
- return CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode);
+ unsigned int addrs = CUR_ADDRS_PER_INODE(inode) -
+ get_inline_xattr_addrs(inode);
+ return ALIGN_DOWN(addrs, 1);
+}
+
+static inline unsigned int addrs_per_block(struct inode *inode)
+{
+ return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, 1);
}
static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
@@ -2558,7 +2606,9 @@
static inline int inline_xattr_size(struct inode *inode)
{
- return get_inline_xattr_addrs(inode) * sizeof(__le32);
+ if (f2fs_has_inline_xattr(inode))
+ return get_inline_xattr_addrs(inode) * sizeof(__le32);
+ return 0;
}
static inline int f2fs_has_inline_data(struct inode *inode)
@@ -2830,12 +2880,10 @@
#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
-#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META && \
- (!is_read_io((fio)->op) || (fio)->is_meta))
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META)
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
@@ -2854,15 +2902,6 @@
return true;
}
-static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
- block_t blkaddr)
-{
- if (!__is_valid_data_blkaddr(blkaddr))
- return false;
- verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
- return true;
-}
-
static inline void f2fs_set_page_private(struct page *page,
unsigned long data)
{
@@ -3558,16 +3597,12 @@
F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
#ifdef CONFIG_BLK_DEV_ZONED
-static inline int get_blkz_type(struct f2fs_sb_info *sbi,
- struct block_device *bdev, block_t blkaddr)
+static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
+ block_t blkaddr)
{
unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
- int i;
- for (i = 0; i < sbi->s_ndevs; i++)
- if (FDEV(i).bdev == bdev)
- return FDEV(i).blkz_type[zno];
- return -EINVAL;
+ return test_bit(zno, FDEV(devi).blkz_seq);
}
#endif
@@ -3576,9 +3611,27 @@
return f2fs_sb_has_blkzoned(sbi);
}
+static inline bool f2fs_bdev_support_discard(struct block_device *bdev)
+{
+ return blk_queue_discard(bdev_get_queue(bdev)) ||
+#ifdef CONFIG_BLK_DEV_ZONED
+ bdev_is_zoned(bdev);
+#else
+ 0;
+#endif
+}
+
static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
{
- return blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev));
+ int i;
+
+ if (!f2fs_is_multi_device(sbi))
+ return f2fs_bdev_support_discard(sbi->sb->s_bdev);
+
+ for (i = 0; i < sbi->s_ndevs; i++)
+ if (f2fs_bdev_support_discard(FDEV(i).bdev))
+ return true;
+ return false;
}
static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi)
@@ -3587,6 +3640,20 @@
f2fs_hw_should_discard(sbi);
}
+static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi)
+{
+ int i;
+
+ if (!f2fs_is_multi_device(sbi))
+ return bdev_read_only(sbi->sb->s_bdev);
+
+ for (i = 0; i < sbi->s_ndevs; i++)
+ if (bdev_read_only(FDEV(i).bdev))
+ return true;
+ return false;
+}
+
+
static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
{
clear_opt(sbi, ADAPTIVE);
@@ -3643,7 +3710,7 @@
if (f2fs_post_read_required(inode) &&
!fscrypt_using_hardware_encryption(inode))
return true;
- if (sbi->s_ndevs)
+ if (f2fs_is_multi_device(sbi))
return true;
/*
* for blkzoned device, fallback direct IO to buffered IO, so
@@ -3690,4 +3757,4 @@
return false;
}
-#endif
+#endif /* _LINUX_F2FS_H */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 8fc5c3c..24fe54e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -40,6 +40,8 @@
err = filemap_fault(vma, vmf);
up_read(&F2FS_I(inode)->i_mmap_sem);
+ trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)err);
+
return err;
}
@@ -358,7 +360,7 @@
switch (whence) {
case SEEK_DATA:
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- is_valid_data_blkaddr(sbi, blkaddr))
+ __is_valid_data_blkaddr(blkaddr))
return true;
break;
case SEEK_HOLE:
@@ -424,7 +426,7 @@
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
- blkaddr, DATA_GENERIC)) {
+ blkaddr, DATA_GENERIC_ENHANCE)) {
f2fs_put_dnode(&dn);
goto fail;
}
@@ -525,7 +527,8 @@
f2fs_set_data_blkaddr(dn);
if (__is_valid_data_blkaddr(blkaddr) &&
- !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ !f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE))
continue;
f2fs_invalidate_blocks(sbi, blkaddr);
@@ -554,7 +557,7 @@
void f2fs_truncate_data_blocks(struct dnode_of_data *dn)
{
- f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
+ f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
}
static int truncate_partial_data_page(struct inode *inode, u64 from,
@@ -1009,7 +1012,8 @@
} else if (ret == -ENOENT) {
if (dn.max_level == 0)
return -ENOENT;
- done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node, len);
+ done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - dn.ofs_in_node,
+ len);
blkaddr += done;
do_replace += done;
goto next;
@@ -1020,6 +1024,14 @@
for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
*blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
+
+ if (__is_valid_data_blkaddr(*blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, *blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ f2fs_put_dnode(&dn);
+ return -EFAULT;
+ }
+
if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
if (test_opt(sbi, LFS)) {
@@ -1160,7 +1172,7 @@
int ret;
while (len) {
- olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);
+ olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);
src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
array_size(olen, sizeof(block_t)),
@@ -2569,10 +2581,10 @@
sizeof(range)))
return -EFAULT;
- if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
+ if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
__is_large_section(sbi)) {
f2fs_msg(sbi->sb, KERN_WARNING,
- "Can't flush %u in %d for segs_per_sec %u != 1\n",
+ "Can't flush %u in %d for segs_per_sec %u != 1",
range.dev_num, sbi->s_ndevs,
sbi->segs_per_sec);
return -EINVAL;
@@ -2634,7 +2646,7 @@
if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) {
f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: Enable GC = ino %lx after %x GC trials\n",
+ "%s: Enable GC = ino %lx after %x GC trials",
__func__, inode->i_ino,
fi->i_gc_failures[GC_FAILURE_PIN]);
clear_inode_flag(inode, FI_PIN_FILE);
@@ -2807,15 +2819,21 @@
struct inode *inode = file_inode(file);
ssize_t ret;
- if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
- return -EIO;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
+ ret = -EIO;
+ goto out;
+ }
- if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
- return -EINVAL;
+ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) {
+ ret = -EINVAL;
+ goto out;
+ }
if (!inode_trylock(inode)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
- return -EAGAIN;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
inode_lock(inode);
}
@@ -2828,19 +2846,16 @@
if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
set_inode_flag(inode, FI_NO_PREALLOC);
- if ((iocb->ki_flags & IOCB_NOWAIT) &&
- (iocb->ki_flags & IOCB_DIRECT)) {
- if (!f2fs_overwrite_io(inode, iocb->ki_pos,
+ if ((iocb->ki_flags & IOCB_NOWAIT)) {
+ if (!f2fs_overwrite_io(inode, iocb->ki_pos,
iov_iter_count(from)) ||
- f2fs_has_inline_data(inode) ||
- f2fs_force_buffered_io(inode,
- iocb, from)) {
- clear_inode_flag(inode,
- FI_NO_PREALLOC);
- inode_unlock(inode);
- return -EAGAIN;
- }
-
+ f2fs_has_inline_data(inode) ||
+ f2fs_force_buffered_io(inode, iocb, from)) {
+ clear_inode_flag(inode, FI_NO_PREALLOC);
+ inode_unlock(inode);
+ ret = -EAGAIN;
+ goto out;
+ }
} else {
preallocated = true;
target_size = iocb->ki_pos + iov_iter_count(from);
@@ -2849,7 +2864,8 @@
if (err) {
clear_inode_flag(inode, FI_NO_PREALLOC);
inode_unlock(inode);
- return err;
+ ret = err;
+ goto out;
}
}
ret = __generic_file_write_iter(iocb, from);
@@ -2863,7 +2879,9 @@
f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
}
inode_unlock(inode);
-
+out:
+ trace_f2fs_file_write_iter(inode, iocb->ki_pos,
+ iov_iter_count(from), ret);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d421720..24b74d6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -591,7 +591,7 @@
int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
bidx = node_ofs - 5 - dec;
}
- return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode);
+ return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode);
}
static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@@ -656,6 +656,11 @@
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE_READ))) {
+ err = -EFAULT;
+ goto put_page;
+ }
goto got_it;
}
@@ -665,8 +670,12 @@
goto put_page;
f2fs_put_dnode(&dn);
+ if (!__is_valid_data_blkaddr(dn.data_blkaddr)) {
+ err = -ENOENT;
+ goto put_page;
+ }
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
- DATA_GENERIC))) {
+ DATA_GENERIC_ENHANCE))) {
err = -EFAULT;
goto put_page;
}
@@ -1175,6 +1184,7 @@
"type [%d, %d] in SSA and SIT",
segno, type, GET_SUM_TYPE((&sum->footer)));
set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_stop_checkpoint(sbi, false);
goto skip;
}
@@ -1346,7 +1356,7 @@
sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
/* give warm/cold data area from slower device */
- if (sbi->s_ndevs && !__is_large_section(sbi))
+ if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 8422a13..d67622c 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -438,6 +438,14 @@
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
+ /*
+ * should retrieve reserved space which was used to keep
+ * inline_dentry's structure for backward compatibility.
+ */
+ if (!f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(dir)) &&
+ !f2fs_has_inline_xattr(dir))
+ F2FS_I(dir)->i_inline_xattr_size = 0;
+
f2fs_i_depth_write(dir, 1);
if (i_size_read(dir) < PAGE_SIZE)
f2fs_i_size_write(dir, PAGE_SIZE);
@@ -519,6 +527,15 @@
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
+
+ /*
+ * should retrieve reserved space which was used to keep
+ * inline_dentry's structure for backward compatibility.
+ */
+ if (!f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(dir)) &&
+ !f2fs_has_inline_xattr(dir))
+ F2FS_I(dir)->i_inline_xattr_size = 0;
+
kvfree(backup_dentry);
return 0;
recover:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index e7f2e87..ccb0222 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -73,7 +73,7 @@
if (!__is_valid_data_blkaddr(addr))
return 1;
- if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC))
+ if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE))
return -EFAULT;
return 0;
}
@@ -177,8 +177,8 @@
if (provided != calculated)
f2fs_msg(sbi->sb, KERN_WARNING,
- "checksum invalid, ino = %x, %x vs. %x",
- ino_of_node(page), provided, calculated);
+ "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x",
+ page->index, ino_of_node(page), provided, calculated);
return provided == calculated;
}
@@ -267,9 +267,10 @@
struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
if (ei->len &&
- (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
+ (!f2fs_is_valid_blkaddr(sbi, ei->blk,
+ DATA_GENERIC_ENHANCE) ||
!f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
- DATA_GENERIC))) {
+ DATA_GENERIC_ENHANCE))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) extent info [%u, %u, %u] "
@@ -488,6 +489,7 @@
return inode;
bad_inode:
+ f2fs_inode_synced(inode);
iget_failed(inode);
trace_f2fs_iget_exit(inode, ret);
return ERR_PTR(ret);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 59c0923..b70ab57 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -143,7 +143,7 @@
return ERR_PTR(err);
}
-static int is_extension_exist(const unsigned char *s, const char *sub)
+static inline int is_extension_exist(const unsigned char *s, const char *sub)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 93e9d42..6e8f75e1 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -454,7 +454,7 @@
new_blkaddr == NULL_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
+ f2fs_bug_on(sbi, __is_valid_data_blkaddr(nat_get_blkaddr(e)) &&
new_blkaddr == NEW_ADDR);
/* increment version no as node is removed */
@@ -465,7 +465,7 @@
/* change address */
nat_set_blkaddr(e, new_blkaddr);
- if (!is_valid_data_blkaddr(sbi, new_blkaddr))
+ if (!__is_valid_data_blkaddr(new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
@@ -526,6 +526,7 @@
struct f2fs_nat_entry ne;
struct nat_entry *e;
pgoff_t index;
+ block_t blkaddr;
int i;
ni->nid = nid;
@@ -569,6 +570,11 @@
node_info_from_raw_nat(ni, &ne);
f2fs_put_page(page, 1);
cache:
+ blkaddr = le32_to_cpu(ne.block_addr);
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
+ return -EFAULT;
+
/* cache nat entry */
cache_nat_entry(sbi, nid, &ne);
return 0;
@@ -600,9 +606,9 @@
pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
{
const long direct_index = ADDRS_PER_INODE(dn->inode);
- const long direct_blks = ADDRS_PER_BLOCK;
- const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
- unsigned int skipped_unit = ADDRS_PER_BLOCK;
+ const long direct_blks = ADDRS_PER_BLOCK(dn->inode);
+ const long indirect_blks = ADDRS_PER_BLOCK(dn->inode) * NIDS_PER_BLOCK;
+ unsigned int skipped_unit = ADDRS_PER_BLOCK(dn->inode);
int cur_level = dn->cur_level;
int max_level = dn->max_level;
pgoff_t base = 0;
@@ -636,9 +642,9 @@
int offset[4], unsigned int noffset[4])
{
const long direct_index = ADDRS_PER_INODE(inode);
- const long direct_blks = ADDRS_PER_BLOCK;
+ const long direct_blks = ADDRS_PER_BLOCK(inode);
const long dptrs_per_blk = NIDS_PER_BLOCK;
- const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
+ const long indirect_blks = ADDRS_PER_BLOCK(inode) * NIDS_PER_BLOCK;
const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
int n = 0;
int level = 0;
@@ -1179,8 +1185,14 @@
f2fs_put_dnode(&dn);
return -EIO;
}
- f2fs_bug_on(F2FS_I_SB(inode),
- inode->i_blocks != 0 && inode->i_blocks != 8);
+
+ if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) {
+ f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
+ "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ }
/* will put inode & node pages */
err = truncate_node(&dn);
@@ -1275,9 +1287,10 @@
int err;
if (PageUptodate(page)) {
-#ifdef CONFIG_F2FS_CHECK_FS
- f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
-#endif
+ if (!f2fs_inode_chksum_verify(sbi, page)) {
+ ClearPageUptodate(page);
+ return -EBADMSG;
+ }
return LOCKED_PAGE;
}
@@ -1543,7 +1556,8 @@
}
if (__is_valid_data_blkaddr(ni.blk_addr) &&
- !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
+ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr,
+ DATA_GENERIC_ENHANCE)) {
up_read(&sbi->node_write);
goto redirty_out;
}
@@ -2078,6 +2092,9 @@
if (unlikely(nid == 0))
return false;
+ if (unlikely(f2fs_check_nid_range(sbi, nid)))
+ return false;
+
i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
i->state = FREE_NID;
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 5a99911..1217501 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -317,8 +317,10 @@
break;
}
- if (!is_recoverable_dnode(page))
+ if (!is_recoverable_dnode(page)) {
+ f2fs_put_page(page, 1);
break;
+ }
if (!is_fsync_dnode(page))
goto next;
@@ -330,8 +332,10 @@
if (!check_only &&
IS_INODE(page) && is_dent_dnode(page)) {
err = f2fs_recover_inode_page(sbi, page);
- if (err)
+ if (err) {
+ f2fs_put_page(page, 1);
break;
+ }
quota_inode = true;
}
@@ -347,6 +351,7 @@
err = 0;
goto next;
}
+ f2fs_put_page(page, 1);
break;
}
}
@@ -362,6 +367,7 @@
"%s: detect looped node chain, "
"blkaddr:%u, next:%u",
__func__, blkaddr, next_blkaddr_of_node(page));
+ f2fs_put_page(page, 1);
err = -EINVAL;
break;
}
@@ -372,7 +378,6 @@
f2fs_ra_meta_pages_cond(sbi, blkaddr);
}
- f2fs_put_page(page, 1);
return err;
}
@@ -538,7 +543,15 @@
goto err;
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
- f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
+
+ if (ofs_of_node(dn.node_page) != ofs_of_node(page)) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
+ inode->i_ino, ofs_of_node(dn.node_page),
+ ofs_of_node(page));
+ err = -EFAULT;
+ goto err;
+ }
for (; start < end; start++, dn.ofs_in_node++) {
block_t src, dest;
@@ -546,6 +559,18 @@
src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
dest = datablock_addr(dn.inode, page, dn.ofs_in_node);
+ if (__is_valid_data_blkaddr(src) &&
+ !f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
+ err = -EFAULT;
+ goto err;
+ }
+
+ if (__is_valid_data_blkaddr(dest) &&
+ !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
+ err = -EFAULT;
+ goto err;
+ }
+
/* skip recovering if dest is the same as src */
if (src == dest)
continue;
@@ -658,8 +683,10 @@
*/
if (IS_INODE(page)) {
err = recover_inode(entry->inode, page);
- if (err)
+ if (err) {
+ f2fs_put_page(page, 1);
break;
+ }
}
if (entry->last_dentry == blkaddr) {
err = recover_dentry(entry->inode, page, dir_list);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 696da20..2a8dc33 100755
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -580,7 +580,7 @@
int ret = 0;
int i;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return __submit_flush_wait(sbi, sbi->sb->s_bdev);
for (i = 0; i < sbi->s_ndevs; i++) {
@@ -648,7 +648,8 @@
return ret;
}
- if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) {
+ if (atomic_inc_return(&fcc->queued_flush) == 1 ||
+ f2fs_is_multi_device(sbi)) {
ret = submit_flush_wait(sbi, ino);
atomic_dec(&fcc->queued_flush);
@@ -754,7 +755,7 @@
{
int ret = 0, i;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return 0;
for (i = 1; i < sbi->s_ndevs; i++) {
@@ -1363,9 +1364,12 @@
{
block_t lblkstart = blkstart;
+ if (!f2fs_bdev_support_discard(bdev))
+ return 0;
+
trace_f2fs_queue_discard(bdev, blkstart, blklen);
- if (sbi->s_ndevs) {
+ if (f2fs_is_multi_device(sbi)) {
int devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
@@ -1728,42 +1732,36 @@
block_t lblkstart = blkstart;
int devi = 0;
- if (sbi->s_ndevs) {
+ if (f2fs_is_multi_device(sbi)) {
devi = f2fs_target_device_index(sbi, blkstart);
+ if (blkstart < FDEV(devi).start_blk ||
+ blkstart > FDEV(devi).end_blk) {
+ f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x",
+ blkstart);
+ return -EIO;
+ }
blkstart -= FDEV(devi).start_blk;
}
- /*
- * We need to know the type of the zone: for conventional zones,
- * use regular discard if the drive supports it. For sequential
- * zones, reset the zone write pointer.
- */
- switch (get_blkz_type(sbi, bdev, blkstart)) {
-
- case BLK_ZONE_TYPE_CONVENTIONAL:
- if (!blk_queue_discard(bdev_get_queue(bdev)))
- return 0;
- return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
- case BLK_ZONE_TYPE_SEQWRITE_REQ:
- case BLK_ZONE_TYPE_SEQWRITE_PREF:
+ /* For sequential zones, reset the zone write pointer */
+ if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
sector = SECTOR_FROM_BLOCK(blkstart);
nr_sects = SECTOR_FROM_BLOCK(blklen);
if (sector & (bdev_zone_sectors(bdev) - 1) ||
nr_sects != bdev_zone_sectors(bdev)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "(%d) %s: Unaligned discard attempted (block %x + %x)",
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
devi, sbi->s_ndevs ? FDEV(devi).path: "",
blkstart, blklen);
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
- return blkdev_reset_zones(bdev, sector,
- nr_sects, GFP_NOFS);
- default:
- /* Unknown zone type: broken device ? */
- return -EIO;
+ return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
}
+
+ /* For conventional zones, use regular discard if supported */
+ return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
}
#endif
@@ -1771,8 +1769,7 @@
struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi) &&
- bdev_zoned_model(bdev) != BLK_ZONED_NONE)
+ if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
@@ -2168,8 +2165,11 @@
* before, we must track that to know how much space we
* really have.
*/
- if (f2fs_test_bit(offset, se->ckpt_valid_map))
+ if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
+ spin_lock(&sbi->stat_lock);
sbi->unusable_block_count++;
+ spin_unlock(&sbi->stat_lock);
+ }
}
if (f2fs_test_and_clear_bit(offset, se->discard_map))
@@ -2216,7 +2216,7 @@
struct seg_entry *se;
bool is_cp = false;
- if (!is_valid_data_blkaddr(sbi, blkaddr))
+ if (!__is_valid_data_blkaddr(blkaddr))
return true;
down_read(&sit_i->sentry_lock);
@@ -3086,7 +3086,7 @@
struct f2fs_sb_info *sbi = fio->sbi;
unsigned int devidx;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return;
devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
@@ -3184,13 +3184,18 @@
{
int err;
struct f2fs_sb_info *sbi = fio->sbi;
+ unsigned int segno;
fio->new_blkaddr = fio->old_blkaddr;
/* i/o temperature is needed for passing down write hints */
__get_segment_type(fio);
- f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
- GET_SEGNO(sbi, fio->new_blkaddr))->type));
+ segno = GET_SEGNO(sbi, fio->new_blkaddr);
+
+ if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return -EFAULT;
+ }
stat_inc_inplace_blocks(fio->sbi);
@@ -3333,7 +3338,7 @@
if (!f2fs_post_read_required(inode))
return;
- if (!is_valid_data_blkaddr(sbi, blkaddr))
+ if (!__is_valid_data_blkaddr(blkaddr))
return;
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5c7ed04..429007b 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -82,7 +82,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
- ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
+ ((!__is_valid_data_blkaddr(blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
@@ -656,14 +656,15 @@
f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
}
-static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
+static inline void verify_fio_blkaddr(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
- if (__is_meta_io(fio))
- verify_blkaddr(sbi, blk_addr, META_GENERIC);
- else
- verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
+ if (__is_valid_data_blkaddr(fio->old_blkaddr))
+ verify_blkaddr(sbi, fio->old_blkaddr, __is_meta_io(fio) ?
+ META_GENERIC : DATA_GENERIC);
+ verify_blkaddr(sbi, fio->new_blkaddr, __is_meta_io(fio) ?
+ META_GENERIC : DATA_GENERIC_ENHANCE);
}
/*
@@ -672,7 +673,6 @@
static inline int check_block_count(struct f2fs_sb_info *sbi,
int segno, struct f2fs_sit_entry *raw_sit)
{
-#ifdef CONFIG_F2FS_CHECK_FS
bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0;
int cur_pos = 0, next_pos;
@@ -699,7 +699,7 @@
set_sbi_flag(sbi, SBI_NEED_FSCK);
return -EINVAL;
}
-#endif
+
/* check segment usage, and check boundary of a given segment number */
if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1)) {
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 491deb249..a6de02a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1024,7 +1024,7 @@
for (i = 0; i < sbi->s_ndevs; i++) {
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
- kvfree(FDEV(i).blkz_type);
+ kvfree(FDEV(i).blkz_seq);
#endif
}
kvfree(sbi->devs);
@@ -1226,10 +1226,13 @@
buf->f_blocks = total_count - start_count;
buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
sbi->current_reserved_blocks;
+
+ spin_lock(&sbi->stat_lock);
if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
buf->f_bfree = 0;
else
buf->f_bfree -= sbi->unusable_block_count;
+ spin_unlock(&sbi->stat_lock);
if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
buf->f_bavail = buf->f_bfree -
@@ -1504,9 +1507,15 @@
mutex_lock(&sbi->gc_mutex);
cpc.reason = CP_PAUSE;
set_sbi_flag(sbi, SBI_CP_DISABLED);
- f2fs_write_checkpoint(sbi, &cpc);
+ err = f2fs_write_checkpoint(sbi, &cpc);
+ if (err)
+ goto out_unlock;
+ spin_lock(&sbi->stat_lock);
sbi->unusable_block_count = 0;
+ spin_unlock(&sbi->stat_lock);
+
+out_unlock:
mutex_unlock(&sbi->gc_mutex);
restore_flag:
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -2276,7 +2285,7 @@
static loff_t max_file_blocks(void)
{
loff_t result = 0;
- loff_t leaf_count = ADDRS_PER_BLOCK;
+ loff_t leaf_count = DEF_ADDRS_PER_BLOCK;
/*
* note: previously, result is equal to (DEF_ADDRS_PER_INODE -
@@ -2454,7 +2463,7 @@
/* Currently, support only 4KB page cache size */
if (F2FS_BLKSIZE != PAGE_SIZE) {
f2fs_msg(sb, KERN_INFO,
- "Invalid page_cache_size (%lu), supports only 4KB\n",
+ "Invalid page_cache_size (%lu), supports only 4KB",
PAGE_SIZE);
return 1;
}
@@ -2463,7 +2472,7 @@
blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
if (blocksize != F2FS_BLKSIZE) {
f2fs_msg(sb, KERN_INFO,
- "Invalid blocksize (%u), supports only 4KB\n",
+ "Invalid blocksize (%u), supports only 4KB",
blocksize);
return 1;
}
@@ -2471,7 +2480,7 @@
/* check log blocks per segment */
if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
f2fs_msg(sb, KERN_INFO,
- "Invalid log blocks per segment (%u)\n",
+ "Invalid log blocks per segment (%u)",
le32_to_cpu(raw_super->log_blocks_per_seg));
return 1;
}
@@ -2592,7 +2601,8 @@
unsigned int log_blocks_per_seg;
unsigned int segment_count_main;
unsigned int cp_pack_start_sum, cp_payload;
- block_t user_block_count;
+ block_t user_block_count, valid_user_blocks;
+ block_t avail_node_count, valid_node_count;
int i, j;
total = le32_to_cpu(raw_super->segment_count);
@@ -2627,6 +2637,24 @@
return 1;
}
+ valid_user_blocks = le64_to_cpu(ckpt->valid_block_count);
+ if (valid_user_blocks > user_block_count) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong valid_user_blocks: %u, user_block_count: %u",
+ valid_user_blocks, user_block_count);
+ return 1;
+ }
+
+ valid_node_count = le32_to_cpu(ckpt->valid_node_count);
+ avail_node_count = sbi->total_node_count - sbi->nquota_files -
+ F2FS_RESERVED_NODE_NUM;
+ if (valid_node_count > avail_node_count) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong valid_node_count: %u, avail_node_count: %u",
+ valid_node_count, avail_node_count);
+ return 1;
+ }
+
main_segs = le32_to_cpu(raw_super->segment_count_main);
blocks_per_seg = sbi->blocks_per_seg;
@@ -2798,9 +2826,11 @@
if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
FDEV(devi).nr_blkz++;
- FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz,
- GFP_KERNEL);
- if (!FDEV(devi).blkz_type)
+ FDEV(devi).blkz_seq = f2fs_kzalloc(sbi,
+ BITS_TO_LONGS(FDEV(devi).nr_blkz)
+ * sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!FDEV(devi).blkz_seq)
return -ENOMEM;
#define F2FS_REPORT_NR_ZONES 4096
@@ -2827,7 +2857,8 @@
}
for (i = 0; i < nr_zones; i++) {
- FDEV(devi).blkz_type[n] = zones[i].type;
+ if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
+ set_bit(n, FDEV(devi).blkz_seq);
sector += zones[i].len;
n++;
}
@@ -3114,7 +3145,7 @@
#ifndef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_has_blkzoned(sbi)) {
f2fs_msg(sb, KERN_ERR,
- "Zoned block device support is not enabled\n");
+ "Zoned block device support is not enabled");
err = -EOPNOTSUPP;
goto free_sb_buf;
}
@@ -3358,10 +3389,17 @@
* mount should be failed, when device has readonly mode, and
* previous checkpoint was not done by clean system shutdown.
*/
- if (bdev_read_only(sb->s_bdev) &&
- !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
- err = -EROFS;
- goto free_meta;
+ if (f2fs_hw_is_readonly(sbi)) {
+ if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ err = -EROFS;
+ f2fs_msg(sb, KERN_ERR,
+ "Need to recover fsync data, but "
+ "write access unavailable");
+ goto free_meta;
+ }
+ f2fs_msg(sbi->sb, KERN_INFO, "write access "
+ "unavailable, skipping recovery");
+ goto reset_checkpoint;
}
if (need_fsck)
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 848a785..e791741 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -202,12 +202,17 @@
return handler;
}
-static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
- size_t len, const char *name)
+static struct f2fs_xattr_entry *__find_xattr(void *base_addr,
+ void *last_base_addr, int index,
+ size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
list_for_each_xattr(entry, base_addr) {
+ if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
+ (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr)
+ return NULL;
+
if (entry->e_name_index != index)
continue;
if (entry->e_name_len != len)
@@ -297,20 +302,22 @@
const char *name, struct f2fs_xattr_entry **xe,
void **base_addr, int *base_size)
{
- void *cur_addr, *txattr_addr, *last_addr = NULL;
+ void *cur_addr, *txattr_addr, *last_txattr_addr;
+ void *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
- unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
unsigned int inline_size = inline_xattr_size(inode);
int err = 0;
- if (!size && !inline_size)
+ if (!xnid && !inline_size)
return -ENODATA;
- *base_size = inline_size + size + XATTR_PADDING_SIZE;
+ *base_size = XATTR_SIZE(xnid, inode) + XATTR_PADDING_SIZE;
txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS);
if (!txattr_addr)
return -ENOMEM;
+ last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(xnid, inode);
+
/* read from inline xattr */
if (inline_size) {
err = read_inline_xattr(inode, ipage, txattr_addr);
@@ -337,7 +344,11 @@
else
cur_addr = txattr_addr;
- *xe = __find_xattr(cur_addr, index, len, name);
+ *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name);
+ if (!*xe) {
+ err = -EFAULT;
+ goto out;
+ }
check:
if (IS_XATTR_LAST_ENTRY(*xe)) {
err = -ENODATA;
@@ -581,7 +592,8 @@
struct page *ipage, int flags)
{
struct f2fs_xattr_entry *here, *last;
- void *base_addr;
+ void *base_addr, *last_base_addr;
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
int found, newsize;
size_t len;
__u32 new_hsize;
@@ -605,8 +617,14 @@
if (error)
return error;
+ last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode);
+
/* find entry with wanted name. */
- here = __find_xattr(base_addr, index, len, name);
+ here = __find_xattr(base_addr, last_base_addr, index, len, name);
+ if (!here) {
+ error = -EFAULT;
+ goto exit;
+ }
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 9172ee0..a90920e 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -71,6 +71,8 @@
entry = XATTR_NEXT_ENTRY(entry))
#define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
#define XATTR_PADDING_SIZE (sizeof(__u32))
+#define XATTR_SIZE(x,i) (((x) ? VALID_XATTR_BLOCK_SIZE : 0) + \
+ (inline_xattr_size(i)))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
VALID_XATTR_BLOCK_SIZE)
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 2877ccb..02c4f16 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1433,7 +1433,12 @@
struct screen_info *si, efi_guid_t *proto,
unsigned long size);
-bool efi_runtime_disabled(void);
+#ifdef CONFIG_EFI
+extern bool efi_runtime_disabled(void);
+#else
+static inline bool efi_runtime_disabled(void) { return true; }
+#endif
+
extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
/*
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index f574042..6555990 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -164,6 +164,10 @@
unsigned char sit_nat_version_bitmap[1];
} __packed;
+#define CP_CHKSUM_OFFSET 4092 /* default chksum offset in checkpoint */
+#define CP_MIN_CHKSUM_OFFSET \
+ (offsetof(struct f2fs_checkpoint, sit_nat_version_bitmap))
+
/*
* For orphan inode management
*/
@@ -198,11 +202,12 @@
get_extra_isize(inode))
#define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */
#define ADDRS_PER_INODE(inode) addrs_per_inode(inode)
-#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */
+#define DEF_ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */
+#define ADDRS_PER_BLOCK(inode) addrs_per_block(inode)
#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */
#define ADDRS_PER_PAGE(page, inode) \
- (IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK)
+ (IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK(inode))
#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1)
#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2)
@@ -267,7 +272,7 @@
} __packed;
struct direct_node {
- __le32 addr[ADDRS_PER_BLOCK]; /* array of data block address */
+ __le32 addr[DEF_ADDRS_PER_BLOCK]; /* array of data block address */
} __packed;
struct indirect_node {
diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
index b01fe10..87ff4f5 100644
--- a/include/linux/list_nulls.h
+++ b/include/linux/list_nulls.h
@@ -29,6 +29,11 @@
((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_nulls_entry_safe(ptr, type, member) \
+ ({ typeof(ptr) ____ptr = (ptr); \
+ !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
+ })
/**
* ptr_is_a_nulls - Test if a ptr is a nulls
* @ptr: ptr to be tested
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 6224a0a..2720b2f 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -118,5 +118,19 @@
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
+/**
+ * hlist_nulls_for_each_entry_safe -
+ * iterate over list of given type safe against removal of list entry
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_nulls_node to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the hlist_nulls_node within the struct.
+ */
+#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \
+ for (({barrier();}), \
+ pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \
+ (!is_a_nulls(pos)) && \
+ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \
+ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
#endif
#endif
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index e238f54..cea051b 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -526,6 +526,37 @@
__entry->err)
);
+TRACE_EVENT(f2fs_file_write_iter,
+
+ TP_PROTO(struct inode *inode, unsigned long offset,
+ unsigned long length, int ret),
+
+ TP_ARGS(inode, offset, length, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(unsigned long, offset)
+ __field(unsigned long, length)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->offset = offset;
+ __entry->length = length;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, "
+ "offset = %lu, length = %lu, written(err) = %d",
+ show_dev_ino(__entry),
+ __entry->offset,
+ __entry->length,
+ __entry->ret)
+);
+
TRACE_EVENT(f2fs_map_blocks,
TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int ret),
@@ -1246,6 +1277,32 @@
TP_ARGS(page, type)
);
+TRACE_EVENT(f2fs_filemap_fault,
+
+ TP_PROTO(struct inode *inode, pgoff_t index, unsigned long ret),
+
+ TP_ARGS(inode, index, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(pgoff_t, index)
+ __field(unsigned long, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->index = index;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, index = %lu, ret = %lx",
+ show_dev_ino(__entry),
+ (unsigned long)__entry->index,
+ __entry->ret)
+);
+
TRACE_EVENT(f2fs_writepages,
TP_PROTO(struct inode *inode, struct writeback_control *wbc, int type),
diff --git a/init/main.c b/init/main.c
index b3c0e4b..a5c9e57 100644
--- a/init/main.c
+++ b/init/main.c
@@ -515,6 +515,8 @@
page_alloc_init();
pr_notice("Kernel command line: %s\n", boot_command_line);
+ /* parameters may set static keys */
+ jump_label_init();
parse_early_param();
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
@@ -524,8 +526,6 @@
parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
NULL, set_init_arg);
- jump_label_init();
-
/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 9c86d5d..b6e2bfd 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -13,12 +13,13 @@
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
+#include <linux/rculist_nulls.h>
#include "percpu_freelist.h"
#define HTAB_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_RDONLY | BPF_F_WRONLY)
struct bucket {
- struct hlist_head head;
+ struct hlist_nulls_head head;
raw_spinlock_t lock;
};
@@ -42,9 +43,14 @@
/* each htab element is struct htab_elem + key + value */
struct htab_elem {
union {
- struct hlist_node hash_node;
- struct bpf_htab *htab;
- struct pcpu_freelist_node fnode;
+ struct hlist_nulls_node hash_node;
+ struct {
+ void *padding;
+ union {
+ struct bpf_htab *htab;
+ struct pcpu_freelist_node fnode;
+ };
+ };
};
union {
struct rcu_head rcu;
@@ -116,8 +122,10 @@
if (err)
goto free_elems;
- pcpu_freelist_populate(&htab->freelist, htab->elems, htab->elem_size,
- htab->map.max_entries);
+ pcpu_freelist_populate(&htab->freelist,
+ htab->elems + offsetof(struct htab_elem, fnode),
+ htab->elem_size, htab->map.max_entries);
+
return 0;
free_elems:
@@ -150,6 +158,11 @@
int err, i;
u64 cost;
+ BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
+ offsetof(struct htab_elem, hash_node.pprev));
+ BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
+ offsetof(struct htab_elem, hash_node.pprev));
+
if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
/* reserved bits should not be used */
return ERR_PTR(-EINVAL);
@@ -235,7 +248,7 @@
goto free_htab;
for (i = 0; i < htab->n_buckets; i++) {
- INIT_HLIST_HEAD(&htab->buckets[i].head);
+ INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
raw_spin_lock_init(&htab->buckets[i].lock);
}
@@ -272,28 +285,52 @@
return &htab->buckets[hash & (htab->n_buckets - 1)];
}
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
return &__select_bucket(htab, hash)->head;
}
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
+/* this lookup function can only be called with bucket lock taken */
+static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
void *key, u32 key_size)
{
+ struct hlist_nulls_node *n;
struct htab_elem *l;
- hlist_for_each_entry_rcu(l, head, hash_node)
+ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l->hash == hash && !memcmp(&l->key, key, key_size))
return l;
return NULL;
}
+/* can be called without bucket lock. it will repeat the loop in
+ * the unlikely event when elements moved from one bucket into another
+ * while link list is being walked
+ */
+static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
+ u32 hash, void *key,
+ u32 key_size, u32 n_buckets)
+{
+ struct hlist_nulls_node *n;
+ struct htab_elem *l;
+
+again:
+ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+ if (l->hash == hash && !memcmp(&l->key, key, key_size))
+ return l;
+
+ if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
+ goto again;
+
+ return NULL;
+}
+
/* Called from syscall or from eBPF program */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct htab_elem *l;
u32 hash, key_size;
@@ -306,7 +343,7 @@
head = select_bucket(htab, hash);
- l = lookup_elem_raw(head, hash, key, key_size);
+ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
return l;
}
@@ -325,7 +362,7 @@
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct htab_elem *l, *next_l;
u32 hash, key_size;
int i = 0;
@@ -342,13 +379,13 @@
head = select_bucket(htab, hash);
/* lookup the key */
- l = lookup_elem_raw(head, hash, key, key_size);
+ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
if (!l)
goto find_first_elem;
/* key was found, get next key in the same bucket */
- next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
+ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
struct htab_elem, hash_node);
if (next_l) {
@@ -367,7 +404,7 @@
head = select_bucket(htab, i);
/* pick first element in the bucket */
- next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
struct htab_elem, hash_node);
if (next_l) {
/* if it's not empty, just return it */
@@ -431,9 +468,13 @@
int err = 0;
if (prealloc) {
- l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
- if (!l_new)
+ struct pcpu_freelist_node *l;
+
+ l = pcpu_freelist_pop(&htab->freelist);
+ if (!l)
err = -E2BIG;
+ else
+ l_new = container_of(l, struct htab_elem, fnode);
} else {
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
atomic_dec(&htab->count);
@@ -520,7 +561,7 @@
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
@@ -559,9 +600,9 @@
/* add new element to the head of the list, so that
* concurrent search will find it before old elem
*/
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
- hlist_del_rcu(&l_old->hash_node);
+ hlist_nulls_del_rcu(&l_old->hash_node);
free_htab_elem(htab, l_old);
}
ret = 0;
@@ -576,7 +617,7 @@
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
@@ -628,7 +669,7 @@
ret = PTR_ERR(l_new);
goto err;
}
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
}
ret = 0;
err:
@@ -646,7 +687,7 @@
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct bucket *b;
struct htab_elem *l;
unsigned long flags;
@@ -666,7 +707,7 @@
l = lookup_elem_raw(head, hash, key, key_size);
if (l) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_del_rcu(&l->hash_node);
free_htab_elem(htab, l);
ret = 0;
}
@@ -680,12 +721,12 @@
int i;
for (i = 0; i < htab->n_buckets; i++) {
- struct hlist_head *head = select_bucket(htab, i);
- struct hlist_node *n;
+ struct hlist_nulls_head *head = select_bucket(htab, i);
+ struct hlist_nulls_node *n;
struct htab_elem *l;
- hlist_for_each_entry_safe(l, n, head, hash_node) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+ hlist_nulls_del_rcu(&l->hash_node);
if (l->state != HTAB_EXTRA_ELEM_USED)
htab_elem_free(htab, l);
}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index fb3e2a5..d06d15db 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -366,10 +366,12 @@
ifrr.ifr_ifru = ifr->ifr_ifru;
switch (cmd) {
+ case SIOCSHWTSTAMP:
+ if (!net_eq(dev_net(dev), &init_net))
+ break;
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- case SIOCSHWTSTAMP:
case SIOCGHWTSTAMP:
if (netif_device_present(real_dev) && ops->ndo_do_ioctl)
err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f57de0a..2e05a79 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -519,13 +519,15 @@
call_netdevice_notifiers(NETDEV_JOIN, dev);
err = dev_set_allmulti(dev, 1);
- if (err)
- goto put_back;
+ if (err) {
+ kfree(p); /* kobject not yet init'd, manually free */
+ goto err1;
+ }
err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
SYSFS_BRIDGE_PORT_ATTR);
if (err)
- goto err1;
+ goto err2;
err = br_sysfs_addif(p);
if (err)
@@ -608,12 +610,9 @@
sysfs_remove_link(br->ifobj, p->dev->name);
err2:
kobject_put(&p->kobj);
- p = NULL; /* kobject_put frees */
-err1:
dev_set_allmulti(dev, -1);
-put_back:
+err1:
dev_put(dev);
- kfree(p);
return err;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 31c4041..25c25e8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -486,9 +486,9 @@
rule->uid_range = fib_kuid_range_unset;
}
- if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
- rule_exists(ops, frh, tb, rule)) {
- err = -EEXIST;
+ if (rule_exists(ops, frh, tb, rule)) {
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ err = -EEXIST;
goto errout_free;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a860df2..482cf7c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -169,6 +169,7 @@
*/
static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
{
+ int dif = inet_iif(skb);
struct sock *sk;
struct hlist_head *head;
int delivered = 0;
@@ -181,8 +182,7 @@
net = dev_net(skb->dev);
sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
- iph->saddr, iph->daddr,
- skb->dev->ifindex);
+ iph->saddr, iph->daddr, dif);
while (sk) {
delivered = 1;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index be74eee..47ca2a2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1069,7 +1069,7 @@
if (!tdev && tunnel->parms.link)
tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
- if (tdev) {
+ if (tdev && !netif_is_l3_master(tdev)) {
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 197753a..8c17d49 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -23,7 +23,7 @@
static u32 mesh_table_hash(const void *addr, u32 len, u32 seed)
{
/* Use last four bytes of hw addr as hash index */
- return jhash_1word(*(u32 *)(addr+2), seed);
+ return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed);
}
static const struct rhashtable_params mesh_rht_params = {
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index fd186b0..8475e86 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1643,7 +1643,7 @@
if (!cp) {
int v;
- if (!sysctl_schedule_icmp(ipvs))
+ if (ipip || !sysctl_schedule_icmp(ipvs))
return NF_ACCEPT;
if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 751fec7..e065140 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1728,7 +1728,7 @@
seqcount_init(&per_cpu(xt_recseq, i));
}
- xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
+ xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
if (!xt)
return -ENOMEM;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f269b31..7233ad8 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4624,14 +4624,29 @@
static int __init packet_init(void)
{
- int rc = proto_register(&packet_proto, 0);
+ int rc;
- if (rc != 0)
+ rc = proto_register(&packet_proto, 0);
+ if (rc)
goto out;
+ rc = sock_register(&packet_family_ops);
+ if (rc)
+ goto out_proto;
+ rc = register_pernet_subsys(&packet_net_ops);
+ if (rc)
+ goto out_sock;
+ rc = register_netdevice_notifier(&packet_netdev_notifier);
+ if (rc)
+ goto out_pernet;
- sock_register(&packet_family_ops);
- register_pernet_subsys(&packet_net_ops);
- register_netdevice_notifier(&packet_netdev_notifier);
+ return 0;
+
+out_pernet:
+ unregister_pernet_subsys(&packet_net_ops);
+out_sock:
+ sock_unregister(PF_PACKET);
+out_proto:
+ proto_unregister(&packet_proto);
out:
return rc;
}
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
old mode 100644
new mode 100755
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 700c74b..def6112 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -2204,7 +2204,7 @@
return val & 0xffffffff;
if (strcmp(type, "u64") == 0 ||
- strcmp(type, "s64"))
+ strcmp(type, "s64") == 0)
return val;
if (strcmp(type, "s8") == 0)
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests
index 16058bb..c195b44 100755
--- a/tools/testing/selftests/net/run_netsocktests
+++ b/tools/testing/selftests/net/run_netsocktests
@@ -6,7 +6,7 @@
./socket
if [ $? -ne 0 ]; then
echo "[FAIL]"
+ exit 1
else
echo "[PASS]"
fi
-
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index c9ff2b4..a37cb11 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for netfilter selftests
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh
+TEST_PROGS := nft_trans_stress.sh nft_nat.sh conntrack_icmp_related.sh
include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
new file mode 100755
index 0000000..b48e183
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+#
+# check that ICMP df-needed/pkttoobig icmp are set are set as related
+# state
+#
+# Setup is:
+#
+# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2
+# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280).
+# ping nsclient2 from nsclient1, checking that conntrack did set RELATED
+# 'fragmentation needed' icmp packet.
+#
+# In addition, nsrouter1 will perform IP masquerading, i.e. also
+# check the icmp errors are propagated to the correct host as per
+# nat of "established" icmp-echo "connection".
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ for i in 1 2;do ip netns del nsclient$i;done
+ for i in 1 2;do ip netns del nsrouter$i;done
+}
+
+ipv4() {
+ echo -n 192.168.$1.2
+}
+
+ipv6 () {
+ echo -n dead:$1::2
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+ cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
+ ip netns exec $ns nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+check_unknown()
+{
+ expect="packets 0 bytes 0"
+ for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+ check_counter $n "unknown" "$expect"
+ if [ $? -ne 0 ] ;then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+ ip netns add $n
+ ip -net $n link set lo up
+done
+
+DEV=veth0
+ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1
+DEV=veth0
+ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2
+
+DEV=veth0
+ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2
+
+DEV=veth0
+for i in 1 2; do
+ ip -net nsclient$i link set $DEV up
+ ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV
+ ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV
+done
+
+ip -net nsrouter1 link set eth1 up
+ip -net nsrouter1 link set veth0 up
+
+ip -net nsrouter2 link set eth1 up
+ip -net nsrouter2 link set eth2 up
+
+ip -net nsclient1 route add default via 192.168.1.1
+ip -net nsclient1 -6 route add default via dead:1::1
+
+ip -net nsclient2 route add default via 192.168.2.1
+ip -net nsclient2 route add default via dead:2::1
+
+i=3
+ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1
+ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0
+ip -net nsrouter1 addr add dead:1::1/64 dev eth1
+ip -net nsrouter1 addr add dead:3::1/64 dev veth0
+ip -net nsrouter1 route add default via 192.168.3.10
+ip -net nsrouter1 -6 route add default via dead:3::10
+
+ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1
+ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2
+ip -net nsrouter2 addr add dead:2::1/64 dev eth1
+ip -net nsrouter2 addr add dead:3::10/64 dev eth2
+ip -net nsrouter2 route add default via 192.168.3.1
+ip -net nsrouter2 route add default via dead:3::1
+
+sleep 2
+for i in 4 6; do
+ ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1
+ ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1
+done
+
+for netns in nsrouter1 nsrouter2; do
+ip netns exec $netns nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept
+ meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept
+ meta l4proto { icmp, icmpv6 } ct state new,established accept
+ counter name "unknown" drop
+ }
+}
+EOF
+done
+
+ip netns exec nsclient1 nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+ counter name "unknown" drop
+ }
+}
+EOF
+
+ip netns exec nsclient2 nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter new { }
+ counter established { }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept
+ counter name "unknown" drop
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new"
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established"
+ counter name "unknown" drop
+ }
+}
+EOF
+
+
+# make sure NAT core rewrites adress of icmp error if nat is used according to
+# conntrack nat information (icmp error will be directed at nsrouter1 address,
+# but it needs to be routed to nsclient1 address).
+ip netns exec nsrouter1 nft -f - <<EOF
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip protocol icmp oifname "veth0" counter masquerade
+ }
+}
+table ip6 nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip6 nexthdr icmpv6 oifname "veth0" counter masquerade
+ }
+}
+EOF
+
+ip netns exec nsrouter2 ip link set eth1 mtu 1280
+ip netns exec nsclient2 ip link set veth0 mtu 1280
+sleep 1
+
+ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null
+if [ $? -ne 0 ]; then
+ echo "ERROR: netns ip routing/connectivity broken" 1>&2
+ cleanup
+ exit 1
+fi
+ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null
+if [ $? -ne 0 ]; then
+ echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
+ cleanup
+ exit 1
+fi
+
+check_unknown
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+expect="packets 0 bytes 0"
+for netns in nsrouter1 nsrouter2 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+expect="packets 2 bytes 2076"
+check_counter nsclient2 "new" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null
+if [ $? -eq 0 ]; then
+ echo "ERROR: ping should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+# nsrouter2 should have generated the icmp error, so
+# related counter should be 0 (its in forward).
+expect="packets 0 bytes 0"
+check_counter "nsrouter2" "related" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+# but nsrouter1 should have seen it, same for nsclient1.
+expect="packets 1 bytes 576"
+for netns in nsrouter1 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null
+if [ $? -eq 0 ]; then
+ echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+expect="packets 2 bytes 1856"
+for netns in nsrouter1 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+if [ $ret -eq 0 ];then
+ echo "PASS: icmp mtu error had RELATED state"
+else
+ echo "ERROR: icmp error RELATED state test has failed"
+fi
+
+cleanup
+exit $ret