Merge tag 'pinctrl-v3.15-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl
Pull pincontrol fixes from Linus Walleij:
"A first set of pin control fixes for the v3.15 series:
- Fix a couple of barnsjukdomar on the Rockchip driver.
- Remove an idiotic debug print I happened to leave behind in the
Nomadik driver.
- Fixup the Qualcomm MSM interrupt handling code for the TLMM v2.
- Three patches renaming the Broadcom Capri driver to BCM28155. This
has been falling between the chairs for some time due to some
cross-tree synchronization misunderstandings, now I'm fed up with
this and just rename it in this -rc1 phase"
* tag 'pinctrl-v3.15-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl:
pinctrl: fix typo in bindings documentation
Update bcm_defconfig with new pinctrl CONFIG
pinctrl: Rename Broadcom Capri pinctrl driver
pinctrl: msm: Correct interrupt code for TLMM v2
pinctrl: nomadik: delete stray debug print
pinctrl: rockchip: handle first half of rk3188-bank0 correctly
pinctrl: rockchip: add return value to rockchip_set_mux
pinctrl: rockchip: fix offset of mux registers for rk3188
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index e6f80fc..a4acdda 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -259,7 +259,7 @@
* Switch into virtual mode:
*/
movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
- |IA64_PSR_DI|IA64_PSR_AC)
+ |IA64_PSR_DI)
;;
mov cr.ipsr=r16
movl r17=1f
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 689ffca..18e794a 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -58,7 +58,7 @@
#include <asm/unistd.h>
#include <asm/errno.h>
-#if 1
+#if 0
# define PSR_DEFAULT_BITS psr.ac
#else
# define PSR_DEFAULT_BITS 0
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index 2401848..397e34a 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -64,7 +64,7 @@
#include "kvm_minstate.h"
#include "vti.h"
-#if 1
+#if 0
# define PSR_DEFAULT_BITS psr.ac
#else
# define PSR_DEFAULT_BITS 0
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index d091aa1..bf9c823 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -31,4 +31,23 @@
#define SIGP_STATUS_INCORRECT_STATE 0x00000200UL
#define SIGP_STATUS_NOT_RUNNING 0x00000400UL
+#ifndef __ASSEMBLY__
+
+static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
+{
+ register unsigned int reg1 asm ("1") = parm;
+ int cc;
+
+ asm volatile(
+ " sigp %1,%2,0(%3)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
+ if (status && cc == 1)
+ *status = reg1;
+ return cc;
+}
+
+#endif /* __ASSEMBLY__ */
+
#endif /* __S390_ASM_SIGP_H */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 1607793..21703f8 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -7,6 +7,8 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
+#include <asm/sigp.h>
+
#ifdef CONFIG_SMP
#include <asm/lowcore.h>
@@ -50,9 +52,18 @@
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
static inline void smp_yield(void) { }
-static inline void smp_stop_cpu(void) { }
static inline void smp_fill_possible_mask(void) { }
+static inline void smp_stop_cpu(void)
+{
+ u16 pcpu = stap();
+
+ for (;;) {
+ __pcpu_sigp(pcpu, SIGP_STOP, 0, NULL);
+ cpu_relax();
+ }
+}
+
#endif /* CONFIG_SMP */
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 5eb5c9d..3802d2d 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -282,7 +282,8 @@
#define __NR_finit_module 344
#define __NR_sched_setattr 345
#define __NR_sched_getattr 346
-#define NR_syscalls 345
+#define __NR_renameat2 347
+#define NR_syscalls 348
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index 824c39d..45cdb37 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -1,5 +1,5 @@
/*
- * Compat sytem call wrappers.
+ * Compat system call wrappers.
*
* Copyright IBM Corp. 2014
*/
@@ -213,3 +213,4 @@
COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags);
COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags);
COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index e6af940..acb4124 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -144,10 +144,10 @@
char *mode;
mode = user_mode(regs) ? "User" : "Krnl";
- printk("%s PSW : %p %p (%pSR)\n",
- mode, (void *) regs->psw.mask,
- (void *) regs->psw.addr,
- (void *) regs->psw.addr);
+ printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
+ if (!user_mode(regs))
+ printk(" (%pSR)", (void *)regs->psw.addr);
+ printk("\n");
printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
"P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 4ac8faf..1c82619 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -64,7 +64,7 @@
if (task->thread.per_flags & PER_FLAG_NO_TE)
cr_new &= ~(1UL << 55);
if (cr_new != cr)
- __ctl_load(cr, 0, 0);
+ __ctl_load(cr_new, 0, 0);
/* Set or clear transaction execution TDC bits 62 and 63. */
__ctl_store(cr, 2, 2);
cr_new = cr & ~3UL;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f70f248..88d1ca8 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1027,3 +1027,35 @@
/* Setup zfcpdump support */
setup_zfcpdump();
}
+
+#ifdef CONFIG_32BIT
+static int no_removal_warning __initdata;
+
+static int __init parse_no_removal_warning(char *str)
+{
+ no_removal_warning = 1;
+ return 0;
+}
+__setup("no_removal_warning", parse_no_removal_warning);
+
+static int __init removal_warning(void)
+{
+ if (no_removal_warning)
+ return 0;
+ printk(KERN_ALERT "\n\n");
+ printk(KERN_CONT "Warning - you are using a 31 bit kernel!\n\n");
+ printk(KERN_CONT "We plan to remove 31 bit kernel support from the kernel sources in March 2015.\n");
+ printk(KERN_CONT "Currently we assume that nobody is using the 31 bit kernel on old 31 bit\n");
+ printk(KERN_CONT "hardware anymore. If you think that the code should not be removed and also\n");
+ printk(KERN_CONT "future versions of the Linux kernel should be able to run in 31 bit mode\n");
+ printk(KERN_CONT "please let us know. Please write to:\n");
+ printk(KERN_CONT "linux390@de.ibm.com (mail address) and/or\n");
+ printk(KERN_CONT "linux-s390@vger.kernel.org (mailing list).\n\n");
+ printk(KERN_CONT "Thank you!\n\n");
+ printk(KERN_CONT "If this kernel runs on a 64 bit machine you may consider using a 64 bit kernel.\n");
+ printk(KERN_CONT "This message can be disabled with the \"no_removal_warning\" kernel parameter.\n");
+ schedule_timeout_uninterruptible(300 * HZ);
+ return 0;
+}
+early_initcall(removal_warning);
+#endif
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 512ce1c..86e65ec 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -82,21 +82,6 @@
/*
* Signal processor helper functions.
*/
-static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
-{
- register unsigned int reg1 asm ("1") = parm;
- int cc;
-
- asm volatile(
- " sigp %1,%2,0(%3)\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
- if (status && cc == 1)
- *status = reg1;
- return cc;
-}
-
static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
{
int cc;
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 542ef48..fe5cdf2 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -355,3 +355,4 @@
SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module)
SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */
SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr)
+SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2)
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 23f866b..7416efe 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -338,9 +338,6 @@
register unsigned long reg0 asm("0") = 0;
unsigned long tmp1, tmp2;
- if (unlikely(!size))
- return 0;
- update_primary_asce(current);
asm volatile(
" la %2,0(%1)\n"
" la %3,0(%0,%1)\n"
@@ -359,6 +356,8 @@
unsigned long __strnlen_user(const char __user *src, unsigned long size)
{
+ if (unlikely(!size))
+ return 0;
update_primary_asce(current);
return strnlen_user_srst(src, size);
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 19f623f..2f51a99 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -126,6 +126,133 @@
return 0;
}
+static int bad_address(void *p)
+{
+ unsigned long dummy;
+
+ return probe_kernel_address((unsigned long *)p, dummy);
+}
+
+#ifdef CONFIG_64BIT
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+ unsigned long *table = __va(asce & PAGE_MASK);
+
+ pr_alert("AS:%016lx ", asce);
+ switch (asce & _ASCE_TYPE_MASK) {
+ case _ASCE_TYPE_REGION1:
+ table = table + ((address >> 53) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R1:%016lx ", *table);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_REGION2:
+ table = table + ((address >> 42) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R2:%016lx ", *table);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_REGION3:
+ table = table + ((address >> 31) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R3:%016lx ", *table);
+ if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_SEGMENT:
+ table = table + ((address >> 20) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont(KERN_CONT "S:%016lx ", *table);
+ if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
+ goto out;
+ table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ }
+ table = table + ((address >> 12) & 0xff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("P:%016lx ", *table);
+out:
+ pr_cont("\n");
+ return;
+bad:
+ pr_cont("BAD\n");
+}
+
+#else /* CONFIG_64BIT */
+
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+ unsigned long *table = __va(asce & PAGE_MASK);
+
+ pr_alert("AS:%08lx ", asce);
+ table = table + ((address >> 20) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("S:%08lx ", *table);
+ if (*table & _SEGMENT_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ table = table + ((address >> 12) & 0xff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("P:%08lx ", *table);
+out:
+ pr_cont("\n");
+ return;
+bad:
+ pr_cont("BAD\n");
+}
+
+#endif /* CONFIG_64BIT */
+
+static void dump_fault_info(struct pt_regs *regs)
+{
+ unsigned long asce;
+
+ pr_alert("Fault in ");
+ switch (regs->int_parm_long & 3) {
+ case 3:
+ pr_cont("home space ");
+ break;
+ case 2:
+ pr_cont("secondary space ");
+ break;
+ case 1:
+ pr_cont("access register ");
+ break;
+ case 0:
+ pr_cont("primary space ");
+ break;
+ }
+ pr_cont("mode while using ");
+ if (!user_space_fault(regs)) {
+ asce = S390_lowcore.kernel_asce;
+ pr_cont("kernel ");
+ }
+#ifdef CONFIG_PGSTE
+ else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
+ struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+ asce = gmap->asce;
+ pr_cont("gmap ");
+ }
+#endif
+ else {
+ asce = S390_lowcore.user_asce;
+ pr_cont("user ");
+ }
+ pr_cont("ASCE.\n");
+ dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
+}
+
static inline void report_user_fault(struct pt_regs *regs, long signr)
{
if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
@@ -138,8 +265,9 @@
regs->int_code);
print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
printk(KERN_CONT "\n");
- printk(KERN_ALERT "failing address: %lX\n",
- regs->int_parm_long & __FAIL_ADDR_MASK);
+ printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+ dump_fault_info(regs);
show_regs(regs);
}
@@ -177,11 +305,13 @@
address = regs->int_parm_long & __FAIL_ADDR_MASK;
if (!user_space_fault(regs))
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
- " at virtual kernel address %p\n", (void *)address);
+ " in virtual kernel address space\n");
else
printk(KERN_ALERT "Unable to handle kernel paging request"
- " at virtual user address %p\n", (void *)address);
-
+ " in virtual user address space\n");
+ printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+ dump_fault_info(regs);
die(regs, "Oops");
do_exit(SIGKILL);
}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fcaf9c9..7de069af 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -60,7 +60,7 @@
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bea6067..f47a104 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -308,7 +308,7 @@
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
- F(ADX);
+ F(ADX) | F(SMAP);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a2a1bb7..eeecbed 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -48,6 +48,14 @@
return best && (best->ebx & bit(X86_FEATURE_SMEP));
}
+static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ebx & bit(X86_FEATURE_SMAP));
+}
+
static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f5704d9..813d310 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3601,20 +3601,27 @@
}
}
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept)
{
unsigned bit, byte, pfec;
u8 map;
- bool fault, x, w, u, wf, uf, ff, smep;
+ bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
- smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+ cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+ cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
pfec = byte << 1;
map = 0;
wf = pfec & PFERR_WRITE_MASK;
uf = pfec & PFERR_USER_MASK;
ff = pfec & PFERR_FETCH_MASK;
+ /*
+ * PFERR_RSVD_MASK bit is set in PFEC if the access is not
+ * subject to SMAP restrictions, and cleared otherwise. The
+ * bit is only meaningful if the SMAP bit is set in CR4.
+ */
+ smapf = !(pfec & PFERR_RSVD_MASK);
for (bit = 0; bit < 8; ++bit) {
x = bit & ACC_EXEC_MASK;
w = bit & ACC_WRITE_MASK;
@@ -3626,12 +3633,33 @@
/* Allow supervisor writes if !cr0.wp */
w |= !is_write_protection(vcpu) && !uf;
/* Disallow supervisor fetches of user code if cr4.smep */
- x &= !(smep && u && !uf);
+ x &= !(cr4_smep && u && !uf);
+
+ /*
+ * SMAP:kernel-mode data accesses from user-mode
+ * mappings should fault. A fault is considered
+ * as a SMAP violation if all of the following
+ * conditions are ture:
+ * - X86_CR4_SMAP is set in CR4
+ * - An user page is accessed
+ * - Page fault in kernel mode
+ * - if CPL = 3 or X86_EFLAGS_AC is clear
+ *
+ * Here, we cover the first three conditions.
+ * The fourth is computed dynamically in
+ * permission_fault() and is in smapf.
+ *
+ * Also, SMAP does not affect instruction
+ * fetches, add the !ff check here to make it
+ * clearer.
+ */
+ smap = cr4_smap && u && !uf && !ff;
} else
/* Not really needed: no U/S accesses on ept */
u = 1;
- fault = (ff && !x) || (uf && !u) || (wf && !w);
+ fault = (ff && !x) || (uf && !u) || (wf && !w) ||
+ (smapf && smap);
map |= fault << bit;
}
mmu->permissions[byte] = map;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2926152..3842e70 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -44,11 +44,17 @@
#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+
+#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
+#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
+#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
@@ -73,6 +79,8 @@
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
bool execonly);
+void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ bool ept);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
@@ -110,10 +118,30 @@
* Will a fault with a given page-fault error code (pfec) cause a permission
* fault with the given access (in ACC_* format)?
*/
-static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
- unsigned pfec)
+static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ unsigned pte_access, unsigned pfec)
{
- return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
+ int cpl = kvm_x86_ops->get_cpl(vcpu);
+ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+
+ /*
+ * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
+ *
+ * If CPL = 3, SMAP applies to all supervisor-mode data accesses
+ * (these are implicit supervisor accesses) regardless of the value
+ * of EFLAGS.AC.
+ *
+ * This computes (cpl < 3) && (rflags & X86_EFLAGS_AC), leaving
+ * the result in X86_EFLAGS_AC. We then insert it in place of
+ * the PFERR_RSVD_MASK bit; this bit will always be zero in pfec,
+ * but it will be one in index if SMAP checks are being overridden.
+ * It is important to keep this branchless.
+ */
+ unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
+ int index = (pfec >> 1) +
+ (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+
+ return (mmu->permissions[index] >> pte_access) & 1;
}
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index b1e6c1b..123efd3 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -353,7 +353,7 @@
walker->ptes[walker->level - 1] = pte;
} while (!is_last_gpte(mmu, walker->level, pte));
- if (unlikely(permission_fault(mmu, pte_access, access))) {
+ if (unlikely(permission_fault(vcpu, mmu, pte_access, access))) {
errcode |= PFERR_PRESENT_MASK;
goto error;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1320e0f..1f68c58 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3484,13 +3484,14 @@
hw_cr4 &= ~X86_CR4_PAE;
hw_cr4 |= X86_CR4_PSE;
/*
- * SMEP is disabled if CPU is in non-paging mode in
- * hardware. However KVM always uses paging mode to
+ * SMEP/SMAP is disabled if CPU is in non-paging mode
+ * in hardware. However KVM always uses paging mode to
* emulate guest non-paging mode with TDP.
- * To emulate this behavior, SMEP needs to be manually
- * disabled when guest switches to non-paging mode.
+ * To emulate this behavior, SMEP/SMAP needs to be
+ * manually disabled when guest switches to non-paging
+ * mode.
*/
- hw_cr4 &= ~X86_CR4_SMEP;
+ hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
} else if (!(cr4 & X86_CR4_PAE)) {
hw_cr4 &= ~X86_CR4_PAE;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d1b5cd..8b8fc0b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -652,6 +652,9 @@
if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
return 1;
+ if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+ return 1;
+
if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
return 1;
@@ -680,6 +683,9 @@
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
+ if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
+ update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
+
if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
kvm_update_cpuid(vcpu);
@@ -1117,7 +1123,6 @@
{
struct timespec ts;
- WARN_ON(preemptible());
ktime_get_ts(&ts);
monotonic_to_bootbased(&ts);
return timespec_to_ns(&ts);
@@ -4164,7 +4169,8 @@
| (write ? PFERR_WRITE_MASK : 0);
if (vcpu_match_mmio_gva(vcpu, gva)
- && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
+ && !permission_fault(vcpu, vcpu->arch.walk_mmu,
+ vcpu->arch.access, access)) {
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
(gva & (PAGE_SIZE - 1));
trace_vcpu_match_mmio(gva, *gpa, write, false);
diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c
index 8c3b255..e900961 100644
--- a/drivers/char/hw_random/bcm2835-rng.c
+++ b/drivers/char/hw_random/bcm2835-rng.c
@@ -61,18 +61,18 @@
}
bcm2835_rng_ops.priv = (unsigned long)rng_base;
+ /* set warm-up count & enable */
+ __raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS);
+ __raw_writel(RNG_RBGEN, rng_base + RNG_CTRL);
+
/* register driver */
err = hwrng_register(&bcm2835_rng_ops);
if (err) {
dev_err(dev, "hwrng registration failed\n");
iounmap(rng_base);
- } else {
+ } else
dev_info(dev, "hwrng registered\n");
- /* set warm-up count & enable */
- __raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS);
- __raw_writel(RNG_RBGEN, rng_base + RNG_CTRL);
- }
return err;
}
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index a8efb18..0ab8370 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -8627,6 +8627,7 @@
pci_disable_device(pdev);
}
+#ifdef CONFIG_PM_SLEEP
static int
bnx2_suspend(struct device *device)
{
@@ -8665,7 +8666,6 @@
return 0;
}
-#ifdef CONFIG_PM_SLEEP
static SIMPLE_DEV_PM_OPS(bnx2_pm_ops, bnx2_suspend, bnx2_resume);
#define BNX2_PM_OPS (&bnx2_pm_ops)
diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig
index 751d5c7..7e49c43 100644
--- a/drivers/net/ethernet/cadence/Kconfig
+++ b/drivers/net/ethernet/cadence/Kconfig
@@ -4,7 +4,7 @@
config NET_CADENCE
bool "Cadence devices"
- depends on HAS_IOMEM
+ depends on HAS_IOMEM && (ARM || AVR32 || COMPILE_TEST)
default y
---help---
If you have a network (Ethernet) card belonging to this class, say Y.
@@ -22,7 +22,7 @@
config ARM_AT91_ETHER
tristate "AT91RM9200 Ethernet support"
- depends on HAS_DMA
+ depends on HAS_DMA && (ARCH_AT91RM9200 || COMPILE_TEST)
select MACB
---help---
If you wish to compile a kernel for the AT91RM9200 and enable
@@ -30,7 +30,7 @@
config MACB
tristate "Cadence MACB/GEM support"
- depends on HAS_DMA
+ depends on HAS_DMA && (PLATFORM_AT32AP || ARCH_AT91 || ARCH_PICOXCELL || ARCH_ZYNQ || COMPILE_TEST)
select PHYLIB
---help---
The Cadence MACB ethernet interface is found on many Atmel AT32 and
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index 81e8402..8a96572 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
@@ -154,7 +154,7 @@
req->params = htons(L2T_W_PORT(e->lport) | L2T_W_NOREPLY(!sync));
req->l2t_idx = htons(e->idx);
req->vlan = htons(e->vlan);
- if (e->neigh)
+ if (e->neigh && !(e->neigh->dev->flags & IFF_LOOPBACK))
memcpy(e->dmac, e->neigh->ha, sizeof(e->dmac));
memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
@@ -394,6 +394,8 @@
if (e) {
spin_lock(&e->lock); /* avoid race with t4_l2t_free */
e->state = L2T_STATE_RESOLVING;
+ if (neigh->dev->flags & IFF_LOOPBACK)
+ memcpy(e->dmac, physdev->dev_addr, sizeof(e->dmac));
memcpy(e->addr, addr, addr_len);
e->ifindex = ifidx;
e->hash = hash;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index fb2fe65..bba6768 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -682,7 +682,7 @@
SF_RD_ID = 0x9f, /* read ID */
SF_ERASE_SECTOR = 0xd8, /* erase sector */
- FW_MAX_SIZE = 512 * 1024,
+ FW_MAX_SIZE = 16 * SF_SEC_SIZE,
};
/**
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 8ccaa25..97db5a7 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -374,6 +374,7 @@
#define BE_FLAGS_NAPI_ENABLED (1 << 9)
#define BE_FLAGS_QNQ_ASYNC_EVT_RCVD (1 << 11)
#define BE_FLAGS_VXLAN_OFFLOADS (1 << 12)
+#define BE_FLAGS_SETUP_DONE (1 << 13)
#define BE_UC_PMAC_COUNT 30
#define BE_VF_UC_PMAC_COUNT 2
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 3e6df47..a186454 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2033,11 +2033,13 @@
bool dummy_wrb;
int i, pending_txqs;
- /* Wait for a max of 200ms for all the tx-completions to arrive. */
+ /* Stop polling for compls when HW has been silent for 10ms */
do {
pending_txqs = adapter->num_tx_qs;
for_all_tx_queues(adapter, txo, i) {
+ cmpl = 0;
+ num_wrbs = 0;
txq = &txo->q;
while ((txcp = be_tx_compl_get(&txo->cq))) {
end_idx =
@@ -2050,14 +2052,13 @@
if (cmpl) {
be_cq_notify(adapter, txo->cq.id, false, cmpl);
atomic_sub(num_wrbs, &txq->used);
- cmpl = 0;
- num_wrbs = 0;
+ timeo = 0;
}
if (atomic_read(&txq->used) == 0)
pending_txqs--;
}
- if (pending_txqs == 0 || ++timeo > 200)
+ if (pending_txqs == 0 || ++timeo > 10 || be_hw_error(adapter))
break;
mdelay(1);
@@ -2725,6 +2726,12 @@
struct be_eq_obj *eqo;
int i;
+ /* This protection is needed as be_close() may be called even when the
+ * adapter is in cleared state (after eeh perm failure)
+ */
+ if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
+ return 0;
+
be_roce_dev_close(adapter);
if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
@@ -3055,6 +3062,7 @@
be_clear_queues(adapter);
be_msix_disable(adapter);
+ adapter->flags &= ~BE_FLAGS_SETUP_DONE;
return 0;
}
@@ -3559,6 +3567,7 @@
adapter->phy.fc_autoneg = 1;
be_schedule_worker(adapter);
+ adapter->flags |= BE_FLAGS_SETUP_DONE;
return 0;
err:
be_clear(adapter);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d04b1c3..b248bcb 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -89,9 +89,8 @@
#define MVNETA_TX_IN_PRGRS BIT(1)
#define MVNETA_TX_FIFO_EMPTY BIT(8)
#define MVNETA_RX_MIN_FRAME_SIZE 0x247c
-#define MVNETA_SERDES_CFG 0x24A0
+#define MVNETA_SGMII_SERDES_CFG 0x24A0
#define MVNETA_SGMII_SERDES_PROTO 0x0cc7
-#define MVNETA_RGMII_SERDES_PROTO 0x0667
#define MVNETA_TYPE_PRIO 0x24bc
#define MVNETA_FORCE_UNI BIT(21)
#define MVNETA_TXQ_CMD_1 0x24e4
@@ -712,6 +711,35 @@
mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val);
}
+
+
+/* Sets the RGMII Enable bit (RGMIIEn) in port MAC control register */
+static void mvneta_gmac_rgmii_set(struct mvneta_port *pp, int enable)
+{
+ u32 val;
+
+ val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
+
+ if (enable)
+ val |= MVNETA_GMAC2_PORT_RGMII;
+ else
+ val &= ~MVNETA_GMAC2_PORT_RGMII;
+
+ mvreg_write(pp, MVNETA_GMAC_CTRL_2, val);
+}
+
+/* Config SGMII port */
+static void mvneta_port_sgmii_config(struct mvneta_port *pp)
+{
+ u32 val;
+
+ val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
+ val |= MVNETA_GMAC2_PCS_ENABLE;
+ mvreg_write(pp, MVNETA_GMAC_CTRL_2, val);
+
+ mvreg_write(pp, MVNETA_SGMII_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO);
+}
+
/* Start the Ethernet port RX and TX activity */
static void mvneta_port_up(struct mvneta_port *pp)
{
@@ -2729,15 +2757,12 @@
mvreg_write(pp, MVNETA_UNIT_INTR_CAUSE, 0);
if (phy_mode == PHY_INTERFACE_MODE_SGMII)
- mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO);
- else
- mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_RGMII_SERDES_PROTO);
+ mvneta_port_sgmii_config(pp);
- val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
-
- val |= MVNETA_GMAC2_PCS_ENABLE | MVNETA_GMAC2_PORT_RGMII;
+ mvneta_gmac_rgmii_set(pp, 1);
/* Cancel Port Reset */
+ val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
val &= ~MVNETA_GMAC2_PORT_RESET;
mvreg_write(pp, MVNETA_GMAC_CTRL_2, val);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index f0ae95f..cef267e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2301,13 +2301,8 @@
/* Allow large DMA segments, up to the firmware limit of 1 GB */
dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv) {
- err = -ENOMEM;
- goto err_release_regions;
- }
-
- dev = &priv->dev;
+ dev = pci_get_drvdata(pdev);
+ priv = mlx4_priv(dev);
dev->pdev = pdev;
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
@@ -2374,10 +2369,10 @@
} else {
atomic_inc(&pf_loading);
err = pci_enable_sriov(pdev, total_vfs);
- atomic_dec(&pf_loading);
if (err) {
mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
err);
+ atomic_dec(&pf_loading);
err = 0;
} else {
mlx4_warn(dev, "Running in master mode\n");
@@ -2535,8 +2530,10 @@
mlx4_sense_init(dev);
mlx4_start_sense(dev);
- priv->pci_dev_data = pci_dev_data;
- pci_set_drvdata(pdev, dev);
+ priv->removed = 0;
+
+ if (mlx4_is_master(dev) && dev->num_vfs)
+ atomic_dec(&pf_loading);
return 0;
@@ -2588,6 +2585,9 @@
if (!mlx4_is_slave(dev))
mlx4_free_ownership(dev);
+ if (mlx4_is_master(dev) && dev->num_vfs)
+ atomic_dec(&pf_loading);
+
kfree(priv->dev.dev_vfs);
err_free_dev:
@@ -2604,85 +2604,110 @@
static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
+ struct mlx4_priv *priv;
+ struct mlx4_dev *dev;
+
printk_once(KERN_INFO "%s", mlx4_version);
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ dev = &priv->dev;
+ pci_set_drvdata(pdev, dev);
+ priv->pci_dev_data = id->driver_data;
+
return __mlx4_init_one(pdev, id->driver_data);
}
+static void __mlx4_remove_one(struct pci_dev *pdev)
+{
+ struct mlx4_dev *dev = pci_get_drvdata(pdev);
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int pci_dev_data;
+ int p;
+
+ if (priv->removed)
+ return;
+
+ pci_dev_data = priv->pci_dev_data;
+
+ /* in SRIOV it is not allowed to unload the pf's
+ * driver while there are alive vf's */
+ if (mlx4_is_master(dev) && mlx4_how_many_lives_vf(dev))
+ printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n");
+ mlx4_stop_sense(dev);
+ mlx4_unregister_device(dev);
+
+ for (p = 1; p <= dev->caps.num_ports; p++) {
+ mlx4_cleanup_port_info(&priv->port[p]);
+ mlx4_CLOSE_PORT(dev, p);
+ }
+
+ if (mlx4_is_master(dev))
+ mlx4_free_resource_tracker(dev,
+ RES_TR_FREE_SLAVES_ONLY);
+
+ mlx4_cleanup_counters_table(dev);
+ mlx4_cleanup_qp_table(dev);
+ mlx4_cleanup_srq_table(dev);
+ mlx4_cleanup_cq_table(dev);
+ mlx4_cmd_use_polling(dev);
+ mlx4_cleanup_eq_table(dev);
+ mlx4_cleanup_mcg_table(dev);
+ mlx4_cleanup_mr_table(dev);
+ mlx4_cleanup_xrcd_table(dev);
+ mlx4_cleanup_pd_table(dev);
+
+ if (mlx4_is_master(dev))
+ mlx4_free_resource_tracker(dev,
+ RES_TR_FREE_STRUCTS_ONLY);
+
+ iounmap(priv->kar);
+ mlx4_uar_free(dev, &priv->driver_uar);
+ mlx4_cleanup_uar_table(dev);
+ if (!mlx4_is_slave(dev))
+ mlx4_clear_steering(dev);
+ mlx4_free_eq_table(dev);
+ if (mlx4_is_master(dev))
+ mlx4_multi_func_cleanup(dev);
+ mlx4_close_hca(dev);
+ if (mlx4_is_slave(dev))
+ mlx4_multi_func_cleanup(dev);
+ mlx4_cmd_cleanup(dev);
+
+ if (dev->flags & MLX4_FLAG_MSI_X)
+ pci_disable_msix(pdev);
+ if (dev->flags & MLX4_FLAG_SRIOV) {
+ mlx4_warn(dev, "Disabling SR-IOV\n");
+ pci_disable_sriov(pdev);
+ dev->num_vfs = 0;
+ }
+
+ if (!mlx4_is_slave(dev))
+ mlx4_free_ownership(dev);
+
+ kfree(dev->caps.qp0_tunnel);
+ kfree(dev->caps.qp0_proxy);
+ kfree(dev->caps.qp1_tunnel);
+ kfree(dev->caps.qp1_proxy);
+ kfree(dev->dev_vfs);
+
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ memset(priv, 0, sizeof(*priv));
+ priv->pci_dev_data = pci_dev_data;
+ priv->removed = 1;
+}
+
static void mlx4_remove_one(struct pci_dev *pdev)
{
struct mlx4_dev *dev = pci_get_drvdata(pdev);
struct mlx4_priv *priv = mlx4_priv(dev);
- int p;
- if (dev) {
- /* in SRIOV it is not allowed to unload the pf's
- * driver while there are alive vf's */
- if (mlx4_is_master(dev)) {
- if (mlx4_how_many_lives_vf(dev))
- printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n");
- }
- mlx4_stop_sense(dev);
- mlx4_unregister_device(dev);
-
- for (p = 1; p <= dev->caps.num_ports; p++) {
- mlx4_cleanup_port_info(&priv->port[p]);
- mlx4_CLOSE_PORT(dev, p);
- }
-
- if (mlx4_is_master(dev))
- mlx4_free_resource_tracker(dev,
- RES_TR_FREE_SLAVES_ONLY);
-
- mlx4_cleanup_counters_table(dev);
- mlx4_cleanup_qp_table(dev);
- mlx4_cleanup_srq_table(dev);
- mlx4_cleanup_cq_table(dev);
- mlx4_cmd_use_polling(dev);
- mlx4_cleanup_eq_table(dev);
- mlx4_cleanup_mcg_table(dev);
- mlx4_cleanup_mr_table(dev);
- mlx4_cleanup_xrcd_table(dev);
- mlx4_cleanup_pd_table(dev);
-
- if (mlx4_is_master(dev))
- mlx4_free_resource_tracker(dev,
- RES_TR_FREE_STRUCTS_ONLY);
-
- iounmap(priv->kar);
- mlx4_uar_free(dev, &priv->driver_uar);
- mlx4_cleanup_uar_table(dev);
- if (!mlx4_is_slave(dev))
- mlx4_clear_steering(dev);
- mlx4_free_eq_table(dev);
- if (mlx4_is_master(dev))
- mlx4_multi_func_cleanup(dev);
- mlx4_close_hca(dev);
- if (mlx4_is_slave(dev))
- mlx4_multi_func_cleanup(dev);
- mlx4_cmd_cleanup(dev);
-
- if (dev->flags & MLX4_FLAG_MSI_X)
- pci_disable_msix(pdev);
- if (dev->flags & MLX4_FLAG_SRIOV) {
- mlx4_warn(dev, "Disabling SR-IOV\n");
- pci_disable_sriov(pdev);
- }
-
- if (!mlx4_is_slave(dev))
- mlx4_free_ownership(dev);
-
- kfree(dev->caps.qp0_tunnel);
- kfree(dev->caps.qp0_proxy);
- kfree(dev->caps.qp1_tunnel);
- kfree(dev->caps.qp1_proxy);
- kfree(dev->dev_vfs);
-
- kfree(priv);
- pci_release_regions(pdev);
- pci_disable_device(pdev);
- pci_set_drvdata(pdev, NULL);
- }
+ __mlx4_remove_one(pdev);
+ kfree(priv);
+ pci_set_drvdata(pdev, NULL);
}
int mlx4_restart_one(struct pci_dev *pdev)
@@ -2692,7 +2717,7 @@
int pci_dev_data;
pci_dev_data = priv->pci_dev_data;
- mlx4_remove_one(pdev);
+ __mlx4_remove_one(pdev);
return __mlx4_init_one(pdev, pci_dev_data);
}
@@ -2747,7 +2772,7 @@
static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
- mlx4_remove_one(pdev);
+ __mlx4_remove_one(pdev);
return state == pci_channel_io_perm_failure ?
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
@@ -2755,11 +2780,11 @@
static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
{
- const struct pci_device_id *id;
- int ret;
+ struct mlx4_dev *dev = pci_get_drvdata(pdev);
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ int ret;
- id = pci_match_id(mlx4_pci_table, pdev);
- ret = __mlx4_init_one(pdev, id->driver_data);
+ ret = __mlx4_init_one(pdev, priv->pci_dev_data);
return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index cf8be41..f9c4651 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -800,6 +800,7 @@
spinlock_t ctx_lock;
int pci_dev_data;
+ int removed;
struct list_head pgdir_list;
struct mutex pgdir_mutex;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
index b48737d..ba20c72 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
@@ -2139,8 +2139,6 @@
ahw->max_mac_filters = nic_info.max_mac_filters;
ahw->max_mtu = nic_info.max_mtu;
- adapter->max_tx_rings = ahw->max_tx_ques;
- adapter->max_sds_rings = ahw->max_rx_ques;
/* eSwitch capability indicates vNIC mode.
* vNIC and SRIOV are mutually exclusive operational modes.
* If SR-IOV capability is detected, SR-IOV physical function
@@ -2161,6 +2159,7 @@
int qlcnic_83xx_configure_opmode(struct qlcnic_adapter *adapter)
{
struct qlcnic_hardware_context *ahw = adapter->ahw;
+ u16 max_sds_rings, max_tx_rings;
int ret;
ret = qlcnic_83xx_get_nic_configuration(adapter);
@@ -2173,18 +2172,21 @@
if (qlcnic_83xx_config_vnic_opmode(adapter))
return -EIO;
- adapter->max_sds_rings = QLCNIC_MAX_VNIC_SDS_RINGS;
- adapter->max_tx_rings = QLCNIC_MAX_VNIC_TX_RINGS;
+ max_sds_rings = QLCNIC_MAX_VNIC_SDS_RINGS;
+ max_tx_rings = QLCNIC_MAX_VNIC_TX_RINGS;
} else if (ret == QLC_83XX_DEFAULT_OPMODE) {
ahw->nic_mode = QLCNIC_DEFAULT_MODE;
adapter->nic_ops->init_driver = qlcnic_83xx_init_default_driver;
ahw->idc.state_entry = qlcnic_83xx_idc_ready_state_entry;
- adapter->max_sds_rings = QLCNIC_MAX_SDS_RINGS;
- adapter->max_tx_rings = QLCNIC_MAX_TX_RINGS;
+ max_sds_rings = QLCNIC_MAX_SDS_RINGS;
+ max_tx_rings = QLCNIC_MAX_TX_RINGS;
} else {
return -EIO;
}
+ adapter->max_sds_rings = min(ahw->max_rx_ques, max_sds_rings);
+ adapter->max_tx_rings = min(ahw->max_tx_ques, max_tx_rings);
+
return 0;
}
@@ -2348,15 +2350,16 @@
goto disable_intr;
}
+ INIT_DELAYED_WORK(&adapter->idc_aen_work, qlcnic_83xx_idc_aen_work);
+
err = qlcnic_83xx_setup_mbx_intr(adapter);
if (err)
goto disable_mbx_intr;
qlcnic_83xx_clear_function_resources(adapter);
-
- INIT_DELAYED_WORK(&adapter->idc_aen_work, qlcnic_83xx_idc_aen_work);
-
+ qlcnic_dcb_enable(adapter->dcb);
qlcnic_83xx_initialize_nic(adapter, 1);
+ qlcnic_dcb_get_info(adapter->dcb);
/* Configure default, SR-IOV or Virtual NIC mode of operation */
err = qlcnic_83xx_configure_opmode(adapter);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index 64dcbf3..c1e11f5 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -883,8 +883,6 @@
npar_info->max_rx_ques = le16_to_cpu(nic_info->max_rx_ques);
npar_info->capabilities = le32_to_cpu(nic_info->capabilities);
npar_info->max_mtu = le16_to_cpu(nic_info->max_mtu);
- adapter->max_tx_rings = npar_info->max_tx_ques;
- adapter->max_sds_rings = npar_info->max_rx_ques;
}
qlcnic_free_mbx_args(&cmd);
@@ -1356,6 +1354,7 @@
arg2 &= ~BIT_3;
break;
case QLCNIC_ADD_VLAN:
+ arg1 &= ~(0x0ffff << 16);
arg1 |= (BIT_2 | BIT_5);
arg1 |= (esw_cfg->vlan_id << 16);
break;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c
index 7d4f549..a51fe18 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c
@@ -330,8 +330,6 @@
goto out_free_cfg;
}
- qlcnic_dcb_get_info(dcb);
-
return 0;
out_free_cfg:
kfree(dcb->cfg);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 309d056..dbf7539 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -670,7 +670,7 @@
else
num_msix += adapter->drv_tx_rings;
- if (adapter->drv_rss_rings > 0)
+ if (adapter->drv_rss_rings > 0)
num_msix += adapter->drv_rss_rings;
else
num_msix += adapter->drv_sds_rings;
@@ -686,19 +686,15 @@
return -ENOMEM;
}
-restore:
for (vector = 0; vector < num_msix; vector++)
adapter->msix_entries[vector].entry = vector;
+restore:
err = pci_enable_msix(pdev, adapter->msix_entries, num_msix);
- if (err == 0) {
- adapter->ahw->num_msix = num_msix;
- if (adapter->drv_tss_rings > 0)
- adapter->drv_tx_rings = adapter->drv_tss_rings;
+ if (err > 0) {
+ if (!adapter->drv_tss_rings && !adapter->drv_rss_rings)
+ return -ENOSPC;
- if (adapter->drv_rss_rings > 0)
- adapter->drv_sds_rings = adapter->drv_rss_rings;
- } else {
netdev_info(adapter->netdev,
"Unable to allocate %d MSI-X vectors, Available vectors %d\n",
num_msix, err);
@@ -716,12 +712,20 @@
"Restoring %d Tx, %d SDS rings for total %d vectors.\n",
adapter->drv_tx_rings, adapter->drv_sds_rings,
num_msix);
- goto restore;
- err = -EIO;
+ goto restore;
+ } else if (err < 0) {
+ return err;
}
- return err;
+ adapter->ahw->num_msix = num_msix;
+ if (adapter->drv_tss_rings > 0)
+ adapter->drv_tx_rings = adapter->drv_tss_rings;
+
+ if (adapter->drv_rss_rings > 0)
+ adapter->drv_sds_rings = adapter->drv_rss_rings;
+
+ return 0;
}
int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix)
@@ -2528,8 +2532,6 @@
goto err_out_free_hw;
}
- qlcnic_dcb_enable(adapter->dcb);
-
if (qlcnic_read_mac_addr(adapter))
dev_warn(&pdev->dev, "failed to read mac addr\n");
@@ -2549,7 +2551,10 @@
"Device does not support MSI interrupts\n");
if (qlcnic_82xx_check(adapter)) {
+ qlcnic_dcb_enable(adapter->dcb);
+ qlcnic_dcb_get_info(adapter->dcb);
err = qlcnic_setup_intr(adapter);
+
if (err) {
dev_err(&pdev->dev, "Failed to setup interrupt\n");
goto err_out_disable_msi;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
index 14f748c..2801379 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
@@ -461,6 +461,16 @@
{
struct net_device *netdev = adapter->netdev;
+ if (pci_vfs_assigned(adapter->pdev)) {
+ netdev_err(adapter->netdev,
+ "SR-IOV VFs belonging to port %d are assigned to VMs. SR-IOV can not be disabled on this port\n",
+ adapter->portnum);
+ netdev_info(adapter->netdev,
+ "Please detach SR-IOV VFs belonging to port %d from VMs, and then try to disable SR-IOV on this port\n",
+ adapter->portnum);
+ return -EPERM;
+ }
+
rtnl_lock();
if (netif_running(netdev))
__qlcnic_down(adapter, netdev);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
index 448d156..cd346e2 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
@@ -354,7 +354,7 @@
{
int i;
- for (i = 0; i < adapter->ahw->max_vnic_func; i++) {
+ for (i = 0; i < adapter->ahw->total_nic_func; i++) {
if (adapter->npars[i].pci_func == pci_func)
return i;
}
@@ -720,6 +720,7 @@
struct qlcnic_adapter *adapter = dev_get_drvdata(dev);
struct qlcnic_npar_func_cfg *np_cfg;
struct qlcnic_info nic_info;
+ u8 pci_func;
int i, ret;
u32 count;
@@ -729,26 +730,28 @@
count = size / sizeof(struct qlcnic_npar_func_cfg);
for (i = 0; i < adapter->ahw->total_nic_func; i++) {
- if (qlcnic_is_valid_nic_func(adapter, i) < 0)
- continue;
if (adapter->npars[i].pci_func >= count) {
dev_dbg(dev, "%s: Total nic functions[%d], App sent function count[%d]\n",
__func__, adapter->ahw->total_nic_func, count);
continue;
}
- ret = qlcnic_get_nic_info(adapter, &nic_info, i);
- if (ret)
- return ret;
if (!adapter->npars[i].eswitch_status)
continue;
- np_cfg[i].pci_func = i;
- np_cfg[i].op_mode = (u8)nic_info.op_mode;
- np_cfg[i].port_num = nic_info.phys_port;
- np_cfg[i].fw_capab = nic_info.capabilities;
- np_cfg[i].min_bw = nic_info.min_tx_bw;
- np_cfg[i].max_bw = nic_info.max_tx_bw;
- np_cfg[i].max_tx_queues = nic_info.max_tx_ques;
- np_cfg[i].max_rx_queues = nic_info.max_rx_ques;
+ pci_func = adapter->npars[i].pci_func;
+ if (qlcnic_is_valid_nic_func(adapter, pci_func) < 0)
+ continue;
+ ret = qlcnic_get_nic_info(adapter, &nic_info, pci_func);
+ if (ret)
+ return ret;
+
+ np_cfg[pci_func].pci_func = pci_func;
+ np_cfg[pci_func].op_mode = (u8)nic_info.op_mode;
+ np_cfg[pci_func].port_num = nic_info.phys_port;
+ np_cfg[pci_func].fw_capab = nic_info.capabilities;
+ np_cfg[pci_func].min_bw = nic_info.min_tx_bw;
+ np_cfg[pci_func].max_bw = nic_info.max_tx_bw;
+ np_cfg[pci_func].max_tx_queues = nic_info.max_tx_ques;
+ np_cfg[pci_func].max_rx_queues = nic_info.max_rx_ques;
}
return size;
}
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 430bb0d..e36f194 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -365,7 +365,7 @@
dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]);
if (status == 0)
- *data = buf[1];
+ *data = (buf[1] & mask) >> shift;
return status;
}
@@ -1025,14 +1025,6 @@
return -EINVAL;
}
- rc = at86rf230_read_subreg(lp, SR_AVDD_OK, &status);
- if (rc)
- return rc;
- if (!status) {
- dev_err(&lp->spi->dev, "AVDD error\n");
- return -EINVAL;
- }
-
return 0;
}
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index c55e316..82355d5 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1755,8 +1755,8 @@
if (err)
return err;
- return iptunnel_xmit(rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df,
- false);
+ return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
+ tos, ttl, df, false);
}
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 84734a8..83c39e2 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -1521,11 +1521,7 @@
cosa_putstatus(cosa, 0);
cosa_getdata8(cosa);
cosa_putstatus(cosa, SR_RST);
-#ifdef MODULE
msleep(500);
-#else
- udelay(5*100000);
-#endif
/* Disable all IRQs from the card */
cosa_putstatus(cosa, 0);
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index 1990285..c316051 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -1252,7 +1252,7 @@
return rc;
sclp_pdev = platform_device_register_simple("sclp", -1, NULL, 0);
- rc = PTR_RET(sclp_pdev);
+ rc = PTR_ERR_OR_ZERO(sclp_pdev);
if (rc)
goto fail_platform_driver_unregister;
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 6e8f90f..6e14999 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -515,7 +515,7 @@
if (rc)
goto out;
sclp_pdev = platform_device_register_simple("sclp_mem", -1, NULL, 0);
- rc = PTR_RET(sclp_pdev);
+ rc = PTR_ERR_OR_ZERO(sclp_pdev);
if (rc)
goto out_driver;
sclp_add_standby_memory();
diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c
index 4eed38c..cd9c919 100644
--- a/drivers/s390/char/sclp_vt220.c
+++ b/drivers/s390/char/sclp_vt220.c
@@ -97,15 +97,18 @@
static int __sclp_vt220_emit(struct sclp_vt220_request *request);
static void sclp_vt220_emit_current(void);
-/* Registration structure for our interest in SCLP event buffers */
+/* Registration structure for SCLP output event buffers */
static struct sclp_register sclp_vt220_register = {
.send_mask = EVTYP_VT220MSG_MASK,
- .receive_mask = EVTYP_VT220MSG_MASK,
- .state_change_fn = NULL,
- .receiver_fn = sclp_vt220_receiver_fn,
.pm_event_fn = sclp_vt220_pm_event_fn,
};
+/* Registration structure for SCLP input event buffers */
+static struct sclp_register sclp_vt220_register_input = {
+ .receive_mask = EVTYP_VT220MSG_MASK,
+ .receiver_fn = sclp_vt220_receiver_fn,
+};
+
/*
* Put provided request buffer back into queue and check emit pending
@@ -715,9 +718,14 @@
rc = tty_register_driver(driver);
if (rc)
goto out_init;
+ rc = sclp_register(&sclp_vt220_register_input);
+ if (rc)
+ goto out_reg;
sclp_vt220_driver = driver;
return 0;
+out_reg:
+ tty_unregister_driver(driver);
out_init:
__sclp_vt220_cleanup();
out_driver:
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 262dcbb..024fd03 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -220,7 +220,6 @@
BPF_S_ANC_RXHASH,
BPF_S_ANC_CPU,
BPF_S_ANC_ALU_XOR_X,
- BPF_S_ANC_SECCOMP_LD_W,
BPF_S_ANC_VLAN_TAG,
BPF_S_ANC_VLAN_TAG_PRESENT,
BPF_S_ANC_PAY_OFFSET,
diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index ec2ffaf..df78dc2 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -87,7 +87,6 @@
/* delete keymap entries */
void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
-void nf_ct_gre_keymap_flush(struct net *net);
void nf_nat_need_gre(void);
#endif /* __KERNEL__ */
diff --git a/include/net/dst.h b/include/net/dst.h
index 46ed958..71c60f4 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -45,7 +45,7 @@
void *__pad1;
#endif
int (*input)(struct sk_buff *);
- int (*output)(struct sk_buff *);
+ int (*output)(struct sock *sk, struct sk_buff *skb);
unsigned short flags;
#define DST_HOST 0x0001
@@ -367,7 +367,11 @@
return child;
}
-int dst_discard(struct sk_buff *skb);
+int dst_discard_sk(struct sock *sk, struct sk_buff *skb);
+static inline int dst_discard(struct sk_buff *skb)
+{
+ return dst_discard_sk(skb->sk, skb);
+}
void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref,
int initial_obsolete, unsigned short flags);
void __dst_free(struct dst_entry *dst);
@@ -449,9 +453,13 @@
}
/* Output packet to network from transport. */
+static inline int dst_output_sk(struct sock *sk, struct sk_buff *skb)
+{
+ return skb_dst(skb)->output(sk, skb);
+}
static inline int dst_output(struct sk_buff *skb)
{
- return skb_dst(skb)->output(skb);
+ return dst_output_sk(skb->sk, skb);
}
/* Input packet from network to transport. */
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index f981ba7..74af137 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -40,7 +40,7 @@
void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
-int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl);
+int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu);
#endif /* _INET6_CONNECTION_SOCK_H */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c55aeed..7a43138 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -36,7 +36,7 @@
* (i.e. things that depend on the address family)
*/
struct inet_connection_sock_af_ops {
- int (*queue_xmit)(struct sk_buff *skb, struct flowi *fl);
+ int (*queue_xmit)(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
void (*send_check)(struct sock *sk, struct sk_buff *skb);
int (*rebuild_header)(struct sock *sk);
void (*sk_rx_dst_set)(struct sock *sk, const struct sk_buff *skb);
diff --git a/include/net/ip.h b/include/net/ip.h
index 25064c2..3ec2b0f 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -104,14 +104,19 @@
struct net_device *orig_dev);
int ip_local_deliver(struct sk_buff *skb);
int ip_mr_input(struct sk_buff *skb);
-int ip_output(struct sk_buff *skb);
-int ip_mc_output(struct sk_buff *skb);
+int ip_output(struct sock *sk, struct sk_buff *skb);
+int ip_mc_output(struct sock *sk, struct sk_buff *skb);
int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
int ip_do_nat(struct sk_buff *skb);
void ip_send_check(struct iphdr *ip);
int __ip_local_out(struct sk_buff *skb);
-int ip_local_out(struct sk_buff *skb);
-int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl);
+int ip_local_out_sk(struct sock *sk, struct sk_buff *skb);
+static inline int ip_local_out(struct sk_buff *skb)
+{
+ return ip_local_out_sk(skb->sk, skb);
+}
+
+int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
void ip_init(void);
int ip_append_data(struct sock *sk, struct flowi4 *fl4,
int getfrag(void *from, char *to, int offset, int len,
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 3c3bb18..6c4f5ea 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -32,6 +32,11 @@
#define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010
#define RT6_LOOKUP_F_SRCPREF_COA 0x00000020
+/* We do not (yet ?) support IPv6 jumbograms (RFC 2675)
+ * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header
+ */
+#define IP6_MAX_MTU (0xFFFF + sizeof(struct ipv6hdr))
+
/*
* rt6_srcprefs2flags() and rt6_flags2srcprefs() translate
* between IPV6_ADDR_PREFERENCES socket option values
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index e77c104..a4daf9e 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -153,7 +153,7 @@
}
int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
-int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
+int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df, bool xnet);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 4f541f1..d640925 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -731,7 +731,7 @@
* skb processing functions
*/
-int ip6_output(struct sk_buff *skb);
+int ip6_output(struct sock *sk, struct sk_buff *skb);
int ip6_forward(struct sk_buff *skb);
int ip6_input(struct sk_buff *skb);
int ip6_mc_input(struct sk_buff *skb);
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index cf2b7ae2..a75fc8e 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -13,6 +13,16 @@
u8 len;
};
+/* Calculate the mask for the nft_cmp_fast expression. On big endian the
+ * mask needs to include the *upper* bytes when interpreting that data as
+ * something smaller than the full u32, therefore a cpu_to_le32 is done.
+ */
+static inline u32 nft_cmp_fast_mask(unsigned int len)
+{
+ return cpu_to_le32(~0U >> (FIELD_SIZEOF(struct nft_cmp_fast_expr,
+ data) * BITS_PER_BYTE - len));
+}
+
extern const struct nft_expr_ops nft_cmp_fast_ops;
int nft_cmp_module_init(void);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 6ee76c8..d992ca3 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1653,6 +1653,17 @@
/* This is the last advertised value of rwnd over a SACK chunk. */
__u32 a_rwnd;
+ /* Number of bytes by which the rwnd has slopped. The rwnd is allowed
+ * to slop over a maximum of the association's frag_point.
+ */
+ __u32 rwnd_over;
+
+ /* Keeps treack of rwnd pressure. This happens when we have
+ * a window, but not recevie buffer (i.e small packets). This one
+ * is releases slowly (1 PMTU at a time ).
+ */
+ __u32 rwnd_press;
+
/* This is the sndbuf size in use for the association.
* This corresponds to the sndbuf size for the association,
* as specified in the sk->sndbuf.
@@ -1881,7 +1892,8 @@
__u32 sctp_association_get_next_tsn(struct sctp_association *);
void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *);
-void sctp_assoc_rwnd_update(struct sctp_association *, bool);
+void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
+void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
void sctp_assoc_set_primary(struct sctp_association *,
struct sctp_transport *);
void sctp_assoc_del_nonprimary_peers(struct sctp_association *,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 32682ae..116e9c7 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -333,7 +333,7 @@
const xfrm_address_t *saddr);
int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n);
int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n);
- int (*output)(struct sk_buff *skb);
+ int (*output)(struct sock *sk, struct sk_buff *skb);
int (*output_finish)(struct sk_buff *skb);
int (*extract_input)(struct xfrm_state *x,
struct sk_buff *skb);
@@ -1540,7 +1540,7 @@
int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb);
-int xfrm4_output(struct sk_buff *skb);
+int xfrm4_output(struct sock *sk, struct sk_buff *skb);
int xfrm4_output_finish(struct sk_buff *skb);
int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err);
int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol);
@@ -1565,7 +1565,7 @@
__be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr);
int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb);
-int xfrm6_output(struct sk_buff *skb);
+int xfrm6_output(struct sock *sk, struct sk_buff *skb);
int xfrm6_output_finish(struct sk_buff *skb);
int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
u8 **prevhdr);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d8d046c..590c379 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -69,18 +69,17 @@
{
struct task_struct *task = current;
struct pt_regs *regs = task_pt_regs(task);
+ unsigned long args[6];
sd->nr = syscall_get_nr(task, regs);
sd->arch = syscall_get_arch();
-
- /* Unroll syscall_get_args to help gcc on arm. */
- syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]);
- syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]);
- syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]);
- syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]);
- syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]);
- syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]);
-
+ syscall_get_arguments(task, regs, 0, 6, args);
+ sd->args[0] = args[0];
+ sd->args[1] = args[1];
+ sd->args[2] = args[2];
+ sd->args[3] = args[3];
+ sd->args[4] = args[4];
+ sd->args[5] = args[5];
sd->instruction_pointer = KSTK_EIP(task);
}
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0d8f602..bf71b4b 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -152,7 +152,7 @@
/* Find the matching extent */
extents = map->nr_extents;
- smp_read_barrier_depends();
+ smp_rmb();
for (idx = 0; idx < extents; idx++) {
first = map->extent[idx].first;
last = first + map->extent[idx].count - 1;
@@ -176,7 +176,7 @@
/* Find the matching extent */
extents = map->nr_extents;
- smp_read_barrier_depends();
+ smp_rmb();
for (idx = 0; idx < extents; idx++) {
first = map->extent[idx].first;
last = first + map->extent[idx].count - 1;
@@ -199,7 +199,7 @@
/* Find the matching extent */
extents = map->nr_extents;
- smp_read_barrier_depends();
+ smp_rmb();
for (idx = 0; idx < extents; idx++) {
first = map->extent[idx].lower_first;
last = first + map->extent[idx].count - 1;
@@ -615,9 +615,8 @@
* were written before the count of the extents.
*
* To achieve this smp_wmb() is used on guarantee the write
- * order and smp_read_barrier_depends() is guaranteed that we
- * don't have crazy architectures returning stale data.
- *
+ * order and smp_rmb() is guaranteed that we don't have crazy
+ * architectures returning stale data.
*/
mutex_lock(&id_map_mutex);
diff --git a/net/core/dev.c b/net/core/dev.c
index 14dac065..5b3042e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2284,7 +2284,7 @@
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
__be16 type = skb->protocol;
- int vlan_depth = ETH_HLEN;
+ int vlan_depth = skb->mac_len;
/* Tunnel gso handlers can set protocol to ethernet. */
if (type == htons(ETH_P_TEB)) {
diff --git a/net/core/dst.c b/net/core/dst.c
index ca4231e..80d6286 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -142,12 +142,12 @@
mutex_unlock(&dst_gc_mutex);
}
-int dst_discard(struct sk_buff *skb)
+int dst_discard_sk(struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
-EXPORT_SYMBOL(dst_discard);
+EXPORT_SYMBOL(dst_discard_sk);
const u32 dst_default_metrics[RTAX_MAX + 1] = {
/* This initializer is needed to force linker to place this variable
@@ -184,7 +184,7 @@
dst->xfrm = NULL;
#endif
dst->input = dst_discard;
- dst->output = dst_discard;
+ dst->output = dst_discard_sk;
dst->error = 0;
dst->obsolete = initial_obsolete;
dst->header_len = 0;
@@ -209,8 +209,10 @@
/* The first case (dev==NULL) is required, when
protocol module is unloaded.
*/
- if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
- dst->input = dst->output = dst_discard;
+ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
+ dst->input = dst_discard;
+ dst->output = dst_discard_sk;
+ }
dst->obsolete = DST_OBSOLETE_DEAD;
}
@@ -361,7 +363,8 @@
return;
if (!unregister) {
- dst->input = dst->output = dst_discard;
+ dst->input = dst_discard;
+ dst->output = dst_discard_sk;
} else {
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
diff --git a/net/core/filter.c b/net/core/filter.c
index e08b382..cd58614 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -600,6 +600,9 @@
if (skb_is_nonlinear(skb))
return 0;
+ if (skb->len < sizeof(struct nlattr))
+ return 0;
+
if (A > skb->len - sizeof(struct nlattr))
return 0;
@@ -618,11 +621,14 @@
if (skb_is_nonlinear(skb))
return 0;
+ if (skb->len < sizeof(struct nlattr))
+ return 0;
+
if (A > skb->len - sizeof(struct nlattr))
return 0;
nla = (struct nlattr *) &skb->data[A];
- if (nla->nla_len > A - skb->len)
+ if (nla->nla_len > skb->len - A)
return 0;
nla = nla_find_nested(nla, X);
@@ -1737,7 +1743,6 @@
[BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
- [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS,
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 8876078..0248e8a 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -138,7 +138,7 @@
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
- err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
+ err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
return net_xmit_eval(err);
}
return -ENOBUFS;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index ce0cbbf..daccc4a 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -752,7 +752,7 @@
return n->output(n, skb);
}
-static int dn_output(struct sk_buff *skb)
+static int dn_output(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *)dst;
@@ -838,6 +838,18 @@
* Used to catch bugs. This should never normally get
* called.
*/
+static int dn_rt_bug_sk(struct sock *sk, struct sk_buff *skb)
+{
+ struct dn_skb_cb *cb = DN_SKB_CB(skb);
+
+ net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
+ le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
+
+ kfree_skb(skb);
+
+ return NET_RX_DROP;
+}
+
static int dn_rt_bug(struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
@@ -1463,7 +1475,7 @@
rt->n = neigh;
rt->dst.lastuse = jiffies;
- rt->dst.output = dn_rt_bug;
+ rt->dst.output = dn_rt_bug_sk;
switch (res.type) {
case RTN_UNICAST:
rt->dst.input = dn_forward;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1a0755f..1cbeba5 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -101,17 +101,17 @@
skb_dst(skb)->dev, dst_output);
}
-int ip_local_out(struct sk_buff *skb)
+int ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
{
int err;
err = __ip_local_out(skb);
if (likely(err == 1))
- err = dst_output(skb);
+ err = dst_output_sk(sk, skb);
return err;
}
-EXPORT_SYMBOL_GPL(ip_local_out);
+EXPORT_SYMBOL_GPL(ip_local_out_sk);
static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
@@ -226,9 +226,8 @@
return ip_finish_output2(skb);
}
-int ip_mc_output(struct sk_buff *skb)
+int ip_mc_output(struct sock *sk, struct sk_buff *skb)
{
- struct sock *sk = skb->sk;
struct rtable *rt = skb_rtable(skb);
struct net_device *dev = rt->dst.dev;
@@ -287,7 +286,7 @@
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
-int ip_output(struct sk_buff *skb)
+int ip_output(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
@@ -315,9 +314,9 @@
sizeof(fl4->saddr) + sizeof(fl4->daddr));
}
-int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
+/* Note: skb->sk can be different from sk, in case of tunnels */
+int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
{
- struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
struct ip_options_rcu *inet_opt;
struct flowi4 *fl4;
@@ -389,6 +388,7 @@
ip_select_ident_more(skb, &rt->dst, sk,
(skb_shinfo(skb)->gso_segs ?: 1) - 1);
+ /* TODO : should we use skb->sk here instead of sk ? */
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e77381d..484d0ce 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -670,7 +670,7 @@
return;
}
- err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
+ err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index e0c2b1d..bcf206c 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -46,7 +46,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
-int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
+int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df, bool xnet)
{
@@ -76,7 +76,7 @@
iph->ttl = ttl;
__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
- err = ip_local_out(skb);
+ err = ip_local_out_sk(sk, skb);
if (unlikely(net_xmit_eval(err)))
pkt_len = 0;
return pkt_len;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index f4b19e5..8210964 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -252,26 +252,33 @@
{
struct net *net = sock_net(sk);
kgid_t group = current_egid();
- struct group_info *group_info = get_current_groups();
- int i, j, count = group_info->ngroups;
+ struct group_info *group_info;
+ int i, j, count;
kgid_t low, high;
+ int ret = 0;
inet_get_ping_group_range_net(net, &low, &high);
if (gid_lte(low, group) && gid_lte(group, high))
return 0;
+ group_info = get_current_groups();
+ count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
for (j = 0; j < cp_count; j++) {
kgid_t gid = group_info->blocks[i][j];
if (gid_lte(low, gid) && gid_lte(gid, high))
- return 0;
+ goto out_release_group;
}
count -= cp_count;
}
- return -EACCES;
+ ret = -EACCES;
+
+out_release_group:
+ put_group_info(group_info);
+ return ret;
}
EXPORT_SYMBOL_GPL(ping_init_sock);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 34d094c..1485aaf 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1129,7 +1129,7 @@
dst_set_expires(&rt->dst, 0);
}
-static int ip_rt_bug(struct sk_buff *skb)
+static int ip_rt_bug(struct sock *sk, struct sk_buff *skb)
{
pr_debug("%s: %pI4 -> %pI4, %s\n",
__func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
@@ -2218,7 +2218,7 @@
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard;
+ new->output = dst_discard_sk;
new->dev = ort->dst.dev;
if (new->dev)
@@ -2357,7 +2357,7 @@
}
} else
#endif
- if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
+ if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
goto nla_put_failure;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 699fb10..025e250 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -981,7 +981,7 @@
TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
tcp_skb_pcount(skb));
- err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
+ err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
if (likely(err <= 0))
return err;
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index baa0f63..40e701f 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -86,7 +86,7 @@
return xfrm_output(skb);
}
-int xfrm4_output(struct sk_buff *skb)
+int xfrm4_output(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index c913818..d4ade34 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -224,9 +224,8 @@
return dst;
}
-int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
+int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused)
{
- struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
struct flowi6 fl6;
struct dst_entry *dst;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c98338b..9d92146 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1559,6 +1559,15 @@
return 0;
}
+static void ip6gre_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ if (dev != ign->fb_tunnel_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
static size_t ip6gre_get_size(const struct net_device *dev)
{
return
@@ -1636,6 +1645,7 @@
.validate = ip6gre_tunnel_validate,
.newlink = ip6gre_newlink,
.changelink = ip6gre_changelink,
+ .dellink = ip6gre_dellink,
.get_size = ip6gre_get_size,
.fill_info = ip6gre_fill_info,
};
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3284d61..40e7581 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -132,7 +132,7 @@
return ip6_finish_output2(skb);
}
-int ip6_output(struct sk_buff *skb)
+int ip6_output(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5015c50..4011617 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -84,9 +84,9 @@
static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
-static int ip6_pkt_discard_out(struct sk_buff *skb);
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
-static int ip6_pkt_prohibit_out(struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
@@ -290,7 +290,7 @@
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
.input = dst_discard,
- .output = dst_discard,
+ .output = dst_discard_sk,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_protocol = RTPROT_KERNEL,
@@ -1058,7 +1058,7 @@
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard;
+ new->output = dst_discard_sk;
if (dst_metrics_read_only(&ort->dst))
new->_metrics = ort->dst._metrics;
@@ -1338,7 +1338,7 @@
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
- return mtu;
+ goto out;
mtu = IPV6_MIN_MTU;
@@ -1348,7 +1348,8 @@
mtu = idev->cnf.mtu6;
rcu_read_unlock();
- return mtu;
+out:
+ return min_t(unsigned int, mtu, IP6_MAX_MTU);
}
static struct dst_entry *icmp6_dst_gc_list;
@@ -1576,7 +1577,7 @@
switch (cfg->fc_type) {
case RTN_BLACKHOLE:
rt->dst.error = -EINVAL;
- rt->dst.output = dst_discard;
+ rt->dst.output = dst_discard_sk;
rt->dst.input = dst_discard;
break;
case RTN_PROHIBIT:
@@ -2128,7 +2129,7 @@
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_discard_out(struct sk_buff *skb)
+static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
@@ -2139,7 +2140,7 @@
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_prohibit_out(struct sk_buff *skb)
+static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 1693c8d..8da8268 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -974,8 +974,9 @@
goto out;
}
- err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
- ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+ err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
+ IPPROTO_IPV6, tos, ttl, df,
+ !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6cd625e..19ef329 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -163,7 +163,7 @@
return x->outer_mode->afinfo->output_finish(skb);
}
-int xfrm6_output(struct sk_buff *skb)
+int xfrm6_output(struct sock *sk, struct sk_buff *skb)
{
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
skb_dst(skb)->dev, __xfrm6_output);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 47f7a54..a4e37d7 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1131,10 +1131,10 @@
skb->local_df = 1;
#if IS_ENABLED(CONFIG_IPV6)
if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
- error = inet6_csk_xmit(skb, NULL);
+ error = inet6_csk_xmit(tunnel->sock, skb, NULL);
else
#endif
- error = ip_queue_xmit(skb, fl);
+ error = ip_queue_xmit(tunnel->sock, skb, fl);
/* Update stats */
if (error >= 0) {
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 0b44d85..3397fe6 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -487,7 +487,7 @@
xmit:
/* Queue the packet to IP for output */
- rc = ip_queue_xmit(skb, &inet->cork.fl);
+ rc = ip_queue_xmit(sk, skb, &inet->cork.fl);
rcu_read_unlock();
error:
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 6dba48e..75421f2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1795,6 +1795,7 @@
int cpu;
atomic_set(&net->ct.count, 0);
+ seqcount_init(&net->ct.generation);
net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
if (!net->ct.pcpu_lists)
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 7bd03de..825c3e3 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -605,32 +605,14 @@
.expect_policy = &pptp_exp_policy,
};
-static void nf_conntrack_pptp_net_exit(struct net *net)
-{
- nf_ct_gre_keymap_flush(net);
-}
-
-static struct pernet_operations nf_conntrack_pptp_net_ops = {
- .exit = nf_conntrack_pptp_net_exit,
-};
-
static int __init nf_conntrack_pptp_init(void)
{
- int rv;
-
- rv = nf_conntrack_helper_register(&pptp);
- if (rv < 0)
- return rv;
- rv = register_pernet_subsys(&nf_conntrack_pptp_net_ops);
- if (rv < 0)
- nf_conntrack_helper_unregister(&pptp);
- return rv;
+ return nf_conntrack_helper_register(&pptp);
}
static void __exit nf_conntrack_pptp_fini(void)
{
nf_conntrack_helper_unregister(&pptp);
- unregister_pernet_subsys(&nf_conntrack_pptp_net_ops);
}
module_init(nf_conntrack_pptp_init);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9d9c0da..d566573 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -66,7 +66,7 @@
return net_generic(net, proto_gre_net_id);
}
-void nf_ct_gre_keymap_flush(struct net *net)
+static void nf_ct_gre_keymap_flush(struct net *net)
{
struct netns_proto_gre *net_gre = gre_pernet(net);
struct nf_ct_gre_keymap *km, *tmp;
@@ -78,7 +78,6 @@
}
write_unlock_bh(&net_gre->keymap_lock);
}
-EXPORT_SYMBOL(nf_ct_gre_keymap_flush);
static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
const struct nf_conntrack_tuple *t)
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 90998a6..8041053 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -25,9 +25,8 @@
struct nft_data data[NFT_REG_MAX + 1])
{
const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
- u32 mask;
+ u32 mask = nft_cmp_fast_mask(priv->len);
- mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len);
if ((data[priv->sreg].data[0] & mask) == priv->data)
return;
data[NFT_REG_VERDICT].verdict = NFT_BREAK;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 954925d..e2b3f51 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -128,7 +128,7 @@
BUG_ON(err < 0);
desc.len *= BITS_PER_BYTE;
- mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len);
+ mask = nft_cmp_fast_mask(desc.len);
priv->data = data.data[0] & mask;
priv->len = desc.len;
return 0;
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index a3d6951..ebb6e24 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -174,7 +174,7 @@
skb->local_df = 1;
- return iptunnel_xmit(rt, skb, fl.saddr,
+ return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
OVS_CB(skb)->tun_key->ipv4_tos,
OVS_CB(skb)->tun_key->ipv4_ttl, df, false);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 4f6d6f9..39579c3 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1395,35 +1395,44 @@
return false;
}
-/* Update asoc's rwnd for the approximated state in the buffer,
- * and check whether SACK needs to be sent.
- */
-void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer)
+/* Increase asoc's rwnd by len and send any window update SACK if needed. */
+void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
{
- int rx_count;
struct sctp_chunk *sack;
struct timer_list *timer;
- if (asoc->ep->rcvbuf_policy)
- rx_count = atomic_read(&asoc->rmem_alloc);
- else
- rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
+ if (asoc->rwnd_over) {
+ if (asoc->rwnd_over >= len) {
+ asoc->rwnd_over -= len;
+ } else {
+ asoc->rwnd += (len - asoc->rwnd_over);
+ asoc->rwnd_over = 0;
+ }
+ } else {
+ asoc->rwnd += len;
+ }
- if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0)
- asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1;
- else
- asoc->rwnd = 0;
+ /* If we had window pressure, start recovering it
+ * once our rwnd had reached the accumulated pressure
+ * threshold. The idea is to recover slowly, but up
+ * to the initial advertised window.
+ */
+ if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) {
+ int change = min(asoc->pathmtu, asoc->rwnd_press);
+ asoc->rwnd += change;
+ asoc->rwnd_press -= change;
+ }
- pr_debug("%s: asoc:%p rwnd=%u, rx_count=%d, sk_rcvbuf=%d\n",
- __func__, asoc, asoc->rwnd, rx_count,
- asoc->base.sk->sk_rcvbuf);
+ pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n",
+ __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
+ asoc->a_rwnd);
/* Send a window update SACK if the rwnd has increased by at least the
* minimum of the association's PMTU and half of the receive buffer.
* The algorithm used is similar to the one described in
* Section 4.2.3.3 of RFC 1122.
*/
- if (update_peer && sctp_peer_needs_update(asoc)) {
+ if (sctp_peer_needs_update(asoc)) {
asoc->a_rwnd = asoc->rwnd;
pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u "
@@ -1445,6 +1454,45 @@
}
}
+/* Decrease asoc's rwnd by len. */
+void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
+{
+ int rx_count;
+ int over = 0;
+
+ if (unlikely(!asoc->rwnd || asoc->rwnd_over))
+ pr_debug("%s: association:%p has asoc->rwnd:%u, "
+ "asoc->rwnd_over:%u!\n", __func__, asoc,
+ asoc->rwnd, asoc->rwnd_over);
+
+ if (asoc->ep->rcvbuf_policy)
+ rx_count = atomic_read(&asoc->rmem_alloc);
+ else
+ rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
+
+ /* If we've reached or overflowed our receive buffer, announce
+ * a 0 rwnd if rwnd would still be positive. Store the
+ * the potential pressure overflow so that the window can be restored
+ * back to original value.
+ */
+ if (rx_count >= asoc->base.sk->sk_rcvbuf)
+ over = 1;
+
+ if (asoc->rwnd >= len) {
+ asoc->rwnd -= len;
+ if (over) {
+ asoc->rwnd_press += asoc->rwnd;
+ asoc->rwnd = 0;
+ }
+ } else {
+ asoc->rwnd_over = len - asoc->rwnd;
+ asoc->rwnd = 0;
+ }
+
+ pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n",
+ __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
+ asoc->rwnd_press);
+}
/* Build the bind address list for the association based on info from the
* local endpoint and the remote peer.
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4e1d0fc..c09757f 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -957,7 +957,7 @@
SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
- return ip_queue_xmit(skb, &transport->fl);
+ return ip_queue_xmit(&inet->sk, skb, &transport->fl);
}
static struct sctp_af sctp_af_inet;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 01e0024..ae9fbeb 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -6178,7 +6178,7 @@
* PMTU. In cases, such as loopback, this might be a rather
* large spill over.
*/
- if ((!chunk->data_accepted) && (!asoc->rwnd ||
+ if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over ||
(datalen > asoc->rwnd + asoc->frag_point))) {
/* If this is the next TSN, consider reneging to make
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e13519e..ff20e2d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2115,6 +2115,12 @@
sctp_skb_pull(skb, copied);
skb_queue_head(&sk->sk_receive_queue, skb);
+ /* When only partial message is copied to the user, increase
+ * rwnd by that amount. If all the data in the skb is read,
+ * rwnd is updated when the event is freed.
+ */
+ if (!sctp_ulpevent_is_notification(event))
+ sctp_assoc_rwnd_increase(event->asoc, copied);
goto out;
} else if ((event->msg_flags & MSG_NOTIFICATION) ||
(event->msg_flags & MSG_EOR))
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 8d198ae..85c6465 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -989,7 +989,7 @@
skb = sctp_event2skb(event);
/* Set the owner and charge rwnd for bytes received. */
sctp_ulpevent_set_owner(event, asoc);
- sctp_assoc_rwnd_update(asoc, false);
+ sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb));
if (!skb->data_len)
return;
@@ -1011,7 +1011,6 @@
{
struct sk_buff *skb, *frag;
unsigned int len;
- struct sctp_association *asoc;
/* Current stack structures assume that the rcv buffer is
* per socket. For UDP style sockets this is not true as
@@ -1036,11 +1035,8 @@
}
done:
- asoc = event->asoc;
- sctp_association_hold(asoc);
+ sctp_assoc_rwnd_increase(event->asoc, len);
sctp_ulpevent_release_owner(event);
- sctp_assoc_rwnd_update(asoc, true);
- sctp_association_put(asoc);
}
static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f02f511..c08fbd1 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1842,7 +1842,7 @@
xfrm_pol_put(pol);
}
-static int xdst_queue_output(struct sk_buff *skb)
+static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
{
unsigned long sched_next;
struct dst_entry *dst = skb_dst(skb);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index d4b6015..2458a1d 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -97,6 +97,14 @@
bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
}
+static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
+
+static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic)
+{
+ if (WARN_ON(ioapic->rtc_status.pending_eoi < 0))
+ kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
{
bool new_val, old_val;
@@ -120,9 +128,8 @@
} else {
__clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
ioapic->rtc_status.pending_eoi--;
+ rtc_status_pending_eoi_check_valid(ioapic);
}
-
- WARN_ON(ioapic->rtc_status.pending_eoi < 0);
}
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
@@ -149,10 +156,10 @@
static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
{
- if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map))
+ if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) {
--ioapic->rtc_status.pending_eoi;
-
- WARN_ON(ioapic->rtc_status.pending_eoi < 0);
+ rtc_status_pending_eoi_check_valid(ioapic);
+ }
}
static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
@@ -353,10 +360,16 @@
ioapic->irr &= ~(1 << irq);
if (irq == RTC_GSI && line_status) {
+ /*
+ * pending_eoi cannot ever become negative (see
+ * rtc_status_pending_eoi_check_valid) and the caller
+ * ensures that it is only called if it is >= zero, namely
+ * if rtc_irq_check_coalesced returns false).
+ */
BUG_ON(ioapic->rtc_status.pending_eoi != 0);
ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
ioapic->rtc_status.dest_map);
- ioapic->rtc_status.pending_eoi = ret;
+ ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
} else
ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);