Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "12 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  dmapool: fix overflow condition in pool_find_page()
  thermal: avoid division by zero in power allocator
  memcg: remove pcp_counter_lock
  kprobes: use _do_fork() in samples to make them work again
  drivers/input/joystick/Kconfig: zhenhua.c needs BITREVERSE
  memcg: make mem_cgroup_read_stat() unsigned
  memcg: fix dirty page migration
  dax: fix NULL pointer in __dax_pmd_fault()
  mm: hugetlbfs: skip shared VMAs when unmapping private pages to satisfy a fault
  mm/slab: fix unexpected index mapping result of kmalloc_size(INDEX_NODE+1)
  userfaultfd: remove kernel header include from uapi header
  arch/x86/include/asm/efi.h: fix build failure
diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt
index 62328d7..b0e911e 100644
--- a/Documentation/power/pci.txt
+++ b/Documentation/power/pci.txt
@@ -979,20 +979,45 @@
 (alternatively, the runtime_suspend() callback will have to check if the
 device should really be suspended and return -EAGAIN if that is not the case).
 
-The runtime PM of PCI devices is disabled by default.  It is also blocked by
-pci_pm_init() that runs the pm_runtime_forbid() helper function.  If a PCI
-driver implements the runtime PM callbacks and intends to use the runtime PM
-framework provided by the PM core and the PCI subsystem, it should enable this
-feature by executing the pm_runtime_enable() helper function.  However, the
-driver should not call the pm_runtime_allow() helper function unblocking
-the runtime PM of the device.  Instead, it should allow user space or some
-platform-specific code to do that (user space can do it via sysfs), although
-once it has called pm_runtime_enable(), it must be prepared to handle the
+The runtime PM of PCI devices is enabled by default by the PCI core.  PCI
+device drivers do not need to enable it and should not attempt to do so.
+However, it is blocked by pci_pm_init(), which runs the pm_runtime_forbid()
+helper function.  In addition to that, the runtime PM usage counter of
+each PCI device is incremented by local_pci_probe() before executing the
+probe callback provided by the device's driver.
+
+If a PCI driver implements the runtime PM callbacks and intends to use the
+runtime PM framework provided by the PM core and the PCI subsystem, it needs
+to decrement the device's runtime PM usage counter in its probe callback
+function.  If it doesn't do that, the counter will always be different from
+zero for the device and it will never be runtime-suspended.  The simplest
+way to do that is by calling pm_runtime_put_noidle(), but if the driver
+wants to schedule an autosuspend right away, for example, it may call
+pm_runtime_put_autosuspend() instead.  Generally, it just needs to call
+a function that decrements the device's usage counter from its probe
+routine to make runtime PM work for the device.
+
+It is important to remember that the driver's runtime_suspend() callback
+may be executed right after the usage counter has been decremented, because
+user space may already have caused the pm_runtime_allow() helper function,
+which unblocks the runtime PM of the device, to run via sysfs, so the driver
+must be prepared to cope with that.
+
+The driver itself should not call pm_runtime_allow(), though.  Instead, it
+should let user space or some platform-specific code do that (user space can
+do it via sysfs as stated above), but it must be prepared to handle the
 runtime PM of the device correctly as soon as pm_runtime_allow() is called
-(which may happen at any time).  [It also is possible that user space causes
-pm_runtime_allow() to be called via sysfs before the driver is loaded, so in
-fact the driver has to be prepared to handle the runtime PM of the device as
-soon as it calls pm_runtime_enable().]
+(which may happen at any time, even before the driver is loaded).
+
+When the driver's remove callback runs, it has to balance the decrement of
+the device's runtime PM usage counter made at probe time.  For this reason,
+if it has decremented the counter in its probe callback, it must run
+pm_runtime_get_noresume() in its remove callback.  [Since the core carries
+out a runtime resume of the device and bumps up the device's usage counter
+before running the driver's remove callback, the runtime PM of the device
+is effectively disabled for the duration of the remove execution and all
+runtime PM helper functions incrementing the device's usage counter are
+then effectively equivalent to pm_runtime_get_noresume().]
 
 The runtime PM framework works by processing requests to suspend or resume
 devices, or to check if they are idle (in which cases it is reasonable to
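
For illustration, a minimal sketch of the probe/remove pattern described in
the updated text above, assuming a hypothetical foo driver (only the runtime
PM calls reflect the documented rules; everything else is placeholder):

	#include <linux/pci.h>
	#include <linux/pm_runtime.h>

	static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	{
		int err;

		err = pcim_enable_device(pdev);
		if (err)
			return err;

		/* ... hypothetical device setup ... */

		/*
		 * The PCI core bumped the device's runtime PM usage counter
		 * before calling this routine; drop that reference here so
		 * the device can be runtime-suspended once allowed.
		 */
		pm_runtime_put_noidle(&pdev->dev);
		return 0;
	}

	static void foo_remove(struct pci_dev *pdev)
	{
		/*
		 * Balance the pm_runtime_put_noidle() from probe; the core
		 * resumed the device and took a reference before calling us.
		 */
		pm_runtime_get_noresume(&pdev->dev);

		/* ... hypothetical device teardown ... */
	}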
diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c
index 2bc8abc..6c6247a 100644
--- a/Documentation/ptp/testptp.c
+++ b/Documentation/ptp/testptp.c
@@ -18,6 +18,7 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 #define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__        /* For PPC64, to get LL64 types */
 #include <errno.h>
 #include <fcntl.h>
 #include <inttypes.h>
diff --git a/MAINTAINERS b/MAINTAINERS
index 9f6685f..797236b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5957,7 +5957,7 @@
 KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V
 M:	Joerg Roedel <joro@8bytes.org>
 L:	kvm@vger.kernel.org
-W:	http://kvm.qumranet.com
+W:	http://www.linux-kvm.org/
 S:	Maintained
 F:	arch/x86/include/asm/svm.h
 F:	arch/x86/kvm/svm.c
@@ -5965,7 +5965,7 @@
 KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
 M:	Alexander Graf <agraf@suse.com>
 L:	kvm-ppc@vger.kernel.org
-W:	http://kvm.qumranet.com
+W:	http://www.linux-kvm.org/
 T:	git git://github.com/agraf/linux-2.6.git
 S:	Supported
 F:	arch/powerpc/include/asm/kvm*
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 655e07a..67f0823 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -41,6 +41,7 @@
 
 #define PVCLOCK_TSC_STABLE_BIT	(1 << 0)
 #define PVCLOCK_GUEST_STOPPED	(1 << 1)
+/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
 #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 94b7d15..2f9ed1f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -514,7 +514,7 @@
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (svm->vmcb->control.next_rip != 0) {
-		WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS));
+		WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
 		svm->next_rip = svm->vmcb->control.next_rip;
 	}
 
@@ -866,64 +866,6 @@
 	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
-#define MTRR_TYPE_UC_MINUS	7
-#define MTRR2PROTVAL_INVALID 0xff
-
-static u8 mtrr2protval[8];
-
-static u8 fallback_mtrr_type(int mtrr)
-{
-	/*
-	 * WT and WP aren't always available in the host PAT.  Treat
-	 * them as UC and UC- respectively.  Everything else should be
-	 * there.
-	 */
-	switch (mtrr)
-	{
-	case MTRR_TYPE_WRTHROUGH:
-		return MTRR_TYPE_UNCACHABLE;
-	case MTRR_TYPE_WRPROT:
-		return MTRR_TYPE_UC_MINUS;
-	default:
-		BUG();
-	}
-}
-
-static void build_mtrr2protval(void)
-{
-	int i;
-	u64 pat;
-
-	for (i = 0; i < 8; i++)
-		mtrr2protval[i] = MTRR2PROTVAL_INVALID;
-
-	/* Ignore the invalid MTRR types.  */
-	mtrr2protval[2] = 0;
-	mtrr2protval[3] = 0;
-
-	/*
-	 * Use host PAT value to figure out the mapping from guest MTRR
-	 * values to nested page table PAT/PCD/PWT values.  We do not
-	 * want to change the host PAT value every time we enter the
-	 * guest.
-	 */
-	rdmsrl(MSR_IA32_CR_PAT, pat);
-	for (i = 0; i < 8; i++) {
-		u8 mtrr = pat >> (8 * i);
-
-		if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
-			mtrr2protval[mtrr] = __cm_idx2pte(i);
-	}
-
-	for (i = 0; i < 8; i++) {
-		if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
-			u8 fallback = fallback_mtrr_type(i);
-			mtrr2protval[i] = mtrr2protval[fallback];
-			BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
-		}
-	}
-}
-
 static __init int svm_hardware_setup(void)
 {
 	int cpu;
@@ -990,7 +932,6 @@
 	} else
 		kvm_disable_tdp();
 
-	build_mtrr2protval();
 	return 0;
 
 err:
@@ -1145,43 +1086,6 @@
 	return target_tsc - tsc;
 }
 
-static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
-{
-	struct kvm_vcpu *vcpu = &svm->vcpu;
-
-	/* Unlike Intel, AMD takes the guest's CR0.CD into account.
-	 *
-	 * AMD does not have IPAT.  To emulate it for the case of guests
-	 * with no assigned devices, just set everything to WB.  If guests
-	 * have assigned devices, however, we cannot force WB for RAM
-	 * pages only, so use the guest PAT directly.
-	 */
-	if (!kvm_arch_has_assigned_device(vcpu->kvm))
-		*g_pat = 0x0606060606060606;
-	else
-		*g_pat = vcpu->arch.pat;
-}
-
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
-	u8 mtrr;
-
-	/*
-	 * 1. MMIO: trust guest MTRR, so same as item 3.
-	 * 2. No passthrough: always map as WB, and force guest PAT to WB as well
-	 * 3. Passthrough: can't guarantee the result, try to trust guest.
-	 */
-	if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
-		return 0;
-
-	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) &&
-	    kvm_read_cr0(vcpu) & X86_CR0_CD)
-		return _PAGE_NOCACHE;
-
-	mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
-	return mtrr2protval[mtrr];
-}
-
 static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1278,7 +1182,6 @@
 		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
 		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
 		save->g_pat = svm->vcpu.arch.pat;
-		svm_set_guest_pat(svm, &save->g_pat);
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -1673,10 +1576,13 @@
 
 	if (!vcpu->fpu_active)
 		cr0 |= X86_CR0_TS;
-
-	/* These are emulated via page tables.  */
-	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
-
+	/*
+	 * re-enable caching here because the QEMU bios
+	 * does not do it - this results in some delay at
+	 * reboot
+	 */
+	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+		cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	svm->vmcb->save.cr0 = cr0;
 	mark_dirty(svm->vmcb, VMCB_CR);
 	update_cr0_intercept(svm);
@@ -3351,16 +3257,6 @@
 	case MSR_VM_IGNNE:
 		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
-	case MSR_IA32_CR_PAT:
-		if (npt_enabled) {
-			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
-				return 1;
-			vcpu->arch.pat = data;
-			svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
-			mark_dirty(svm->vmcb, VMCB_NPT);
-			break;
-		}
-		/* fall through */
 	default:
 		return kvm_set_msr_common(vcpu, msr);
 	}
@@ -4195,6 +4091,11 @@
 	return true;
 }
 
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	return 0;
+}
+
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6407674..06ef490 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8617,17 +8617,22 @@
 	u64 ipat = 0;
 
 	/* For VT-d and EPT combination
-	 * 1. MMIO: guest may want to apply WC, trust it.
+	 * 1. MMIO: always map as UC
 	 * 2. EPT with VT-d:
 	 *   a. VT-d without snooping control feature: can't guarantee the
-	 *	result, try to trust guest.  So the same as item 1.
+	 *	result, try to trust guest.
 	 *   b. VT-d with snooping control feature: snooping control feature of
 	 *	VT-d engine can guarantee the cache correctness. Just set it
 	 *	to WB to keep consistent with host. So the same as item 3.
 	 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
 	 *    consistent with host MTRR
 	 */
-	if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+	if (is_mmio) {
+		cache = MTRR_TYPE_UNCACHABLE;
+		goto exit;
+	}
+
+	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
 		ipat = VMX_EPT_IPAT_BIT;
 		cache = MTRR_TYPE_WRBACK;
 		goto exit;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 991466b..92511d4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1708,8 +1708,6 @@
 		vcpu->pvclock_set_guest_stopped_request = false;
 	}
 
-	pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO;
-
 	/* If the host uses TSC clocksource, then it is stable */
 	if (use_master_clock)
 		pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
@@ -2007,8 +2005,6 @@
 					&vcpu->requests);
 
 			ka->boot_vcpu_runs_old_kvmclock = tmp;
-
-			ka->kvmclock_offset = -get_kernel_ns();
 		}
 
 		vcpu->arch.time = data;
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 2614a83..42c66b6 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1044,8 +1044,10 @@
 		goto err_exit;
 
 	mutex_lock(&ec->mutex);
+	result = -ENODATA;
 	list_for_each_entry(handler, &ec->list, node) {
 		if (value == handler->query_bit) {
+			result = 0;
 			q->handler = acpi_ec_get_query_handler(handler);
 			ec_dbg_evt("Query(0x%02x) scheduled",
 				   q->handler->query_bit);
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 6da0f9b..c933675 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -372,6 +372,7 @@
 
 	/* Interrupt Line values above 0xF are forbidden */
 	if (dev->irq > 0 && (dev->irq <= 0xF) &&
+	    acpi_isa_irq_available(dev->irq) &&
 	    (acpi_isa_irq_to_gsi(dev->irq, &dev_gsi) == 0)) {
 		dev_warn(&dev->dev, "PCI INT %c: no GSI - using ISA IRQ %d\n",
 			 pin_name(dev->pin), dev->irq);
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index 3b4ea98..7c8408b 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -498,8 +498,7 @@
 			    PIRQ_PENALTY_PCI_POSSIBLE;
 		}
 	}
-	/* Add a penalty for the SCI */
-	acpi_irq_penalty[acpi_gbl_FADT.sci_interrupt] += PIRQ_PENALTY_PCI_USING;
+
 	return 0;
 }
 
@@ -553,6 +552,13 @@
 				irq = link->irq.possible[i];
 		}
 	}
+	if (acpi_irq_penalty[irq] >= PIRQ_PENALTY_ISA_ALWAYS) {
+		printk(KERN_ERR PREFIX "No IRQ available for %s [%s]. "
+			    "Try pci=noacpi or acpi=off\n",
+			    acpi_device_name(link->device),
+			    acpi_device_bid(link->device));
+		return -ENODEV;
+	}
 
 	/* Attempt to enable the link device at this IRQ. */
 	if (acpi_pci_link_set(link, irq)) {
@@ -821,6 +827,12 @@
 	}
 }
 
+bool acpi_isa_irq_available(int irq)
+{
+	return irq >= 0 && (irq >= ARRAY_SIZE(acpi_irq_penalty) ||
+			    acpi_irq_penalty[irq] < PIRQ_PENALTY_ISA_ALWAYS);
+}
+
 /*
  * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict with
 * PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be used for
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 28cd75c..7ae7cd9 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -892,10 +892,17 @@
 	u32 microvolt[3] = {0};
 	int count, ret;
 
-	count = of_property_count_u32_elems(opp->np, "opp-microvolt");
-	if (!count)
+	/* Missing property isn't a problem, but an invalid entry is */
+	if (!of_find_property(opp->np, "opp-microvolt", NULL))
 		return 0;
 
+	count = of_property_count_u32_elems(opp->np, "opp-microvolt");
+	if (count < 0) {
+		dev_err(dev, "%s: Invalid opp-microvolt property (%d)\n",
+			__func__, count);
+		return count;
+	}
+
 	/* There can be one or three elements here */
 	if (count != 1 && count != 3) {
 		dev_err(dev, "%s: Invalid number of elements in opp-microvolt property (%d)\n",
@@ -1063,7 +1070,7 @@
  * share a common logic which is isolated here.
  *
  * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the
- * copy operation, returns 0 if no modifcation was done OR modification was
+ * copy operation, returns 0 if no modification was done OR modification was
  * successful.
  *
  * Locking: The internal device_opp and opp structures are RCU protected.
@@ -1151,7 +1158,7 @@
  * mutex locking or synchronize_rcu() blocking calls cannot be used.
  *
  * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the
- * copy operation, returns 0 if no modifcation was done OR modification was
+ * copy operation, returns 0 if no modification was done OR modification was
  * successful.
  */
 int dev_pm_opp_enable(struct device *dev, unsigned long freq)
@@ -1177,7 +1184,7 @@
  * mutex locking or synchronize_rcu() blocking calls cannot be used.
  *
  * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the
- * copy operation, returns 0 if no modifcation was done OR modification was
+ * copy operation, returns 0 if no modification was done OR modification was
  * successful.
  */
 int dev_pm_opp_disable(struct device *dev, unsigned long freq)
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 3a3738f..cd4510a 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -620,7 +620,7 @@
 		.name = "C6-SKL",
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
-		.exit_latency = 75,
+		.exit_latency = 85,
 		.target_residency = 200,
 		.enter = &intel_idle,
 		.enter_freeze = intel_idle_freeze, },
@@ -636,11 +636,19 @@
 		.name = "C8-SKL",
 		.desc = "MWAIT 0x40",
 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
-		.exit_latency = 174,
+		.exit_latency = 200,
 		.target_residency = 800,
 		.enter = &intel_idle,
 		.enter_freeze = intel_idle_freeze, },
 	{
+		.name = "C9-SKL",
+		.desc = "MWAIT 0x50",
+		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 480,
+		.target_residency = 5000,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
 		.name = "C10-SKL",
 		.desc = "MWAIT 0x60",
 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 41d6911..f1ccd40 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -245,7 +245,6 @@
 		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
 	if (MLX5_CAP_GEN(mdev, apm))
 		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
-	props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
 	if (MLX5_CAP_GEN(mdev, xrc))
 		props->device_cap_flags |= IB_DEVICE_XRC;
 	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
@@ -795,53 +794,6 @@
 	return 0;
 }
 
-static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
-{
-	struct mlx5_create_mkey_mbox_in *in;
-	struct mlx5_mkey_seg *seg;
-	struct mlx5_core_mr mr;
-	int err;
-
-	in = kzalloc(sizeof(*in), GFP_KERNEL);
-	if (!in)
-		return -ENOMEM;
-
-	seg = &in->seg;
-	seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
-	seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
-	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-	seg->start_addr = 0;
-
-	err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
-				    NULL, NULL, NULL);
-	if (err) {
-		mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
-		goto err_in;
-	}
-
-	kfree(in);
-	*key = mr.key;
-
-	return 0;
-
-err_in:
-	kfree(in);
-
-	return err;
-}
-
-static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
-{
-	struct mlx5_core_mr mr;
-	int err;
-
-	memset(&mr, 0, sizeof(mr));
-	mr.key = key;
-	err = mlx5_core_destroy_mkey(dev->mdev, &mr);
-	if (err)
-		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
-}
-
 static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
 				      struct ib_ucontext *context,
 				      struct ib_udata *udata)
@@ -867,13 +819,6 @@
 			kfree(pd);
 			return ERR_PTR(-EFAULT);
 		}
-	} else {
-		err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
-		if (err) {
-			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
-			kfree(pd);
-			return ERR_PTR(err);
-		}
 	}
 
 	return &pd->ibpd;
@@ -884,9 +829,6 @@
 	struct mlx5_ib_dev *mdev = to_mdev(pd->device);
 	struct mlx5_ib_pd *mpd = to_mpd(pd);
 
-	if (!pd->uobject)
-		free_pa_mkey(mdev, mpd->pa_lkey);
-
 	mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
 	kfree(mpd);
 
@@ -1245,18 +1187,10 @@
 	struct ib_srq_init_attr attr;
 	struct mlx5_ib_dev *dev;
 	struct ib_cq_init_attr cq_attr = {.cqe = 1};
-	u32 rsvd_lkey;
 	int ret = 0;
 
 	dev = container_of(devr, struct mlx5_ib_dev, devr);
 
-	ret = mlx5_core_query_special_context(dev->mdev, &rsvd_lkey);
-	if (ret) {
-		pr_err("Failed to query special context %d\n", ret);
-		return ret;
-	}
-	dev->ib_dev.local_dma_lkey = rsvd_lkey;
-
 	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
 	if (IS_ERR(devr->p0)) {
 		ret = PTR_ERR(devr->p0);
@@ -1418,6 +1352,7 @@
 	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
 	dev->ib_dev.owner		= THIS_MODULE;
 	dev->ib_dev.node_type		= RDMA_NODE_IB_CA;
+	dev->ib_dev.local_dma_lkey	= 0 /* not supported for now */;
 	dev->num_ports		= MLX5_CAP_GEN(mdev, num_ports);
 	dev->ib_dev.phys_port_cnt     = dev->num_ports;
 	dev->ib_dev.num_comp_vectors    =
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index bb8cda7..22123b7 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -103,7 +103,6 @@
 struct mlx5_ib_pd {
 	struct ib_pd		ibpd;
 	u32			pdn;
-	u32			pa_lkey;
 };
 
 /* Use macros here so that don't have to duplicate
@@ -213,7 +212,6 @@
 	int			uuarn;
 
 	int			create_type;
-	u32			pa_lkey;
 
 	/* Store signature errors */
 	bool			signature_en;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index c745c6c..6f521a3 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -925,8 +925,6 @@
 			err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
 			if (err)
 				mlx5_ib_dbg(dev, "err %d\n", err);
-			else
-				qp->pa_lkey = to_mpd(pd)->pa_lkey;
 		}
 
 		if (err)
@@ -2045,7 +2043,7 @@
 		mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
 	dseg->addr = cpu_to_be64(mfrpl->map);
 	dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
-	dseg->lkey = cpu_to_be32(pd->pa_lkey);
+	dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
 }
 
 static __be32 send_ieth(struct ib_send_wr *wr)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ca28736..4cd5428 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -80,7 +80,7 @@
 	IPOIB_NUM_WC		  = 4,
 
 	IPOIB_MAX_PATH_REC_QUEUE  = 3,
-	IPOIB_MAX_MCAST_QUEUE	  = 3,
+	IPOIB_MAX_MCAST_QUEUE	  = 64,
 
 	IPOIB_FLAG_OPER_UP	  = 0,
 	IPOIB_FLAG_INITIALIZED	  = 1,
@@ -548,6 +548,8 @@
 
 int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
 		       union ib_gid *mgid, int set_qkey);
+int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast);
+struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid);
 
 int ipoib_init_qp(struct net_device *dev);
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 36536ce..f74316e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1149,6 +1149,9 @@
 	unsigned long dt;
 	unsigned long flags;
 	int i;
+	LIST_HEAD(remove_list);
+	struct ipoib_mcast *mcast, *tmcast;
+	struct net_device *dev = priv->dev;
 
 	if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
 		return;
@@ -1176,6 +1179,19 @@
 							  lockdep_is_held(&priv->lock))) != NULL) {
 			/* was the neigh idle for two GC periods */
 			if (time_after(neigh_obsolete, neigh->alive)) {
+				u8 *mgid = neigh->daddr + 4;
+
+				/* Is this multicast ? */
+				if (*mgid == 0xff) {
+					mcast = __ipoib_mcast_find(dev, mgid);
+
+					if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+						list_del(&mcast->list);
+						rb_erase(&mcast->rb_node, &priv->multicast_tree);
+						list_add_tail(&mcast->list, &remove_list);
+					}
+				}
+
 				rcu_assign_pointer(*np,
 						   rcu_dereference_protected(neigh->hnext,
 									     lockdep_is_held(&priv->lock)));
@@ -1191,6 +1207,8 @@
 
 out_unlock:
 	spin_unlock_irqrestore(&priv->lock, flags);
+	list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+		ipoib_mcast_leave(dev, mcast);
 }
 
 static void ipoib_reap_neigh(struct work_struct *work)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 09a1748..136cbef 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -153,7 +153,7 @@
 	return mcast;
 }
 
-static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
+struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct rb_node *n = priv->multicast_tree.rb_node;
@@ -508,17 +508,19 @@
 		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
 
 		/*
-		 * Historically Linux IPoIB has never properly supported SEND
-		 * ONLY join. It emulated it by not providing all the required
-		 * attributes, which is enough to prevent group creation and
-		 * detect if there are full members or not. A major problem
-		 * with supporting SEND ONLY is detecting when the group is
-		 * auto-destroyed as IPoIB will cache the MLID..
+		 * Send-only IB Multicast joins do not work at the core
+		 * IB layer yet, so we can't use them here.  However,
+		 * we are emulating an Ethernet multicast send, which
+		 * does not require a multicast subscription and will
+		 * still send properly.  The most appropriate thing to
+		 * do is to create the group if it doesn't exist as that
+		 * most closely emulates the behavior, from a user space
+		 * application perspective, of Ethernet multicast
+		 * operation.  For now, we do a full join, maybe later
+		 * when the core IB layers support send only joins we
+		 * will use them.
 		 */
-#if 1
-		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-			comp_mask &= ~IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
-#else
+#if 0
 		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
 			rec.join_state = 4;
 #endif
@@ -675,7 +677,7 @@
 	return 0;
 }
 
-static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
+int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret = 0;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 1ace5d8..f58ff96 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -97,6 +97,11 @@
 module_param_named(max_sectors, iser_max_sectors, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command (default:1024)");
 
+bool iser_always_reg = true;
+module_param_named(always_register, iser_always_reg, bool, S_IRUGO);
+MODULE_PARM_DESC(always_register,
+		 "Always register memory, even for contiguous memory regions (default:true)");
+
 bool iser_pi_enable = false;
 module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO);
 MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 86f6583..a5edd6e 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -611,6 +611,7 @@
 extern bool iser_pi_enable;
 extern int iser_pi_guard;
 extern unsigned int iser_max_sectors;
+extern bool iser_always_reg;
 
 int iser_assign_reg_ops(struct iser_device *device);
 
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 2493cc7..4c46d67 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -803,11 +803,12 @@
 iser_reg_prot_sg(struct iscsi_iser_task *task,
 		 struct iser_data_buf *mem,
 		 struct iser_fr_desc *desc,
+		 bool use_dma_key,
 		 struct iser_mem_reg *reg)
 {
 	struct iser_device *device = task->iser_conn->ib_conn.device;
 
-	if (mem->dma_nents == 1)
+	if (use_dma_key)
 		return iser_reg_dma(device, mem, reg);
 
 	return device->reg_ops->reg_mem(task, mem, &desc->pi_ctx->rsc, reg);
@@ -817,11 +818,12 @@
 iser_reg_data_sg(struct iscsi_iser_task *task,
 		 struct iser_data_buf *mem,
 		 struct iser_fr_desc *desc,
+		 bool use_dma_key,
 		 struct iser_mem_reg *reg)
 {
 	struct iser_device *device = task->iser_conn->ib_conn.device;
 
-	if (mem->dma_nents == 1)
+	if (use_dma_key)
 		return iser_reg_dma(device, mem, reg);
 
 	return device->reg_ops->reg_mem(task, mem, &desc->rsc, reg);
@@ -836,14 +838,17 @@
 	struct iser_mem_reg *reg = &task->rdma_reg[dir];
 	struct iser_mem_reg *data_reg;
 	struct iser_fr_desc *desc = NULL;
+	bool use_dma_key;
 	int err;
 
 	err = iser_handle_unaligned_buf(task, mem, dir);
 	if (unlikely(err))
 		return err;
 
-	if (mem->dma_nents != 1 ||
-	    scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) {
+	use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
+		       scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
+
+	if (!use_dma_key) {
 		desc = device->reg_ops->reg_desc_get(ib_conn);
 		reg->mem_h = desc;
 	}
@@ -853,7 +858,7 @@
 	else
 		data_reg = &task->desc.data_reg;
 
-	err = iser_reg_data_sg(task, mem, desc, data_reg);
+	err = iser_reg_data_sg(task, mem, desc, use_dma_key, data_reg);
 	if (unlikely(err))
 		goto err_reg;
 
@@ -866,7 +871,8 @@
 			if (unlikely(err))
 				goto err_reg;
 
-			err = iser_reg_prot_sg(task, mem, desc, prot_reg);
+			err = iser_reg_prot_sg(task, mem, desc,
+					       use_dma_key, prot_reg);
 			if (unlikely(err))
 				goto err_reg;
 		}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index ae70cc1..85132d8 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -133,11 +133,15 @@
 			     (unsigned long)comp);
 	}
 
-	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
-				   IB_ACCESS_REMOTE_WRITE |
-				   IB_ACCESS_REMOTE_READ);
-	if (IS_ERR(device->mr))
-		goto dma_mr_err;
+	if (!iser_always_reg) {
+		int access = IB_ACCESS_LOCAL_WRITE |
+			     IB_ACCESS_REMOTE_WRITE |
+			     IB_ACCESS_REMOTE_READ;
+
+		device->mr = ib_get_dma_mr(device->pd, access);
+		if (IS_ERR(device->mr))
+			goto dma_mr_err;
+	}
 
 	INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
 				iser_event_handler);
@@ -147,7 +151,8 @@
 	return 0;
 
 handler_err:
-	ib_dereg_mr(device->mr);
+	if (device->mr)
+		ib_dereg_mr(device->mr);
 dma_mr_err:
 	for (i = 0; i < device->comps_used; i++)
 		tasklet_kill(&device->comps[i].tasklet);
@@ -173,7 +178,6 @@
 static void iser_free_device_ib_res(struct iser_device *device)
 {
 	int i;
-	BUG_ON(device->mr == NULL);
 
 	for (i = 0; i < device->comps_used; i++) {
 		struct iser_comp *comp = &device->comps[i];
@@ -184,7 +188,8 @@
 	}
 
 	(void)ib_unregister_event_handler(&device->event_handler);
-	(void)ib_dereg_mr(device->mr);
+	if (device->mr)
+		(void)ib_dereg_mr(device->mr);
 	ib_dealloc_pd(device->pd);
 
 	kfree(device->comps);
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index f8baa89..1f7dd92 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -2051,6 +2051,8 @@
 				reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA;
 			else
 				reg |= PORT_CONTROL_FRAME_MODE_DSA;
+			reg |= PORT_CONTROL_FORWARD_UNKNOWN |
+				PORT_CONTROL_FORWARD_UNKNOWN_MC;
 		}
 
 		if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
index b7a0f78..9e59663 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c
+++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
@@ -1543,7 +1543,7 @@
 }
 
 /* Flush FLI data fifo. */
-static u32
+static int
 bfa_flash_fifo_flush(void __iomem *pci_bar)
 {
 	u32 i;
@@ -1573,11 +1573,11 @@
 }
 
 /* Read flash status. */
-static u32
+static int
 bfa_flash_status_read(void __iomem *pci_bar)
 {
 	union bfa_flash_dev_status_reg	dev_status;
-	u32				status;
+	int				status;
 	u32			ret_status;
 	int				i;
 
@@ -1611,11 +1611,11 @@
 }
 
 /* Start flash read operation. */
-static u32
+static int
 bfa_flash_read_start(void __iomem *pci_bar, u32 offset, u32 len,
 		     char *buf)
 {
-	u32 status;
+	int status;
 
 	/* len must be multiple of 4 and not exceeding fifo size */
 	if (len == 0 || len > BFA_FLASH_FIFO_SIZE || (len & 0x03) != 0)
@@ -1703,7 +1703,8 @@
 bfa_flash_raw_read(void __iomem *pci_bar, u32 offset, char *buf,
 		   u32 len)
 {
-	u32 n, status;
+	u32 n;
+	int status;
 	u32 off, l, s, residue, fifo_sz;
 
 	residue = len;
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index cc2d8b4..253f8ed 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -816,7 +816,7 @@
 	struct net_device *ndev;
 	struct hip04_priv *priv;
 	struct resource *res;
-	unsigned int irq;
+	int irq;
 	int ret;
 
 	ndev = alloc_etherdev(sizeof(struct hip04_priv));
diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h
index 28df374..ac02c67 100644
--- a/drivers/net/ethernet/ibm/emac/core.h
+++ b/drivers/net/ethernet/ibm/emac/core.h
@@ -460,8 +460,8 @@
 	u32 index;
 };
 
-#define EMAC_ETHTOOL_REGS_VER		0
-#define EMAC4_ETHTOOL_REGS_VER		1
-#define EMAC4SYNC_ETHTOOL_REGS_VER	2
+#define EMAC_ETHTOOL_REGS_VER		3
+#define EMAC4_ETHTOOL_REGS_VER		4
+#define EMAC4SYNC_ETHTOOL_REGS_VER	5
 
 #endif /* __IBM_NEWEMAC_CORE_H */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 3e0d200..62488a6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -946,6 +946,13 @@
 	/* take the lock before we start messing with the ring */
 	mutex_lock(&hw->aq.arq_mutex);
 
+	if (hw->aq.arq.count == 0) {
+		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+			   "AQRX: Admin queue not initialized.\n");
+		ret_code = I40E_ERR_QUEUE_EMPTY;
+		goto clean_arq_element_err;
+	}
+
 	/* set next_to_use to head */
 	ntu = (rd32(hw, hw->aq.arq.head) & I40E_PF_ARQH_ARQH_MASK);
 	if (ntu == ntc) {
@@ -1007,6 +1014,8 @@
 	/* Set pending if needed, unlock and return */
 	if (pending != NULL)
 		*pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc);
+
+clean_arq_element_err:
 	mutex_unlock(&hw->aq.arq_mutex);
 
 	if (i40e_is_nvm_update_op(&e->desc)) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 851c1a15..2fdf978 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2672,7 +2672,8 @@
 		rx_ctx.lrxqthresh = 2;
 	rx_ctx.crcstrip = 1;
 	rx_ctx.l2tsel = 1;
-	rx_ctx.showiv = 1;
+	/* this controls whether VLAN is stripped from inner headers */
+	rx_ctx.showiv = 0;
 #ifdef I40E_FCOE
 	rx_ctx.fc_ena = (vsi->type == I40E_VSI_FCOE);
 #endif
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
index f08450b..929d471 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
@@ -887,6 +887,13 @@
 	/* take the lock before we start messing with the ring */
 	mutex_lock(&hw->aq.arq_mutex);
 
+	if (hw->aq.arq.count == 0) {
+		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+			   "AQRX: Admin queue not initialized.\n");
+		ret_code = I40E_ERR_QUEUE_EMPTY;
+		goto clean_arq_element_err;
+	}
+
 	/* set next_to_use to head */
 	ntu = (rd32(hw, hw->aq.arq.head) & I40E_VF_ARQH1_ARQH_MASK);
 	if (ntu == ntc) {
@@ -948,6 +955,8 @@
 	/* Set pending if needed, unlock and return */
 	if (pending != NULL)
 		*pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc);
+
+clean_arq_element_err:
 	mutex_unlock(&hw->aq.arq_mutex);
 
 	return ret_code;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index bd9ea0d..1d4e2e0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -1184,10 +1184,11 @@
 	if (prot == MLX4_PROT_ETH) {
 		/* manage the steering entry for promisc mode */
 		if (new_entry)
-			new_steering_entry(dev, port, steer, index, qp->qpn);
+			err = new_steering_entry(dev, port, steer,
+						 index, qp->qpn);
 		else
-			existing_steering_entry(dev, port, steer,
-						index, qp->qpn);
+			err = existing_steering_entry(dev, port, steer,
+						      index, qp->qpn);
 	}
 	if (err && link && index != -1) {
 		if (index < dev->caps.num_mgms)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index aa0d5ff..9335e5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -200,25 +200,3 @@
 
 	return err;
 }
-
-int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey)
-{
-	struct mlx5_cmd_query_special_contexts_mbox_in in;
-	struct mlx5_cmd_query_special_contexts_mbox_out out;
-	int err;
-
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
-
-	if (out.hdr.status)
-		err = mlx5_cmd_status_to_err(&out.hdr);
-
-	*rsvd_lkey = be32_to_cpu(out.resd_lkey);
-
-	return err;
-}
-EXPORT_SYMBOL(mlx5_core_query_special_context);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 2b32e0c..b4f2123 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -6081,7 +6081,7 @@
 {
 	void __iomem *ioaddr = tp->mmio_addr;
 	struct pci_dev *pdev = tp->pci_dev;
-	u16 rg_saw_cnt;
+	int rg_saw_cnt;
 	u32 data;
 	static const struct ephy_info e_info_8168h_1[] = {
 		{ 0x1e, 0x0800,	0x0001 },
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index dd652f2..108a311 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -299,9 +299,10 @@
 	 * Unbound PCI devices are always put in D0, regardless of
 	 * runtime PM status.  During probe, the device is set to
 	 * active and the usage count is incremented.  If the driver
-	 * supports runtime PM, it should call pm_runtime_put_noidle()
-	 * in its probe routine and pm_runtime_get_noresume() in its
-	 * remove routine.
+	 * supports runtime PM, it should call pm_runtime_put_noidle(),
+	 * or any other runtime PM helper function decrementing the usage
+	 * count, in its probe routine and pm_runtime_get_noresume() in
+	 * its remove routine.
 	 */
 	pm_runtime_get_sync(dev);
 	pci_dev->driver = pci_drv;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 7235c48..43856d1 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -217,6 +217,7 @@
 
 int acpi_pci_irq_enable (struct pci_dev *dev);
 void acpi_penalize_isa_irq(int irq, int active);
+bool acpi_isa_irq_available(int irq);
 void acpi_penalize_sci_irq(int irq, int trigger, int polarity);
 void acpi_pci_irq_disable (struct pci_dev *dev);
 
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 8eb3b19..250b1ff 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -402,17 +402,6 @@
 	u8			rsvd[8];
 };
 
-struct mlx5_cmd_query_special_contexts_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_cmd_query_special_contexts_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	__be32                  dump_fill_mkey;
-	__be32                  resd_lkey;
-};
-
 struct mlx5_cmd_layout {
 	u8		type;
 	u8		rsvd0[3];
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 27b53f9..8b6d6f2 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -845,7 +845,6 @@
 int mlx5_register_interface(struct mlx5_interface *intf);
 void mlx5_unregister_interface(struct mlx5_interface *intf);
 int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
-int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey);
 
 struct mlx5_profile {
 	u64	mask;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2b0a30a..4398411 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2708,7 +2708,7 @@
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
 	else if (skb->ip_summed == CHECKSUM_PARTIAL &&
-		 skb_checksum_start_offset(skb) <= len)
+		 skb_checksum_start_offset(skb) < 0)
 		skb->ip_summed = CHECKSUM_NONE;
 }
 
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 4a167b3..cb1b9bb 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -63,7 +63,11 @@
 #define UNIX_GC_MAYBE_CYCLE	1
 	struct socket_wq	peer_wq;
 };
-#define unix_sk(__sk) ((struct unix_sock *)__sk)
+
+static inline struct unix_sock *unix_sk(struct sock *sk)
+{
+	return (struct unix_sock *)sk;
+}
 
 #define peer_wait peer_wq.wait
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 805a95a..830f8a7 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -31,7 +31,6 @@
 static const char fmt_hex[] = "%#x\n";
 static const char fmt_long_hex[] = "%#lx\n";
 static const char fmt_dec[] = "%d\n";
-static const char fmt_udec[] = "%u\n";
 static const char fmt_ulong[] = "%lu\n";
 static const char fmt_u64[] = "%llu\n";
 
@@ -202,7 +201,7 @@
 	if (netif_running(netdev)) {
 		struct ethtool_cmd cmd;
 		if (!__ethtool_get_settings(netdev, &cmd))
-			ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd));
+			ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd));
 	}
 	rtnl_unlock();
 	return ret;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index dad4dd3..fab4599 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2958,11 +2958,12 @@
  */
 unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 {
+	unsigned char *data = skb->data;
+
 	BUG_ON(len > skb->len);
-	skb->len -= len;
-	BUG_ON(skb->len < skb->data_len);
-	skb_postpull_rcsum(skb, skb->data, len);
-	return skb->data += len;
+	__skb_pull(skb, len);
+	skb_postpull_rcsum(skb, data, len);
+	return skb->data;
 }
 EXPORT_SYMBOL_GPL(skb_pull_rcsum);
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index cce9738..7d91f46 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -458,12 +458,17 @@
 static int dsa_slave_port_attr_set(struct net_device *dev,
 				   struct switchdev_attr *attr)
 {
-	int ret = 0;
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+	int ret;
 
 	switch (attr->id) {
 	case SWITCHDEV_ATTR_PORT_STP_STATE:
-		if (attr->trans == SWITCHDEV_TRANS_COMMIT)
-			ret = dsa_slave_stp_update(dev, attr->u.stp_state);
+		if (attr->trans == SWITCHDEV_TRANS_PREPARE)
+			ret = ds->drv->port_stp_update ? 0 : -EOPNOTSUPP;
+		else
+			ret = ds->drv->port_stp_update(ds, p->port,
+						       attr->u.stp_state);
 		break;
 	default:
 		ret = -EOPNOTSUPP;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 6fcbd21..690bcbc 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -340,6 +340,7 @@
 	fl4.flowi4_tos = tos;
 	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
 	fl4.flowi4_tun_key.tun_id = 0;
+	fl4.flowi4_flags = 0;
 
 	no_addr = idev->ifa_list == NULL;
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c6ad99a..c81deb8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1737,6 +1737,7 @@
 	fl4.flowi4_mark = skb->mark;
 	fl4.flowi4_tos = tos;
 	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
+	fl4.flowi4_flags = 0;
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
 	err = fib_lookup(net, &fl4, &res, 0);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f204089..cb32ce2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1193,7 +1193,8 @@
 
 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
 
-	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
+	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
+	    fl6->flowi6_oif)
 		flags |= RT6_LOOKUP_F_IFACE;
 
 	if (!ipv6_addr_any(&fl6->saddr))
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index f6b090d..afca2eb 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1319,7 +1319,7 @@
 	tunnel = container_of(work, struct l2tp_tunnel, del_work);
 	sk = l2tp_tunnel_sock_lookup(tunnel);
 	if (!sk)
-		return;
+		goto out;
 
 	sock = sk->sk_socket;
 
@@ -1341,6 +1341,8 @@
 	}
 
 	l2tp_tunnel_sock_put(sk);
+out:
+	l2tp_tunnel_dec_refcount(tunnel);
 }
 
 /* Create a socket for the tunnel, if one isn't set up by
@@ -1636,8 +1638,13 @@
  */
 int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
 {
+	l2tp_tunnel_inc_refcount(tunnel);
 	l2tp_tunnel_closeall(tunnel);
-	return (false == queue_work(l2tp_wq, &tunnel->del_work));
+	if (false == queue_work(l2tp_wq, &tunnel->del_work)) {
+		l2tp_tunnel_dec_refcount(tunnel);
+		return 1;
+	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
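
The l2tp change above follows a common kernel pattern: pin the object with a
reference before queueing asynchronous teardown, and drop the pin both in the
work function and on the queueing failure path.  A minimal sketch of that
pattern with hypothetical obj_* names (not part of this patch):

	#include <linux/atomic.h>
	#include <linux/kernel.h>
	#include <linux/slab.h>
	#include <linux/workqueue.h>

	struct obj {
		atomic_t ref;
		struct work_struct del_work;
	};

	static void obj_put(struct obj *o)
	{
		if (atomic_dec_and_test(&o->ref))
			kfree(o);
	}

	static void obj_del_work(struct work_struct *work)
	{
		struct obj *o = container_of(work, struct obj, del_work);

		/* ... actual teardown, which may bail out early ... */
		obj_put(o);	/* drop the pin on every exit path */
	}

	int obj_delete(struct obj *o)
	{
		atomic_inc(&o->ref);	/* pin across the queued work */
		if (!queue_work(system_wq, &o->del_work)) {
			obj_put(o);	/* work was already queued */
			return 1;
		}
		return 0;
	}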
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 197c3f5..b00f1f9 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1208,20 +1208,22 @@
  *   within this document.
  *
  * Our basic strategy is to round-robin transports in priorities
- * according to sctp_state_prio_map[] e.g., if no such
+ * according to sctp_trans_score() e.g., if no such
  * transport with state SCTP_ACTIVE exists, round-robin through
  * SCTP_UNKNOWN, etc. You get the picture.
  */
-static const u8 sctp_trans_state_to_prio_map[] = {
-	[SCTP_ACTIVE]	= 3,	/* best case */
-	[SCTP_UNKNOWN]	= 2,
-	[SCTP_PF]	= 1,
-	[SCTP_INACTIVE] = 0,	/* worst case */
-};
-
 static u8 sctp_trans_score(const struct sctp_transport *trans)
 {
-	return sctp_trans_state_to_prio_map[trans->state];
+	switch (trans->state) {
+	case SCTP_ACTIVE:
+		return 3;	/* best case */
+	case SCTP_UNKNOWN:
+		return 2;
+	case SCTP_PF:
+		return 1;
+	default: /* case SCTP_INACTIVE */
+		return 0;	/* worst case */
+	}
 }
 
 static struct sctp_transport *sctp_trans_elect_tie(struct sctp_transport *trans1,
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 35df126..6098d4c 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -244,12 +244,13 @@
 	int error;
 	struct sctp_transport *transport = (struct sctp_transport *) peer;
 	struct sctp_association *asoc = transport->asoc;
-	struct net *net = sock_net(asoc->base.sk);
+	struct sock *sk = asoc->base.sk;
+	struct net *net = sock_net(sk);
 
 	/* Check whether a task is in the sock.  */
 
-	bh_lock_sock(asoc->base.sk);
-	if (sock_owned_by_user(asoc->base.sk)) {
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
 		pr_debug("%s: sock is busy\n", __func__);
 
 		/* Try again later.  */
@@ -272,10 +273,10 @@
 			   transport, GFP_ATOMIC);
 
 	if (error)
-		asoc->base.sk->sk_err = -error;
+		sk->sk_err = -error;
 
 out_unlock:
-	bh_unlock_sock(asoc->base.sk);
+	bh_unlock_sock(sk);
 	sctp_transport_put(transport);
 }
 
@@ -285,11 +286,12 @@
 static void sctp_generate_timeout_event(struct sctp_association *asoc,
 					sctp_event_timeout_t timeout_type)
 {
-	struct net *net = sock_net(asoc->base.sk);
+	struct sock *sk = asoc->base.sk;
+	struct net *net = sock_net(sk);
 	int error = 0;
 
-	bh_lock_sock(asoc->base.sk);
-	if (sock_owned_by_user(asoc->base.sk)) {
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
 		pr_debug("%s: sock is busy: timer %d\n", __func__,
 			 timeout_type);
 
@@ -312,10 +314,10 @@
 			   (void *)timeout_type, GFP_ATOMIC);
 
 	if (error)
-		asoc->base.sk->sk_err = -error;
+		sk->sk_err = -error;
 
 out_unlock:
-	bh_unlock_sock(asoc->base.sk);
+	bh_unlock_sock(sk);
 	sctp_association_put(asoc);
 }
 
@@ -365,10 +367,11 @@
 	int error = 0;
 	struct sctp_transport *transport = (struct sctp_transport *) data;
 	struct sctp_association *asoc = transport->asoc;
-	struct net *net = sock_net(asoc->base.sk);
+	struct sock *sk = asoc->base.sk;
+	struct net *net = sock_net(sk);
 
-	bh_lock_sock(asoc->base.sk);
-	if (sock_owned_by_user(asoc->base.sk)) {
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
 		pr_debug("%s: sock is busy\n", __func__);
 
 		/* Try again later.  */
@@ -388,11 +391,11 @@
 			   asoc->state, asoc->ep, asoc,
 			   transport, GFP_ATOMIC);
 
-	 if (error)
-		 asoc->base.sk->sk_err = -error;
+	if (error)
+		sk->sk_err = -error;
 
 out_unlock:
-	bh_unlock_sock(asoc->base.sk);
+	bh_unlock_sock(sk);
 	sctp_transport_put(transport);
 }
 
@@ -403,10 +406,11 @@
 {
 	struct sctp_transport *transport = (struct sctp_transport *) data;
 	struct sctp_association *asoc = transport->asoc;
-	struct net *net = sock_net(asoc->base.sk);
+	struct sock *sk = asoc->base.sk;
+	struct net *net = sock_net(sk);
 
-	bh_lock_sock(asoc->base.sk);
-	if (sock_owned_by_user(asoc->base.sk)) {
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
 		pr_debug("%s: sock is busy\n", __func__);
 
 		/* Try again later.  */
@@ -427,7 +431,7 @@
 		   asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC);
 
 out_unlock:
-	bh_unlock_sock(asoc->base.sk);
+	bh_unlock_sock(sk);
 	sctp_association_put(asoc);
 }
 
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index cb25c89..f1e8daf 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -39,25 +39,6 @@
 fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
 	    struct rpcrdma_create_data_internal *cdata)
 {
-	struct ib_device_attr *devattr = &ia->ri_devattr;
-	struct ib_mr *mr;
-
-	/* Obtain an lkey to use for the regbufs, which are
-	 * protected from remote access.
-	 */
-	if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
-		ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
-	} else {
-		mr = ib_get_dma_mr(ia->ri_pd, IB_ACCESS_LOCAL_WRITE);
-		if (IS_ERR(mr)) {
-			pr_err("%s: ib_get_dma_mr for failed with %lX\n",
-			       __func__, PTR_ERR(mr));
-			return -ENOMEM;
-		}
-		ia->ri_dma_lkey = ia->ri_dma_mr->lkey;
-		ia->ri_dma_mr = mr;
-	}
-
 	return 0;
 }
 
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index d6653f5..5318951 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -189,11 +189,6 @@
 	struct ib_device_attr *devattr = &ia->ri_devattr;
 	int depth, delta;
 
-	/* Obtain an lkey to use for the regbufs, which are
-	 * protected from remote access.
-	 */
-	ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
-
 	ia->ri_max_frmr_depth =
 			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
 			      devattr->max_fast_reg_page_list_len);
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index 72cf8b1..617b76f 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -23,7 +23,6 @@
 physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
 		 struct rpcrdma_create_data_internal *cdata)
 {
-	struct ib_device_attr *devattr = &ia->ri_devattr;
 	struct ib_mr *mr;
 
 	/* Obtain an rkey to use for RPC data payloads.
@@ -37,15 +36,8 @@
 		       __func__, PTR_ERR(mr));
 		return -ENOMEM;
 	}
+
 	ia->ri_dma_mr = mr;
-
-	/* Obtain an lkey to use for regbufs.
-	 */
-	if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
-		ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
-	else
-		ia->ri_dma_lkey = ia->ri_dma_mr->lkey;
-
 	return 0;
 }
 
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 6829967..eb081ad 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1252,7 +1252,7 @@
 		goto out_free;
 
 	iov->length = size;
-	iov->lkey = ia->ri_dma_lkey;
+	iov->lkey = ia->ri_pd->local_dma_lkey;
 	rb->rg_size = size;
 	rb->rg_owner = NULL;
 	return rb;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 0251222..c09414e 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -65,7 +65,6 @@
 	struct rdma_cm_id 	*ri_id;
 	struct ib_pd		*ri_pd;
 	struct ib_mr		*ri_dma_mr;
-	u32			ri_dma_lkey;
 	struct completion	ri_done;
 	int			ri_async_rc;
 	unsigned int		ri_max_frmr_depth;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 03ee4d3..ef31b40 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2179,8 +2179,21 @@
 			if (UNIXCB(skb).fp)
 				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
 
-			sk_peek_offset_fwd(sk, chunk);
+			if (skip) {
+				sk_peek_offset_fwd(sk, chunk);
+				skip -= chunk;
+			}
 
+			if (UNIXCB(skb).fp)
+				break;
+
+			last = skb;
+			last_len = skb->len;
+			unix_state_lock(sk);
+			skb = skb_peek_next(skb, &sk->sk_receive_queue);
+			if (skb)
+				goto again;
+			unix_state_unlock(sk);
 			break;
 		}
 	} while (size);
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 9655cb4..bde0ef1 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -71,8 +71,11 @@
 unsigned int extra_msr_offset64;
 unsigned int extra_delta_offset32;
 unsigned int extra_delta_offset64;
+unsigned int aperf_mperf_multiplier = 1;
 int do_smi;
 double bclk;
+double base_hz;
+double tsc_tweak = 1.0;
 unsigned int show_pkg;
 unsigned int show_core;
 unsigned int show_cpu;
@@ -502,7 +505,7 @@
 	/* %Busy */
 	if (has_aperf) {
 		if (!skip_c0)
-			outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc);
+			outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
 		else
 			outp += sprintf(outp, "********");
 	}
@@ -510,7 +513,7 @@
 	/* Bzy_MHz */
 	if (has_aperf)
 		outp += sprintf(outp, "%8.0f",
-			1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
+			1.0 * t->tsc * tsc_tweak / units * t->aperf / t->mperf / interval_float);
 
 	/* TSC_MHz */
 	outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
@@ -984,6 +987,8 @@
 			return -3;
 		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
 			return -4;
+		t->aperf = t->aperf * aperf_mperf_multiplier;
+		t->mperf = t->mperf * aperf_mperf_multiplier;
 	}
 
 	if (do_smi) {
@@ -1149,6 +1154,19 @@
 int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 
+
+static void
+calculate_tsc_tweak()
+{
+	unsigned long long msr;
+	unsigned int base_ratio;
+
+	get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
+	base_ratio = (msr >> 8) & 0xFF;
+	base_hz = base_ratio * bclk * 1000000;
+	tsc_tweak = base_hz / tsc_hz;
+}
+
 static void
 dump_nhm_platform_info(void)
 {
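
A quick worked example of calculate_tsc_tweak() above, with illustrative
numbers (not from this patch): if MSR_NHM_PLATFORM_INFO reports a base ratio
of 24 and bclk is 100.0 MHz, then base_hz = 24 * 100.0 * 1000000 = 2.4 GHz.
With a measured tsc_hz of 2.5 GHz, tsc_tweak = 2.4e9 / 2.5e9 = 0.96; the
%Busy column divides by tsc_tweak and Bzy_MHz multiplies by it, so both are
reported relative to the base frequency rather than the (faster) TSC.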
@@ -1926,8 +1944,6 @@
 
 	switch (model) {
 	case 0x3A:	/* IVB */
-	case 0x3E:	/* IVB Xeon */
-
 	case 0x3C:	/* HSW */
 	case 0x3F:	/* HSX */
 	case 0x45:	/* HSW */
@@ -2543,6 +2559,13 @@
 	return 0;
 }
 
+unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
+{
+	if (is_knl(family, model))
+		return 1024;
+	return 1;
+}
+
 #define SLM_BCLK_FREQS 5
 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
 
@@ -2744,6 +2767,9 @@
 		}
 	}
 
+	if (has_aperf)
+		aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
+
 	do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
 	do_snb_cstates = has_snb_msrs(family, model);
 	do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
@@ -2762,6 +2788,9 @@
 	if (debug)
 		dump_cstate_pstate_config_info();
 
+	if (has_skl_msrs(family, model))
+		calculate_tsc_tweak();
+
 	return;
 }
 
@@ -3090,7 +3119,7 @@
 }
 
 void print_version() {
-	fprintf(stderr, "turbostat version 4.7 17-June, 2015"
+	fprintf(stderr, "turbostat version 4.8 26-Sep, 2015"
 		" - Len Brown <lenb@kernel.org>\n");
 }