Merge branch 'pci/virtualization'
- probe for device reset support during enumeration instead of runtime
(Bjorn Helgaas)
- add ACS quirk for Ampere (née APM) root ports (Feng Kan)
- add function 1 DMA alias quirk for Marvell 88SE9220 (Thomas
Vincent-Cross)
- protect device restore with device lock (Sinan Kaya)
- handle failure of FLR gracefully (Sinan Kaya)
- handle CRS (config retry status) after device resets (Sinan Kaya)
- skip various config reads for SR-IOV VFs as an optimization (KarimAllah
Ahmed)
* pci/virtualization:
PCI/IOV: Add missing prototypes for powerpc pcibios interfaces
PCI/IOV: Use VF0 cached config registers for other VFs
PCI/IOV: Skip BAR sizing for VFs
PCI/IOV: Skip INTx config reads for VFs
PCI: Wait for device to become ready after secondary bus reset
PCI: Add a return type for pci_reset_bridge_secondary_bus()
PCI: Wait for device to become ready after a power management reset
PCI: Rename pci_flr_wait() to pci_dev_wait() and make it generic
PCI: Handle FLR failure and allow other reset types
PCI: Protect restore with device lock to be consistent
PCI: Add function 1 DMA alias quirk for Marvell 88SE9220
PCI: Add ACS quirk for Ampere root ports
PCI: Remove redundant probes for device reset support
PCI: Probe for device reset support during enumeration
Conflicts:
include/linux/pci.h
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 538de90..8adf4a6 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -112,6 +112,29 @@
return dev->sriov->barsz[resno - PCI_IOV_RESOURCES];
}
+static void pci_read_vf_config_common(struct pci_dev *virtfn)
+{
+ struct pci_dev *physfn = virtfn->physfn;
+
+ /*
+ * Some config registers are the same across all associated VFs.
+ * Read them once from VF0 so we can skip reading them from the
+ * other VFs.
+ *
+ * PCIe r4.0, sec 9.3.4.1, technically doesn't require all VFs to
+ * have the same Revision ID and Subsystem ID, but we assume they
+ * do.
+ */
+ pci_read_config_dword(virtfn, PCI_CLASS_REVISION,
+ &physfn->sriov->class);
+ pci_read_config_byte(virtfn, PCI_HEADER_TYPE,
+ &physfn->sriov->hdr_type);
+ pci_read_config_word(virtfn, PCI_SUBSYSTEM_VENDOR_ID,
+ &physfn->sriov->subsystem_vendor);
+ pci_read_config_word(virtfn, PCI_SUBSYSTEM_ID,
+ &physfn->sriov->subsystem_device);
+}
+
int pci_iov_add_virtfn(struct pci_dev *dev, int id)
{
int i;
@@ -134,13 +157,17 @@
virtfn->devfn = pci_iov_virtfn_devfn(dev, id);
virtfn->vendor = dev->vendor;
virtfn->device = iov->vf_device;
+ virtfn->is_virtfn = 1;
+ virtfn->physfn = pci_dev_get(dev);
+
+ if (id == 0)
+ pci_read_vf_config_common(virtfn);
+
rc = pci_setup_device(virtfn);
if (rc)
- goto failed0;
+ goto failed1;
virtfn->dev.parent = dev->dev.parent;
- virtfn->physfn = pci_dev_get(dev);
- virtfn->is_virtfn = 1;
virtfn->multifunction = 0;
for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
@@ -161,10 +188,10 @@
sprintf(buf, "virtfn%u", id);
rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
if (rc)
- goto failed1;
+ goto failed2;
rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
if (rc)
- goto failed2;
+ goto failed3;
kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
@@ -172,11 +199,12 @@
return 0;
-failed2:
+failed3:
sysfs_remove_link(&dev->dev.kobj, buf);
+failed2:
+ pci_stop_and_remove_bus_device(virtfn);
failed1:
pci_dev_put(dev);
- pci_stop_and_remove_bus_device(virtfn);
failed0:
virtfn_remove_bus(dev->bus, bus);
failed:
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 478dde8..116e97d 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1515,11 +1515,10 @@
/* Active State Power Management */
pcie_aspm_create_sysfs_dev_files(dev);
- if (!pci_probe_reset_function(dev)) {
+ if (dev->reset_fn) {
retval = device_create_file(&dev->dev, &reset_attr);
if (retval)
goto error;
- dev->reset_fn = 1;
}
return 0;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index bfac1d5..43aeecc 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -127,6 +127,9 @@
}
__setup("pcie_port_pm=", pcie_port_pm_setup);
+/* Time to wait after a reset for device to become responsive */
+#define PCIE_RESET_READY_POLL_MS 60000
+
/**
* pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
* @bus: pointer to PCI bus structure to search
@@ -3969,20 +3972,13 @@
}
EXPORT_SYMBOL(pci_wait_for_pending_transaction);
-static void pci_flr_wait(struct pci_dev *dev)
+static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
{
- int delay = 1, timeout = 60000;
+ int delay = 1;
u32 id;
/*
- * Per PCIe r3.1, sec 6.6.2, a device must complete an FLR within
- * 100ms, but may silently discard requests while the FLR is in
- * progress. Wait 100ms before trying to access the device.
- */
- msleep(100);
-
- /*
- * After 100ms, the device should not silently discard config
+ * After reset, the device should not silently discard config
* requests, but it may still indicate that it needs more time by
* responding to them with CRS completions. The Root Port will
* generally synthesize ~0 data to complete the read (except when
@@ -3996,14 +3992,14 @@
pci_read_config_dword(dev, PCI_COMMAND, &id);
while (id == ~0) {
if (delay > timeout) {
- pci_warn(dev, "not ready %dms after FLR; giving up\n",
- 100 + delay - 1);
- return;
+ pci_warn(dev, "not ready %dms after %s; giving up\n",
+ delay - 1, reset_type);
+ return -ENOTTY;
}
if (delay > 1000)
- pci_info(dev, "not ready %dms after FLR; waiting\n",
- 100 + delay - 1);
+ pci_info(dev, "not ready %dms after %s; waiting\n",
+ delay - 1, reset_type);
msleep(delay);
delay *= 2;
@@ -4011,7 +4007,10 @@
}
if (delay > 1000)
- pci_info(dev, "ready %dms after FLR\n", 100 + delay - 1);
+ pci_info(dev, "ready %dms after %s\n", delay - 1,
+ reset_type);
+
+ return 0;
}
/**
@@ -4040,13 +4039,21 @@
* device supports FLR before calling this function, e.g. by using the
* pcie_has_flr() helper.
*/
-void pcie_flr(struct pci_dev *dev)
+int pcie_flr(struct pci_dev *dev)
{
if (!pci_wait_for_pending_transaction(dev))
pci_err(dev, "timed out waiting for pending transaction; performing function level reset anyway\n");
pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
- pci_flr_wait(dev);
+
+ /*
+ * Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within
+ * 100ms, but may silently discard requests while the FLR is in
+ * progress. Wait 100ms before trying to access the device.
+ */
+ msleep(100);
+
+ return pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
}
EXPORT_SYMBOL_GPL(pcie_flr);
@@ -4079,8 +4086,16 @@
pci_err(dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n");
pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
- pci_flr_wait(dev);
- return 0;
+
+ /*
+ * Per Advanced Capabilities for Conventional PCI ECN, 13 April 2006,
+ * updated 27 July 2006; a device must complete an FLR within
+ * 100ms, but may silently discard requests while the FLR is in
+ * progress. Wait 100ms before trying to access the device.
+ */
+ msleep(100);
+
+ return pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
}
/**
@@ -4125,7 +4140,7 @@
pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
pci_dev_d3_sleep(dev);
- return 0;
+ return pci_dev_wait(dev, "PM D3->D0", PCIE_RESET_READY_POLL_MS);
}
void pci_reset_secondary_bus(struct pci_dev *dev)
@@ -4167,9 +4182,11 @@
* Use the bridge control register to assert reset on the secondary bus.
* Devices on the secondary bus are left in power-on state.
*/
-void pci_reset_bridge_secondary_bus(struct pci_dev *dev)
+int pci_reset_bridge_secondary_bus(struct pci_dev *dev)
{
pcibios_reset_secondary_bus(dev);
+
+ return pci_dev_wait(dev, "bus reset", PCIE_RESET_READY_POLL_MS);
}
EXPORT_SYMBOL_GPL(pci_reset_bridge_secondary_bus);
@@ -4332,8 +4349,9 @@
if (rc != -ENOTTY)
return rc;
if (pcie_has_flr(dev)) {
- pcie_flr(dev);
- return 0;
+ rc = pcie_flr(dev);
+ if (rc != -ENOTTY)
+ return rc;
}
rc = pci_af_flr(dev, 0);
if (rc != -ENOTTY)
@@ -4403,9 +4421,8 @@
{
int rc;
- rc = pci_probe_reset_function(dev);
- if (rc)
- return rc;
+ if (!dev->reset_fn)
+ return -ENOTTY;
pci_dev_lock(dev);
pci_dev_save_and_disable(dev);
@@ -4440,9 +4457,8 @@
{
int rc;
- rc = pci_probe_reset_function(dev);
- if (rc)
- return rc;
+ if (!dev->reset_fn)
+ return -ENOTTY;
pci_dev_save_and_disable(dev);
@@ -4464,18 +4480,17 @@
{
int rc;
- rc = pci_probe_reset_function(dev);
- if (rc)
- return rc;
+ if (!dev->reset_fn)
+ return -ENOTTY;
if (!pci_dev_trylock(dev))
return -EAGAIN;
pci_dev_save_and_disable(dev);
rc = __pci_reset_function_locked(dev);
+ pci_dev_restore(dev);
pci_dev_unlock(dev);
- pci_dev_restore(dev);
return rc;
}
EXPORT_SYMBOL_GPL(pci_try_reset_function);
@@ -4683,7 +4698,9 @@
list_for_each_entry(dev, &slot->bus->devices, bus_list) {
if (!dev->slot || dev->slot != slot)
continue;
+ pci_dev_lock(dev);
pci_dev_restore(dev);
+ pci_dev_unlock(dev);
if (dev->subordinate)
pci_bus_restore(dev->subordinate);
}
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 38b2596..6722302 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -293,6 +293,10 @@
u16 driver_max_VFs; /* Max num VFs driver supports */
struct pci_dev *dev; /* Lowest numbered PF */
struct pci_dev *self; /* This PF */
+ u32 class; /* VF device */
+ u8 hdr_type; /* VF header type */
+ u16 subsystem_vendor; /* VF subsystem vendor */
+ u16 subsystem_device; /* VF subsystem device */
resource_size_t barsz[PCI_SRIOV_NUM_BARS]; /* VF BAR size */
bool drivers_autoprobe; /* Auto probing of VFs by driver */
};
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index f26fcbf..caa0710 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -329,6 +329,10 @@
if (dev->non_compliant_bars)
return;
+ /* Per PCIe r4.0, sec 9.3.4.1.11, the VF BARs are all RO Zero */
+ if (dev->is_virtfn)
+ return;
+
for (pos = 0; pos < howmany; pos++) {
struct resource *res = &dev->resource[pos];
reg = PCI_BASE_ADDRESS_0 + (pos << 2);
@@ -1240,6 +1244,13 @@
{
unsigned char irq;
+ /* VFs are not allowed to use INTx, so skip the config reads */
+ if (dev->is_virtfn) {
+ dev->pin = 0;
+ dev->irq = 0;
+ return;
+ }
+
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq);
dev->pin = irq;
if (irq)
@@ -1399,6 +1410,43 @@
return PCI_CFG_SPACE_SIZE;
}
+static u32 pci_class(struct pci_dev *dev)
+{
+ u32 class;
+
+#ifdef CONFIG_PCI_IOV
+ if (dev->is_virtfn)
+ return dev->physfn->sriov->class;
+#endif
+ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
+ return class;
+}
+
+static void pci_subsystem_ids(struct pci_dev *dev, u16 *vendor, u16 *device)
+{
+#ifdef CONFIG_PCI_IOV
+ if (dev->is_virtfn) {
+ *vendor = dev->physfn->sriov->subsystem_vendor;
+ *device = dev->physfn->sriov->subsystem_device;
+ return;
+ }
+#endif
+ pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, vendor);
+ pci_read_config_word(dev, PCI_SUBSYSTEM_ID, device);
+}
+
+static u8 pci_hdr_type(struct pci_dev *dev)
+{
+ u8 hdr_type;
+
+#ifdef CONFIG_PCI_IOV
+ if (dev->is_virtfn)
+ return dev->physfn->sriov->hdr_type;
+#endif
+ pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type);
+ return hdr_type;
+}
+
#define LEGACY_IO_RESOURCE (IORESOURCE_IO | IORESOURCE_PCI_FIXED)
static void pci_msi_setup_pci_dev(struct pci_dev *dev)
@@ -1464,8 +1512,7 @@
struct pci_bus_region region;
struct resource *res;
- if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type))
- return -EIO;
+ hdr_type = pci_hdr_type(dev);
dev->sysdata = dev->bus->sysdata;
dev->dev.parent = dev->bus->bridge;
@@ -1487,7 +1534,8 @@
dev->bus->number, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn));
- pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
+ class = pci_class(dev);
+
dev->revision = class & 0xff;
dev->class = class >> 8; /* upper 3 bytes */
@@ -1527,8 +1575,8 @@
goto bad;
pci_read_irq(dev);
pci_read_bases(dev, 6, PCI_ROM_ADDRESS);
- pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
- pci_read_config_word(dev, PCI_SUBSYSTEM_ID, &dev->subsystem_device);
+
+ pci_subsystem_ids(dev, &dev->subsystem_vendor, &dev->subsystem_device);
/*
* Do the ugly legacy mode stuff here rather than broken chip
@@ -2131,6 +2179,9 @@
/* Advanced Error Reporting */
pci_aer_init(dev);
+
+ if (pci_probe_reset_function(dev) == 0)
+ dev->reset_fn = 1;
}
/*
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 8bf0ad9..df75bc8 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3888,6 +3888,9 @@
/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c46 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x91a0,
quirk_dma_func1_alias);
+/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c127 */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9220,
+ quirk_dma_func1_alias);
/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c49 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9230,
quirk_dma_func1_alias);
@@ -4506,6 +4509,15 @@
{ PCI_VENDOR_ID_CAVIUM, PCI_ANY_ID, pci_quirk_cavium_acs },
/* APM X-Gene */
{ PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs },
+ /* Ampere Computing */
+ { PCI_VENDOR_ID_AMPERE, 0xE005, pci_quirk_xgene_acs },
+ { PCI_VENDOR_ID_AMPERE, 0xE006, pci_quirk_xgene_acs },
+ { PCI_VENDOR_ID_AMPERE, 0xE007, pci_quirk_xgene_acs },
+ { PCI_VENDOR_ID_AMPERE, 0xE008, pci_quirk_xgene_acs },
+ { PCI_VENDOR_ID_AMPERE, 0xE009, pci_quirk_xgene_acs },
+ { PCI_VENDOR_ID_AMPERE, 0xE00A, pci_quirk_xgene_acs },
+ { PCI_VENDOR_ID_AMPERE, 0xE00B, pci_quirk_xgene_acs },
+ { PCI_VENDOR_ID_AMPERE, 0xE00C, pci_quirk_xgene_acs },
{ 0 }
};
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ee0bfc1..283953c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1085,7 +1085,7 @@
enum pci_bus_speed *speed,
enum pcie_link_width *width);
void pcie_print_link_status(struct pci_dev *dev);
-void pcie_flr(struct pci_dev *dev);
+int pcie_flr(struct pci_dev *dev);
int __pci_reset_function_locked(struct pci_dev *dev);
int pci_reset_function(struct pci_dev *dev);
int pci_reset_function_locked(struct pci_dev *dev);
@@ -1098,7 +1098,7 @@
int pci_try_reset_bus(struct pci_bus *bus);
void pci_reset_secondary_bus(struct pci_dev *dev);
void pcibios_reset_secondary_bus(struct pci_dev *dev);
-void pci_reset_bridge_secondary_bus(struct pci_dev *dev);
+int pci_reset_bridge_secondary_bus(struct pci_dev *dev);
void pci_update_resource(struct pci_dev *dev, int resno);
int __must_check pci_assign_resource(struct pci_dev *dev, int i);
int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align);
@@ -1299,7 +1299,6 @@
void pci_setup_bridge(struct pci_bus *bus);
resource_size_t pcibios_window_alignment(struct pci_bus *bus,
unsigned long type);
-resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno);
#define PCI_VGA_STATE_CHANGE_BRIDGE (1 << 0)
#define PCI_VGA_STATE_CHANGE_DECODES (1 << 1)
@@ -1922,6 +1921,7 @@
void pcibios_penalize_isa_irq(int irq, int active);
int pcibios_alloc_irq(struct pci_dev *dev);
void pcibios_free_irq(struct pci_dev *dev);
+resource_size_t pcibios_default_alignment(void);
#ifdef CONFIG_HIBERNATE_CALLBACKS
extern struct dev_pm_ops pcibios_pm_ops;
@@ -1954,6 +1954,11 @@
int pci_sriov_get_totalvfs(struct pci_dev *dev);
resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno);
void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe);
+
+/* Arch may override these (weak) */
+int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs);
+int pcibios_sriov_disable(struct pci_dev *pdev);
+resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno);
#else
static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id)
{
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 6a96a70..f22124e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1333,6 +1333,7 @@
#define PCI_DEVICE_ID_IMS_TT3D 0x9135
#define PCI_VENDOR_ID_AMCC 0x10e8
+#define PCI_VENDOR_ID_AMPERE 0x1def
#define PCI_VENDOR_ID_INTERG 0x10ea
#define PCI_DEVICE_ID_INTERG_1682 0x1682