PCI: add support for function level reset
Sometimes, it's necessary to enable software's ability to quiesce and
reset endpoint hardware with function-level granularity, so provide
support for it.
The patch implement Function Level Reset(FLR) feature following PCI-e
spec. And this is the first step. We would add more generic method, like
D0/D3, to allow more devices support this function.
The patch contains two functions. pcie_reset_function() is the new
driver API, and, contains some action to quiesce a device. The other
function is a helper: pcie_execute_reset_function() just executes the
reset for a particular device function.
Current the usage model is in KVM. Function reset is necessary for
assigning device to a guest, or moving it between partitions.
For Function Level Reset(FLR), please refer to PCI Express spec chapter
6.6.2.
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index aee73cf..533aeb5 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -18,6 +18,7 @@
#include <linux/log2.h>
#include <linux/pci-aspm.h>
#include <linux/pm_wakeup.h>
+#include <linux/interrupt.h>
#include <asm/dma.h> /* isa_dma_bridge_buggy */
#include "pci.h"
@@ -1746,6 +1747,103 @@
#endif
/**
+ * pci_execute_reset_function() - Reset a PCI device function
+ * @dev: Device function to reset
+ *
+ * Some devices allow an individual function to be reset without affecting
+ * other functions in the same device. The PCI device must be responsive
+ * to PCI config space in order to use this function.
+ *
+ * The device function is presumed to be unused when this function is called.
+ * Resetting the device will make the contents of PCI configuration space
+ * random, so any caller of this must be prepared to reinitialise the
+ * device including MSI, bus mastering, BARs, decoding IO and memory spaces,
+ * etc.
+ *
+ * Returns 0 if the device function was successfully reset or -ENOTTY if the
+ * device doesn't support resetting a single function.
+ */
+int pci_execute_reset_function(struct pci_dev *dev)
+{
+ u16 status;
+ u32 cap;
+ int exppos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+
+ if (!exppos)
+ return -ENOTTY;
+ pci_read_config_dword(dev, exppos + PCI_EXP_DEVCAP, &cap);
+ if (!(cap & PCI_EXP_DEVCAP_FLR))
+ return -ENOTTY;
+
+ pci_block_user_cfg_access(dev);
+
+ /* Wait for Transaction Pending bit clean */
+ msleep(100);
+ pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
+ if (status & PCI_EXP_DEVSTA_TRPND) {
+ dev_info(&dev->dev, "Busy after 100ms while trying to reset; "
+ "sleeping for 1 second\n");
+ ssleep(1);
+ pci_read_config_word(dev, exppos + PCI_EXP_DEVSTA, &status);
+ if (status & PCI_EXP_DEVSTA_TRPND)
+ dev_info(&dev->dev, "Still busy after 1s; "
+ "proceeding with reset anyway\n");
+ }
+
+ pci_write_config_word(dev, exppos + PCI_EXP_DEVCTL,
+ PCI_EXP_DEVCTL_BCR_FLR);
+ mdelay(100);
+
+ pci_unblock_user_cfg_access(dev);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pci_execute_reset_function);
+
+/**
+ * pci_reset_function() - quiesce and reset a PCI device function
+ * @dev: Device function to reset
+ *
+ * Some devices allow an individual function to be reset without affecting
+ * other functions in the same device. The PCI device must be responsive
+ * to PCI config space in order to use this function.
+ *
+ * This function does not just reset the PCI portion of a device, but
+ * clears all the state associated with the device. This function differs
+ * from pci_execute_reset_function in that it saves and restores device state
+ * over the reset.
+ *
+ * Returns 0 if the device function was successfully reset or -ENOTTY if the
+ * device doesn't support resetting a single function.
+ */
+int pci_reset_function(struct pci_dev *dev)
+{
+ u32 cap;
+ int exppos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ int r;
+
+ if (!exppos)
+ return -ENOTTY;
+ pci_read_config_dword(dev, exppos + PCI_EXP_DEVCAP, &cap);
+ if (!(cap & PCI_EXP_DEVCAP_FLR))
+ return -ENOTTY;
+
+ if (!dev->msi_enabled && !dev->msix_enabled)
+ disable_irq(dev->irq);
+ pci_save_state(dev);
+
+ pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
+
+ r = pci_execute_reset_function(dev);
+
+ pci_restore_state(dev);
+ if (!dev->msi_enabled && !dev->msix_enabled)
+ enable_irq(dev->irq);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(pci_reset_function);
+
+/**
* pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count
* @dev: PCI device to query
*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 085187b..f6f6810 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -626,6 +626,8 @@
int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc);
int pcie_get_readrq(struct pci_dev *dev);
int pcie_set_readrq(struct pci_dev *dev, int rq);
+int pci_reset_function(struct pci_dev *dev);
+int pci_execute_reset_function(struct pci_dev *dev);
void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno);
int __must_check pci_assign_resource(struct pci_dev *dev, int i);
int pci_select_bars(struct pci_dev *dev, unsigned long flags);
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index eb6686b..e5effd4 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -377,6 +377,7 @@
#define PCI_EXP_DEVCAP_RBER 0x8000 /* Role-Based Error Reporting */
#define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */
#define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */
+#define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */
#define PCI_EXP_DEVCTL 8 /* Device Control */
#define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */
#define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */
@@ -389,6 +390,7 @@
#define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */
#define PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800 /* Enable No Snoop */
#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */
+#define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */
#define PCI_EXP_DEVSTA 10 /* Device Status */
#define PCI_EXP_DEVSTA_CED 0x01 /* Correctable Error Detected */
#define PCI_EXP_DEVSTA_NFED 0x02 /* Non-Fatal Error Detected */