Blame - arch/powerpc/platforms/powernv/eeh-powernv.c - kernel/msm-4.9

blob: 622f08cf54b60d33e2794ca461ae35c3922b8404 [file] [log] [blame]

Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1	/*
				2	* The file intends to implement the platform dependent EEH operations on
				3	* powernv platform. Actually, the powernv was created in order to fully
				4	* hypervisor support.
				5	*
				6	* Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
				7	*
				8	* This program is free software; you can redistribute it and/or modify
				9	* it under the terms of the GNU General Public License as published by
				10	* the Free Software Foundation; either version 2 of the License, or
				11	* (at your option) any later version.
				12	*/
				13
				14	#include <linux/atomic.h>
Gavin Shan	4cf1744	2015-02-16 14:45:41 +1100	[diff] [blame]	15	#include <linux/debugfs.h>
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	16	#include <linux/delay.h>
				17	#include <linux/export.h>
				18	#include <linux/init.h>
				19	#include <linux/list.h>
				20	#include <linux/msi.h>
				21	#include <linux/of.h>
				22	#include <linux/pci.h>
				23	#include <linux/proc_fs.h>
				24	#include <linux/rbtree.h>
				25	#include <linux/sched.h>
				26	#include <linux/seq_file.h>
				27	#include <linux/spinlock.h>
				28
				29	#include <asm/eeh.h>
				30	#include <asm/eeh_event.h>
				31	#include <asm/firmware.h>
				32	#include <asm/io.h>
				33	#include <asm/iommu.h>
				34	#include <asm/machdep.h>
				35	#include <asm/msi_bitmap.h>
				36	#include <asm/opal.h>
				37	#include <asm/ppc-pci.h>
				38
				39	#include "powernv.h"
				40	#include "pci.h"
				41
Gavin Shan	4cf1744	2015-02-16 14:45:41 +1100	[diff] [blame]	42	static bool pnv_eeh_nb_init = false;
				43
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	44	/**
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	45	* pnv_eeh_init - EEH platform dependent initialization
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	46	*
				47	* EEH platform dependent initialization on powernv
				48	*/
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	49	static int pnv_eeh_init(void)
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	50	{
Gavin Shan	dc561fb	2014-07-17 14:41:39 +1000	[diff] [blame]	51	struct pci_controller *hose;
				52	struct pnv_phb *phb;
				53
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	54	/* We require OPALv3 */
				55	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
Gavin Shan	0dae274	2014-07-17 14:41:41 +1000	[diff] [blame]	56	pr_warn("%s: OPALv3 is required !\n",
				57	__func__);
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	58	return -EINVAL;
				59	}
				60
Gavin Shan	05b1721	2014-07-17 14:41:38 +1000	[diff] [blame]	61	/* Set probe mode */
				62	eeh_add_flag(EEH_PROBE_MODE_DEV);
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	63
Gavin Shan	dc561fb	2014-07-17 14:41:39 +1000	[diff] [blame]	64	/*
				65	* P7IOC blocks PCI config access to frozen PE, but PHB3
				66	* doesn't do that. So we have to selectively enable I/O
				67	* prior to collecting error log.
				68	*/
				69	list_for_each_entry(hose, &hose_list, list_node) {
				70	phb = hose->private_data;
				71
				72	if (phb->model == PNV_PHB_MODEL_P7IOC)
				73	eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
Gavin Shan	2aa5cf9	2014-11-25 09:27:00 +1100	[diff] [blame]	74
				75	/*
				76	* PE#0 should be regarded as valid by EEH core
				77	* if it's not the reserved one. Currently, we
				78	* have the reserved PE#0 and PE#127 for PHB3
				79	* and P7IOC separately. So we should regard
				80	* PE#0 as valid for P7IOC.
				81	*/
				82	if (phb->ioda.reserved_pe != 0)
				83	eeh_add_flag(EEH_VALID_PE_ZERO);
				84
Gavin Shan	dc561fb	2014-07-17 14:41:39 +1000	[diff] [blame]	85	break;
				86	}
				87
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	88	return 0;
				89	}
				90
Gavin Shan	4cf1744	2015-02-16 14:45:41 +1100	[diff] [blame]	91	static int pnv_eeh_event(struct notifier_block *nb,
				92	unsigned long events, void *change)
				93	{
				94	uint64_t changed_evts = (uint64_t)change;
				95
				96	/*
				97	* We simply send special EEH event if EEH has
				98	* been enabled, or clear pending events in
				99	* case that we enable EEH soon
				100	*/
				101	if (!(changed_evts & OPAL_EVENT_PCI_ERROR) \|\|
				102	!(events & OPAL_EVENT_PCI_ERROR))
				103	return 0;
				104
				105	if (eeh_enabled())
				106	eeh_send_failure_event(NULL);
				107	else
				108	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
				109
				110	return 0;
				111	}
				112
				113	static struct notifier_block pnv_eeh_nb = {
				114	.notifier_call = pnv_eeh_event,
				115	.next = NULL,
				116	.priority = 0
				117	};
				118
				119	#ifdef CONFIG_DEBUG_FS
				120	static ssize_t pnv_eeh_ei_write(struct file *filp,
				121	const char __user *user_buf,
				122	size_t count, loff_t *ppos)
				123	{
				124	struct pci_controller *hose = filp->private_data;
				125	struct eeh_dev *edev;
				126	struct eeh_pe *pe;
				127	int pe_no, type, func;
				128	unsigned long addr, mask;
				129	char buf[50];
				130	int ret;
				131
				132	if (!eeh_ops \|\| !eeh_ops->err_inject)
				133	return -ENXIO;
				134
				135	/* Copy over argument buffer */
				136	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
				137	if (!ret)
				138	return -EFAULT;
				139
				140	/* Retrieve parameters */
				141	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
				142	&pe_no, &type, &func, &addr, &mask);
				143	if (ret != 5)
				144	return -EINVAL;
				145
				146	/* Retrieve PE */
				147	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
				148	if (!edev)
				149	return -ENOMEM;
				150	edev->phb = hose;
				151	edev->pe_config_addr = pe_no;
				152	pe = eeh_pe_get(edev);
				153	kfree(edev);
				154	if (!pe)
				155	return -ENODEV;
				156
				157	/* Do error injection */
				158	ret = eeh_ops->err_inject(pe, type, func, addr, mask);
				159	return ret < 0 ? ret : count;
				160	}
				161
				162	static const struct file_operations pnv_eeh_ei_fops = {
				163	.open = simple_open,
				164	.llseek = no_llseek,
				165	.write = pnv_eeh_ei_write,
				166	};
				167
				168	static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val)
				169	{
				170	struct pci_controller *hose = data;
				171	struct pnv_phb *phb = hose->private_data;
				172
				173	out_be64(phb->regs + offset, val);
				174	return 0;
				175	}
				176
				177	static int pnv_eeh_dbgfs_get(void data, int offset, u64 val)
				178	{
				179	struct pci_controller *hose = data;
				180	struct pnv_phb *phb = hose->private_data;
				181
				182	*val = in_be64(phb->regs + offset);
				183	return 0;
				184	}
				185
				186	static int pnv_eeh_outb_dbgfs_set(void *data, u64 val)
				187	{
				188	return pnv_eeh_dbgfs_set(data, 0xD10, val);
				189	}
				190
				191	static int pnv_eeh_outb_dbgfs_get(void data, u64 val)
				192	{
				193	return pnv_eeh_dbgfs_get(data, 0xD10, val);
				194	}
				195
				196	static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val)
				197	{
				198	return pnv_eeh_dbgfs_set(data, 0xD90, val);
				199	}
				200
				201	static int pnv_eeh_inbA_dbgfs_get(void data, u64 val)
				202	{
				203	return pnv_eeh_dbgfs_get(data, 0xD90, val);
				204	}
				205
				206	static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val)
				207	{
				208	return pnv_eeh_dbgfs_set(data, 0xE10, val);
				209	}
				210
				211	static int pnv_eeh_inbB_dbgfs_get(void data, u64 val)
				212	{
				213	return pnv_eeh_dbgfs_get(data, 0xE10, val);
				214	}
				215
				216	DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get,
				217	pnv_eeh_outb_dbgfs_set, "0x%llx\n");
				218	DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get,
				219	pnv_eeh_inbA_dbgfs_set, "0x%llx\n");
				220	DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get,
				221	pnv_eeh_inbB_dbgfs_set, "0x%llx\n");
				222	#endif /* CONFIG_DEBUG_FS */
				223
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	224	/**
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	225	* pnv_eeh_post_init - EEH platform dependent post initialization
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	226	*
				227	* EEH platform dependent post initialization on powernv. When
				228	* the function is called, the EEH PEs and devices should have
				229	* been built. If the I/O cache staff has been built, EEH is
				230	* ready to supply service.
				231	*/
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	232	static int pnv_eeh_post_init(void)
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	233	{
				234	struct pci_controller *hose;
				235	struct pnv_phb *phb;
				236	int ret = 0;
				237
Gavin Shan	4cf1744	2015-02-16 14:45:41 +1100	[diff] [blame]	238	/* Register OPAL event notifier */
				239	if (!pnv_eeh_nb_init) {
				240	ret = opal_notifier_register(&pnv_eeh_nb);
				241	if (ret) {
				242	pr_warn("%s: Can't register OPAL event notifier (%d)\n",
				243	__func__, ret);
				244	return ret;
				245	}
				246
				247	pnv_eeh_nb_init = true;
				248	}
				249
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	250	list_for_each_entry(hose, &hose_list, list_node) {
				251	phb = hose->private_data;
				252
Gavin Shan	4cf1744	2015-02-16 14:45:41 +1100	[diff] [blame]	253	/*
				254	* If EEH is enabled, we're going to rely on that.
				255	* Otherwise, we restore to conventional mechanism
				256	* to clear frozen PE during PCI config access.
				257	*/
				258	if (eeh_enabled())
				259	phb->flags \|= PNV_PHB_FLAG_EEH;
				260	else
				261	phb->flags &= ~PNV_PHB_FLAG_EEH;
				262
				263	/* Create debugfs entries */
				264	#ifdef CONFIG_DEBUG_FS
				265	if (phb->has_dbgfs \|\| !phb->dbgfs)
				266	continue;
				267
				268	phb->has_dbgfs = 1;
				269	debugfs_create_file("err_injct", 0200,
				270	phb->dbgfs, hose,
				271	&pnv_eeh_ei_fops);
				272
				273	debugfs_create_file("err_injct_outbound", 0600,
				274	phb->dbgfs, hose,
				275	&pnv_eeh_outb_dbgfs_ops);
				276	debugfs_create_file("err_injct_inboundA", 0600,
				277	phb->dbgfs, hose,
				278	&pnv_eeh_inbA_dbgfs_ops);
				279	debugfs_create_file("err_injct_inboundB", 0600,
				280	phb->dbgfs, hose,
				281	&pnv_eeh_inbB_dbgfs_ops);
				282	#endif /* CONFIG_DEBUG_FS */
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	283	}
				284
Gavin Shan	4cf1744	2015-02-16 14:45:41 +1100	[diff] [blame]	285
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	286	return ret;
				287	}
				288
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	289	static int pnv_eeh_cap_start(struct pci_dn *pdn)
				290	{
				291	u32 status;
				292
				293	if (!pdn)
				294	return 0;
				295
				296	pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
				297	if (!(status & PCI_STATUS_CAP_LIST))
				298	return 0;
				299
				300	return PCI_CAPABILITY_LIST;
				301	}
				302
				303	static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
				304	{
				305	int pos = pnv_eeh_cap_start(pdn);
				306	int cnt = 48; /* Maximal number of capabilities */
				307	u32 id;
				308
				309	if (!pos)
				310	return 0;
				311
				312	while (cnt--) {
				313	pnv_pci_cfg_read(pdn, pos, 1, &pos);
				314	if (pos < 0x40)
				315	break;
				316
				317	pos &= ~3;
				318	pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
				319	if (id == 0xff)
				320	break;
				321
				322	/* Found */
				323	if (id == cap)
				324	return pos;
				325
				326	/* Next one */
				327	pos += PCI_CAP_LIST_NEXT;
				328	}
				329
				330	return 0;
				331	}
				332
				333	static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
				334	{
				335	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
				336	u32 header;
				337	int pos = 256, ttl = (4096 - 256) / 8;
				338
				339	if (!edev \|\| !edev->pcie_cap)
				340	return 0;
				341	if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
				342	return 0;
				343	else if (!header)
				344	return 0;
				345
				346	while (ttl-- > 0) {
				347	if (PCI_EXT_CAP_ID(header) == cap && pos)
				348	return pos;
				349
				350	pos = PCI_EXT_CAP_NEXT(header);
				351	if (pos < 256)
				352	break;
				353
				354	if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
				355	break;
				356	}
				357
				358	return 0;
				359	}
				360
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	361	/**
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	362	* pnv_eeh_probe - Do probe on PCI device
				363	* @pdn: PCI device node
				364	* @data: unused
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	365	*
				366	* When EEH module is installed during system boot, all PCI devices
				367	* are checked one by one to see if it supports EEH. The function
				368	* is introduced for the purpose. By default, EEH has been enabled
				369	* on all PCI devices. That's to say, we only need do necessary
				370	* initialization on the corresponding eeh device and create PE
				371	* accordingly.
				372	*
				373	* It's notable that's unsafe to retrieve the EEH device through
				374	* the corresponding PCI device. During the PCI device hotplug, which
				375	* was possiblly triggered by EEH core, the binding between EEH device
				376	* and the PCI device isn't built yet.
				377	*/
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	378	static void pnv_eeh_probe(struct pci_dn pdn, void *data)
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	379	{
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	380	struct pci_controller *hose = pdn->phb;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	381	struct pnv_phb *phb = hose->private_data;
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	382	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
				383	uint32_t pcie_flags;
Mike Qiu	dadcd6d	2014-06-26 02:58:47 -0400	[diff] [blame]	384	int ret;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	385
				386	/*
				387	* When probing the root bridge, which doesn't have any
				388	* subordinate PCI devices. We don't have OF node for
				389	* the root bridge. So it's not reasonable to continue
				390	* the probing.
				391	*/
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	392	if (!edev \|\| edev->pe)
				393	return NULL;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	394
				395	/* Skip for PCI-ISA bridge */
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	396	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
				397	return NULL;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	398
				399	/* Initialize eeh device */
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	400	edev->class_code = pdn->class_code;
Gavin Shan	ab55d21	2013-07-24 10:25:01 +0800	[diff] [blame]	401	edev->mode &= 0xFFFFFF00;
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	402	edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
				403	edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
				404	edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
				405	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
Gavin Shan	4b83bd4	2013-07-24 10:24:59 +0800	[diff] [blame]	406	edev->mode \|= EEH_DEV_BRIDGE;
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	407	if (edev->pcie_cap) {
				408	pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
				409	2, &pcie_flags);
				410	pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
				411	if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
				412	edev->mode \|= EEH_DEV_ROOT_PORT;
				413	else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
				414	edev->mode \|= EEH_DEV_DS_PORT;
				415	}
Gavin Shan	4b83bd4	2013-07-24 10:24:59 +0800	[diff] [blame]	416	}
				417
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	418	edev->config_addr = (pdn->busno << 8) \| (pdn->devfn);
				419	edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr];
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	420
				421	/* Create PE */
Mike Qiu	dadcd6d	2014-06-26 02:58:47 -0400	[diff] [blame]	422	ret = eeh_add_to_parent_pe(edev);
				423	if (ret) {
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	424	pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%d)\n",
				425	__func__, hose->global_number, pdn->busno,
				426	PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret);
				427	return NULL;
Mike Qiu	dadcd6d	2014-06-26 02:58:47 -0400	[diff] [blame]	428	}
				429
				430	/*
Gavin Shan	b6541db	2014-10-01 17:07:53 +1000	[diff] [blame]	431	* If the PE contains any one of following adapters, the
				432	* PCI config space can't be accessed when dumping EEH log.
				433	* Otherwise, we will run into fenced PHB caused by shortage
				434	* of outbound credits in the adapter. The PCI config access
				435	* should be blocked until PE reset. MMIO access is dropped
				436	* by hardware certainly. In order to drop PCI config requests,
				437	* one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
				438	* will be checked in the backend for PE state retrival. If
				439	* the PE becomes frozen for the first time and the flag has
				440	* been set for the PE, we will set EEH_PE_CFG_BLOCKED for
				441	* that PE to block its config space.
				442	*
				443	* Broadcom Austin 4-ports NICs (14e4:1657)
Gavin Shan	179ea48	2014-10-03 14:58:32 +1000	[diff] [blame]	444	* Broadcom Shiner 2-ports 10G NICs (14e4:168e)
Gavin Shan	b6541db	2014-10-01 17:07:53 +1000	[diff] [blame]	445	*/
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	446	if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
				447	pdn->device_id == 0x1657) \|\|
				448	(pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
				449	pdn->device_id == 0x168e))
Gavin Shan	b6541db	2014-10-01 17:07:53 +1000	[diff] [blame]	450	edev->pe->state \|= EEH_PE_CFG_RESTRICTED;
				451
				452	/*
Mike Qiu	dadcd6d	2014-06-26 02:58:47 -0400	[diff] [blame]	453	* Cache the PE primary bus, which can't be fetched when
				454	* full hotplug is in progress. In that case, all child
				455	* PCI devices of the PE are expected to be removed prior
				456	* to PE reset.
				457	*/
				458	if (!edev->pe->bus)
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	459	edev->pe->bus = pci_find_bus(hose->global_number,
				460	pdn->busno);
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	461
				462	/*
				463	* Enable EEH explicitly so that we will do EEH check
				464	* while accessing I/O stuff
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	465	*/
Gavin Shan	05b1721	2014-07-17 14:41:38 +1000	[diff] [blame]	466	eeh_add_flag(EEH_ENABLED);
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	467
				468	/* Save memory bars */
				469	eeh_save_bars(edev);
				470
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	471	return NULL;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	472	}
				473
				474	/**
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	475	* pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	476	* @pe: EEH PE
				477	* @option: operation to be issued
				478	*
				479	* The function is used to control the EEH functionality globally.
				480	* Currently, following options are support according to PAPR:
				481	* Enable EEH, Disable EEH, Enable MMIO and Enable DMA
				482	*/
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	483	static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	484	{
				485	struct pci_controller *hose = pe->phb;
				486	struct pnv_phb *phb = hose->private_data;
Gavin Shan	7e3e4f8	2015-02-16 14:45:44 +1100	[diff] [blame]	487	bool freeze_pe = false;
				488	int opt, ret = 0;
				489	s64 rc;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	490
Gavin Shan	7e3e4f8	2015-02-16 14:45:44 +1100	[diff] [blame]	491	/* Sanity check on option */
				492	switch (option) {
				493	case EEH_OPT_DISABLE:
				494	return -EPERM;
				495	case EEH_OPT_ENABLE:
				496	return 0;
				497	case EEH_OPT_THAW_MMIO:
				498	opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
				499	break;
				500	case EEH_OPT_THAW_DMA:
				501	opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
				502	break;
				503	case EEH_OPT_FREEZE_PE:
				504	freeze_pe = true;
				505	opt = OPAL_EEH_ACTION_SET_FREEZE_ALL;
				506	break;
				507	default:
				508	pr_warn("%s: Invalid option %d\n", __func__, option);
				509	return -EINVAL;
				510	}
				511
				512	/* If PHB supports compound PE, to handle it */
				513	if (freeze_pe) {
				514	if (phb->freeze_pe) {
				515	phb->freeze_pe(phb, pe->addr);
				516	} else {
				517	rc = opal_pci_eeh_freeze_set(phb->opal_id,
				518	pe->addr, opt);
				519	if (rc != OPAL_SUCCESS) {
				520	pr_warn("%s: Failure %lld freezing "
				521	"PHB#%x-PE#%x\n",
				522	__func__, rc,
				523	phb->hose->global_number, pe->addr);
				524	ret = -EIO;
				525	}
				526	}
				527	} else {
				528	if (phb->unfreeze_pe) {
				529	ret = phb->unfreeze_pe(phb, pe->addr, opt);
				530	} else {
				531	rc = opal_pci_eeh_freeze_clear(phb->opal_id,
				532	pe->addr, opt);
				533	if (rc != OPAL_SUCCESS) {
				534	pr_warn("%s: Failure %lld enable %d "
				535	"for PHB#%x-PE#%x\n",
				536	__func__, rc, option,
				537	phb->hose->global_number, pe->addr);
				538	ret = -EIO;
				539	}
				540	}
				541	}
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	542
				543	return ret;
				544	}
				545
				546	/**
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	547	* pnv_eeh_get_pe_addr - Retrieve PE address
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	548	* @pe: EEH PE
				549	*
				550	* Retrieve the PE address according to the given tranditional
				551	* PCI BDF (Bus/Device/Function) address.
				552	*/
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	553	static int pnv_eeh_get_pe_addr(struct eeh_pe *pe)
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	554	{
				555	return pe->addr;
				556	}
				557
Gavin Shan	40ae5f6	2015-02-16 14:45:45 +1100	[diff] [blame]	558	static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
				559	{
				560	struct pnv_phb *phb = pe->phb->private_data;
				561	s64 rc;
				562
				563	rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
				564	PNV_PCI_DIAG_BUF_SIZE);
				565	if (rc != OPAL_SUCCESS)
				566	pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
				567	__func__, rc, pe->phb->global_number);
				568	}
				569
				570	static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
				571	{
				572	struct pnv_phb *phb = pe->phb->private_data;
				573	u8 fstate;
				574	__be16 pcierr;
				575	s64 rc;
				576	int result = 0;
				577
				578	rc = opal_pci_eeh_freeze_status(phb->opal_id,
				579	pe->addr,
				580	&fstate,
				581	&pcierr,
				582	NULL);
				583	if (rc != OPAL_SUCCESS) {
				584	pr_warn("%s: Failure %lld getting PHB#%x state\n",
				585	__func__, rc, phb->hose->global_number);
				586	return EEH_STATE_NOT_SUPPORT;
				587	}
				588
				589	/*
				590	* Check PHB state. If the PHB is frozen for the
				591	* first time, to dump the PHB diag-data.
				592	*/
				593	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
				594	result = (EEH_STATE_MMIO_ACTIVE \|
				595	EEH_STATE_DMA_ACTIVE \|
				596	EEH_STATE_MMIO_ENABLED \|
				597	EEH_STATE_DMA_ENABLED);
				598	} else if (!(pe->state & EEH_PE_ISOLATED)) {
				599	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
				600	pnv_eeh_get_phb_diag(pe);
				601
				602	if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
				603	pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
				604	}
				605
				606	return result;
				607	}
				608
				609	static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
				610	{
				611	struct pnv_phb *phb = pe->phb->private_data;
				612	u8 fstate;
				613	__be16 pcierr;
				614	s64 rc;
				615	int result;
				616
				617	/*
				618	* We don't clobber hardware frozen state until PE
				619	* reset is completed. In order to keep EEH core
				620	* moving forward, we have to return operational
				621	* state during PE reset.
				622	*/
				623	if (pe->state & EEH_PE_RESET) {
				624	result = (EEH_STATE_MMIO_ACTIVE \|
				625	EEH_STATE_DMA_ACTIVE \|
				626	EEH_STATE_MMIO_ENABLED \|
				627	EEH_STATE_DMA_ENABLED);
				628	return result;
				629	}
				630
				631	/*
				632	* Fetch PE state from hardware. If the PHB
				633	* supports compound PE, let it handle that.
				634	*/
				635	if (phb->get_pe_state) {
				636	fstate = phb->get_pe_state(phb, pe->addr);
				637	} else {
				638	rc = opal_pci_eeh_freeze_status(phb->opal_id,
				639	pe->addr,
				640	&fstate,
				641	&pcierr,
				642	NULL);
				643	if (rc != OPAL_SUCCESS) {
				644	pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
				645	__func__, rc, phb->hose->global_number,
				646	pe->addr);
				647	return EEH_STATE_NOT_SUPPORT;
				648	}
				649	}
				650
				651	/* Figure out state */
				652	switch (fstate) {
				653	case OPAL_EEH_STOPPED_NOT_FROZEN:
				654	result = (EEH_STATE_MMIO_ACTIVE \|
				655	EEH_STATE_DMA_ACTIVE \|
				656	EEH_STATE_MMIO_ENABLED \|
				657	EEH_STATE_DMA_ENABLED);
				658	break;
				659	case OPAL_EEH_STOPPED_MMIO_FREEZE:
				660	result = (EEH_STATE_DMA_ACTIVE \|
				661	EEH_STATE_DMA_ENABLED);
				662	break;
				663	case OPAL_EEH_STOPPED_DMA_FREEZE:
				664	result = (EEH_STATE_MMIO_ACTIVE \|
				665	EEH_STATE_MMIO_ENABLED);
				666	break;
				667	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
				668	result = 0;
				669	break;
				670	case OPAL_EEH_STOPPED_RESET:
				671	result = EEH_STATE_RESET_ACTIVE;
				672	break;
				673	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
				674	result = EEH_STATE_UNAVAILABLE;
				675	break;
				676	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
				677	result = EEH_STATE_NOT_SUPPORT;
				678	break;
				679	default:
				680	result = EEH_STATE_NOT_SUPPORT;
				681	pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
				682	__func__, phb->hose->global_number,
				683	pe->addr, fstate);
				684	}
				685
				686	/*
				687	* If PHB supports compound PE, to freeze all
				688	* slave PEs for consistency.
				689	*
				690	* If the PE is switching to frozen state for the
				691	* first time, to dump the PHB diag-data.
				692	*/
				693	if (!(result & EEH_STATE_NOT_SUPPORT) &&
				694	!(result & EEH_STATE_UNAVAILABLE) &&
				695	!(result & EEH_STATE_MMIO_ACTIVE) &&
				696	!(result & EEH_STATE_DMA_ACTIVE) &&
				697	!(pe->state & EEH_PE_ISOLATED)) {
				698	if (phb->freeze_pe)
				699	phb->freeze_pe(phb, pe->addr);
				700
				701	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
				702	pnv_eeh_get_phb_diag(pe);
				703
				704	if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
				705	pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
				706	}
				707
				708	return result;
				709	}
				710
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	711	/**
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	712	* pnv_eeh_get_state - Retrieve PE state
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	713	* @pe: EEH PE
				714	* @delay: delay while PE state is temporarily unavailable
				715	*
				716	* Retrieve the state of the specified PE. For IODA-compitable
				717	* platform, it should be retrieved from IODA table. Therefore,
				718	* we prefer passing down to hardware implementation to handle
				719	* it.
				720	*/
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	721	static int pnv_eeh_get_state(struct eeh_pe pe, int delay)
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	722	{
Gavin Shan	40ae5f6	2015-02-16 14:45:45 +1100	[diff] [blame]	723	int ret;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	724
Gavin Shan	40ae5f6	2015-02-16 14:45:45 +1100	[diff] [blame]	725	if (pe->type & EEH_PE_PHB)
				726	ret = pnv_eeh_get_phb_state(pe);
				727	else
				728	ret = pnv_eeh_get_pe_state(pe);
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	729
Gavin Shan	40ae5f6	2015-02-16 14:45:45 +1100	[diff] [blame]	730	if (!delay)
				731	return ret;
				732
				733	/*
				734	* If the PE state is temporarily unavailable,
				735	* to inform the EEH core delay for default
				736	* period (1 second)
				737	*/
				738	*delay = 0;
				739	if (ret & EEH_STATE_UNAVAILABLE)
				740	*delay = 1000;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	741
				742	return ret;
				743	}
				744
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	745	static s64 pnv_eeh_phb_poll(struct pnv_phb *phb)
				746	{
				747	s64 rc = OPAL_HARDWARE;
				748
				749	while (1) {
				750	rc = opal_pci_poll(phb->opal_id);
				751	if (rc <= 0)
				752	break;
				753
				754	if (system_state < SYSTEM_RUNNING)
				755	udelay(1000 * rc);
				756	else
				757	msleep(rc);
				758	}
				759
				760	return rc;
				761	}
				762
				763	int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
				764	{
				765	struct pnv_phb *phb = hose->private_data;
				766	s64 rc = OPAL_HARDWARE;
				767
				768	pr_debug("%s: Reset PHB#%x, option=%d\n",
				769	__func__, hose->global_number, option);
				770
				771	/* Issue PHB complete reset request */
				772	if (option == EEH_RESET_FUNDAMENTAL \|\|
				773	option == EEH_RESET_HOT)
				774	rc = opal_pci_reset(phb->opal_id,
				775	OPAL_RESET_PHB_COMPLETE,
				776	OPAL_ASSERT_RESET);
				777	else if (option == EEH_RESET_DEACTIVATE)
				778	rc = opal_pci_reset(phb->opal_id,
				779	OPAL_RESET_PHB_COMPLETE,
				780	OPAL_DEASSERT_RESET);
				781	if (rc < 0)
				782	goto out;
				783
				784	/*
				785	* Poll state of the PHB until the request is done
				786	* successfully. The PHB reset is usually PHB complete
				787	* reset followed by hot reset on root bus. So we also
				788	* need the PCI bus settlement delay.
				789	*/
				790	rc = pnv_eeh_phb_poll(phb);
				791	if (option == EEH_RESET_DEACTIVATE) {
				792	if (system_state < SYSTEM_RUNNING)
				793	udelay(1000 * EEH_PE_RST_SETTLE_TIME);
				794	else
				795	msleep(EEH_PE_RST_SETTLE_TIME);
				796	}
				797	out:
				798	if (rc != OPAL_SUCCESS)
				799	return -EIO;
				800
				801	return 0;
				802	}
				803
				804	static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
				805	{
				806	struct pnv_phb *phb = hose->private_data;
				807	s64 rc = OPAL_HARDWARE;
				808
				809	pr_debug("%s: Reset PHB#%x, option=%d\n",
				810	__func__, hose->global_number, option);
				811
				812	/*
				813	* During the reset deassert time, we needn't care
				814	* the reset scope because the firmware does nothing
				815	* for fundamental or hot reset during deassert phase.
				816	*/
				817	if (option == EEH_RESET_FUNDAMENTAL)
				818	rc = opal_pci_reset(phb->opal_id,
				819	OPAL_RESET_PCI_FUNDAMENTAL,
				820	OPAL_ASSERT_RESET);
				821	else if (option == EEH_RESET_HOT)
				822	rc = opal_pci_reset(phb->opal_id,
				823	OPAL_RESET_PCI_HOT,
				824	OPAL_ASSERT_RESET);
				825	else if (option == EEH_RESET_DEACTIVATE)
				826	rc = opal_pci_reset(phb->opal_id,
				827	OPAL_RESET_PCI_HOT,
				828	OPAL_DEASSERT_RESET);
				829	if (rc < 0)
				830	goto out;
				831
				832	/* Poll state of the PHB until the request is done */
				833	rc = pnv_eeh_phb_poll(phb);
				834	if (option == EEH_RESET_DEACTIVATE)
				835	msleep(EEH_PE_RST_SETTLE_TIME);
				836	out:
				837	if (rc != OPAL_SUCCESS)
				838	return -EIO;
				839
				840	return 0;
				841	}
				842
				843	static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
				844	{
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	845	struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
				846	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	847	int aer = edev ? edev->aer_cap : 0;
				848	u32 ctrl;
				849
				850	pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
				851	__func__, pci_domain_nr(dev->bus),
				852	dev->bus->number, option);
				853
				854	switch (option) {
				855	case EEH_RESET_FUNDAMENTAL:
				856	case EEH_RESET_HOT:
				857	/* Don't report linkDown event */
				858	if (aer) {
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	859	eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	860	4, &ctrl);
				861	ctrl \|= PCI_ERR_UNC_SURPDN;
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	862	eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	863	4, ctrl);
				864	}
				865
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	866	eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	867	ctrl \|= PCI_BRIDGE_CTL_BUS_RESET;
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	868	eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	869
				870	msleep(EEH_PE_RST_HOLD_TIME);
				871	break;
				872	case EEH_RESET_DEACTIVATE:
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	873	eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	874	ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	875	eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	876
				877	msleep(EEH_PE_RST_SETTLE_TIME);
				878
				879	/* Continue reporting linkDown event */
				880	if (aer) {
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	881	eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	882	4, &ctrl);
				883	ctrl &= ~PCI_ERR_UNC_SURPDN;
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	884	eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	885	4, ctrl);
				886	}
				887
				888	break;
				889	}
				890
				891	return 0;
				892	}
				893
				894	void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
				895	{
				896	struct pci_controller *hose;
				897
				898	if (pci_is_root_bus(dev->bus)) {
				899	hose = pci_bus_to_host(dev->bus);
				900	pnv_eeh_root_reset(hose, EEH_RESET_HOT);
				901	pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
				902	} else {
				903	pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
				904	pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
				905	}
				906	}
				907
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	908	/**
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	909	* pnv_eeh_reset - Reset the specified PE
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	910	* @pe: EEH PE
				911	* @option: reset option
				912	*
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	913	* Do reset on the indicated PE. For PCI bus sensitive PE,
				914	* we need to reset the parent p2p bridge. The PHB has to
				915	* be reinitialized if the p2p bridge is root bridge. For
				916	* PCI device sensitive PE, we will try to reset the device
				917	* through FLR. For now, we don't have OPAL APIs to do HARD
				918	* reset yet, so all reset would be SOFT (HOT) reset.
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	919	*/
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	920	static int pnv_eeh_reset(struct eeh_pe *pe, int option)
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	921	{
				922	struct pci_controller *hose = pe->phb;
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	923	struct pci_bus *bus;
				924	int ret;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	925
Gavin Shan	cadf364	2015-02-16 14:45:47 +1100	[diff] [blame]	926	/*
				927	* For PHB reset, we always have complete reset. For those PEs whose
				928	* primary bus derived from root complex (root bus) or root port
				929	* (usually bus#1), we apply hot or fundamental reset on the root port.
				930	* For other PEs, we always have hot reset on the PE primary bus.
				931	*
				932	* Here, we have different design to pHyp, which always clear the
				933	* frozen state during PE reset. However, the good idea here from
				934	* benh is to keep frozen state before we get PE reset done completely
				935	* (until BAR restore). With the frozen state, HW drops illegal IO
				936	* or MMIO access, which can incur recrusive frozen PE during PE
				937	* reset. The side effect is that EEH core has to clear the frozen
				938	* state explicitly after BAR restore.
				939	*/
				940	if (pe->type & EEH_PE_PHB) {
				941	ret = pnv_eeh_phb_reset(hose, option);
				942	} else {
				943	struct pnv_phb *phb;
				944	s64 rc;
				945
				946	/*
				947	* The frozen PE might be caused by PAPR error injection
				948	* registers, which are expected to be cleared after hitting
				949	* frozen PE as stated in the hardware spec. Unfortunately,
				950	* that's not true on P7IOC. So we have to clear it manually
				951	* to avoid recursive EEH errors during recovery.
				952	*/
				953	phb = hose->private_data;
				954	if (phb->model == PNV_PHB_MODEL_P7IOC &&
				955	(option == EEH_RESET_HOT \|\|
				956	option == EEH_RESET_FUNDAMENTAL)) {
				957	rc = opal_pci_reset(phb->opal_id,
				958	OPAL_RESET_PHB_ERROR,
				959	OPAL_ASSERT_RESET);
				960	if (rc != OPAL_SUCCESS) {
				961	pr_warn("%s: Failure %lld clearing "
				962	"error injection registers\n",
				963	__func__, rc);
				964	return -EIO;
				965	}
				966	}
				967
				968	bus = eeh_pe_bus_get(pe);
				969	if (pci_is_root_bus(bus) \|\|
				970	pci_is_root_bus(bus->parent))
				971	ret = pnv_eeh_root_reset(hose, option);
				972	else
				973	ret = pnv_eeh_bridge_reset(bus->self, option);
				974	}
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	975
				976	return ret;
				977	}
				978
				979	/**
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	980	* pnv_eeh_wait_state - Wait for PE state
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	981	* @pe: EEH PE
Wei Yang	2ac3990	2015-04-27 09:25:10 +0800	[diff] [blame]	982	* @max_wait: maximal period in millisecond
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	983	*
				984	* Wait for the state of associated PE. It might take some time
				985	* to retrieve the PE's state.
				986	*/
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	987	static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	988	{
				989	int ret;
				990	int mwait;
				991
				992	while (1) {
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	993	ret = pnv_eeh_get_state(pe, &mwait);
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	994
				995	/*
				996	* If the PE's state is temporarily unavailable,
				997	* we have to wait for the specified time. Otherwise,
				998	* the PE's state will be returned immediately.
				999	*/
				1000	if (ret != EEH_STATE_UNAVAILABLE)
				1001	return ret;
				1002
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1003	if (max_wait <= 0) {
Gavin Shan	0dae274	2014-07-17 14:41:41 +1000	[diff] [blame]	1004	pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
				1005	__func__, pe->addr, max_wait);
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1006	return EEH_STATE_NOT_SUPPORT;
				1007	}
				1008
Wei Yang	e17866d	2015-04-27 09:25:11 +0800	[diff] [blame^]	1009	max_wait -= mwait;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1010	msleep(mwait);
				1011	}
				1012
				1013	return EEH_STATE_NOT_SUPPORT;
				1014	}
				1015
				1016	/**
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1017	* pnv_eeh_get_log - Retrieve error log
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1018	* @pe: EEH PE
				1019	* @severity: temporary or permanent error log
				1020	* @drv_log: driver log to be combined with retrieved error log
				1021	* @len: length of driver log
				1022	*
				1023	* Retrieve the temporary or permanent error from the PE.
				1024	*/
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1025	static int pnv_eeh_get_log(struct eeh_pe *pe, int severity,
				1026	char *drv_log, unsigned long len)
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1027	{
Gavin Shan	95edcde	2015-02-16 14:45:42 +1100	[diff] [blame]	1028	if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
				1029	pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1030
Gavin Shan	95edcde	2015-02-16 14:45:42 +1100	[diff] [blame]	1031	return 0;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1032	}
				1033
				1034	/**
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1035	* pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1036	* @pe: EEH PE
				1037	*
				1038	* The function will be called to reconfigure the bridges included
				1039	* in the specified PE so that the mulfunctional PE would be recovered
				1040	* again.
				1041	*/
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1042	static int pnv_eeh_configure_bridge(struct eeh_pe *pe)
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1043	{
Gavin Shan	bbe170e	2015-02-16 14:45:43 +1100	[diff] [blame]	1044	return 0;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1045	}
				1046
				1047	/**
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1048	* pnv_pe_err_inject - Inject specified error to the indicated PE
Gavin Shan	131c123	2014-09-30 12:38:56 +1000	[diff] [blame]	1049	* @pe: the indicated PE
				1050	* @type: error type
				1051	* @func: specific error type
				1052	* @addr: address
				1053	* @mask: address mask
				1054	*
				1055	* The routine is called to inject specified error, which is
				1056	* determined by @type and @func, to the indicated PE for
				1057	* testing purpose.
				1058	*/
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1059	static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
				1060	unsigned long addr, unsigned long mask)
Gavin Shan	131c123	2014-09-30 12:38:56 +1000	[diff] [blame]	1061	{
				1062	struct pci_controller *hose = pe->phb;
				1063	struct pnv_phb *phb = hose->private_data;
Gavin Shan	fa646c3	2015-02-16 14:45:40 +1100	[diff] [blame]	1064	s64 rc;
Gavin Shan	131c123	2014-09-30 12:38:56 +1000	[diff] [blame]	1065
Gavin Shan	fa646c3	2015-02-16 14:45:40 +1100	[diff] [blame]	1066	/* Sanity check on error type */
				1067	if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
				1068	type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
				1069	pr_warn("%s: Invalid error type %d\n",
				1070	__func__, type);
				1071	return -ERANGE;
				1072	}
Gavin Shan	131c123	2014-09-30 12:38:56 +1000	[diff] [blame]	1073
Gavin Shan	fa646c3	2015-02-16 14:45:40 +1100	[diff] [blame]	1074	if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR \|\|
				1075	func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
				1076	pr_warn("%s: Invalid error function %d\n",
				1077	__func__, func);
				1078	return -ERANGE;
				1079	}
				1080
				1081	/* Firmware supports error injection ? */
				1082	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
				1083	pr_warn("%s: Firmware doesn't support error injection\n",
				1084	__func__);
				1085	return -ENXIO;
				1086	}
				1087
				1088	/* Do error injection */
				1089	rc = opal_pci_err_inject(phb->opal_id, pe->addr,
				1090	type, func, addr, mask);
				1091	if (rc != OPAL_SUCCESS) {
				1092	pr_warn("%s: Failure %lld injecting error "
				1093	"%d-%d to PHB#%x-PE#%x\n",
				1094	__func__, rc, type, func,
				1095	hose->global_number, pe->addr);
				1096	return -EIO;
				1097	}
				1098
				1099	return 0;
Gavin Shan	131c123	2014-09-30 12:38:56 +1000	[diff] [blame]	1100	}
				1101
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	1102	static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
Gavin Shan	d2cfbcd	2014-10-01 17:07:51 +1000	[diff] [blame]	1103	{
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	1104	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
Gavin Shan	d2cfbcd	2014-10-01 17:07:51 +1000	[diff] [blame]	1105
				1106	if (!edev \|\| !edev->pe)
				1107	return false;
				1108
				1109	if (edev->pe->state & EEH_PE_CFG_BLOCKED)
				1110	return true;
				1111
				1112	return false;
				1113	}
				1114
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	1115	static int pnv_eeh_read_config(struct pci_dn *pdn,
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1116	int where, int size, u32 *val)
Gavin Shan	d2cfbcd	2014-10-01 17:07:51 +1000	[diff] [blame]	1117	{
Gavin Shan	3532a741	2015-03-17 16:15:03 +1100	[diff] [blame]	1118	if (!pdn)
				1119	return PCIBIOS_DEVICE_NOT_FOUND;
				1120
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	1121	if (pnv_eeh_cfg_blocked(pdn)) {
Gavin Shan	d2cfbcd	2014-10-01 17:07:51 +1000	[diff] [blame]	1122	*val = 0xFFFFFFFF;
				1123	return PCIBIOS_SET_FAILED;
				1124	}
				1125
Gavin Shan	3532a741	2015-03-17 16:15:03 +1100	[diff] [blame]	1126	return pnv_pci_cfg_read(pdn, where, size, val);
Gavin Shan	d2cfbcd	2014-10-01 17:07:51 +1000	[diff] [blame]	1127	}
				1128
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	1129	static int pnv_eeh_write_config(struct pci_dn *pdn,
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1130	int where, int size, u32 val)
Gavin Shan	d2cfbcd	2014-10-01 17:07:51 +1000	[diff] [blame]	1131	{
Gavin Shan	3532a741	2015-03-17 16:15:03 +1100	[diff] [blame]	1132	if (!pdn)
				1133	return PCIBIOS_DEVICE_NOT_FOUND;
				1134
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	1135	if (pnv_eeh_cfg_blocked(pdn))
Gavin Shan	d2cfbcd	2014-10-01 17:07:51 +1000	[diff] [blame]	1136	return PCIBIOS_SET_FAILED;
				1137
Gavin Shan	3532a741	2015-03-17 16:15:03 +1100	[diff] [blame]	1138	return pnv_pci_cfg_write(pdn, where, size, val);
Gavin Shan	d2cfbcd	2014-10-01 17:07:51 +1000	[diff] [blame]	1139	}
				1140
Gavin Shan	2a485ad	2015-02-16 14:45:46 +1100	[diff] [blame]	1141	static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
				1142	{
				1143	/* GEM */
				1144	if (data->gemXfir \|\| data->gemRfir \|\|
				1145	data->gemRirqfir \|\| data->gemMask \|\| data->gemRwof)
				1146	pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n",
				1147	be64_to_cpu(data->gemXfir),
				1148	be64_to_cpu(data->gemRfir),
				1149	be64_to_cpu(data->gemRirqfir),
				1150	be64_to_cpu(data->gemMask),
				1151	be64_to_cpu(data->gemRwof));
				1152
				1153	/* LEM */
				1154	if (data->lemFir \|\| data->lemErrMask \|\|
				1155	data->lemAction0 \|\| data->lemAction1 \|\| data->lemWof)
				1156	pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n",
				1157	be64_to_cpu(data->lemFir),
				1158	be64_to_cpu(data->lemErrMask),
				1159	be64_to_cpu(data->lemAction0),
				1160	be64_to_cpu(data->lemAction1),
				1161	be64_to_cpu(data->lemWof));
				1162	}
				1163
				1164	static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
				1165	{
				1166	struct pnv_phb *phb = hose->private_data;
				1167	struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
				1168	long rc;
				1169
				1170	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
				1171	if (rc != OPAL_SUCCESS) {
				1172	pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
				1173	__func__, phb->hub_id, rc);
				1174	return;
				1175	}
				1176
				1177	switch (data->type) {
				1178	case OPAL_P7IOC_DIAG_TYPE_RGC:
				1179	pr_info("P7IOC diag-data for RGC\n\n");
				1180	pnv_eeh_dump_hub_diag_common(data);
				1181	if (data->rgc.rgcStatus \|\| data->rgc.rgcLdcp)
				1182	pr_info(" RGC: %016llx %016llx\n",
				1183	be64_to_cpu(data->rgc.rgcStatus),
				1184	be64_to_cpu(data->rgc.rgcLdcp));
				1185	break;
				1186	case OPAL_P7IOC_DIAG_TYPE_BI:
				1187	pr_info("P7IOC diag-data for BI %s\n\n",
				1188	data->bi.biDownbound ? "Downbound" : "Upbound");
				1189	pnv_eeh_dump_hub_diag_common(data);
				1190	if (data->bi.biLdcp0 \|\| data->bi.biLdcp1 \|\|
				1191	data->bi.biLdcp2 \|\| data->bi.biFenceStatus)
				1192	pr_info(" BI: %016llx %016llx %016llx %016llx\n",
				1193	be64_to_cpu(data->bi.biLdcp0),
				1194	be64_to_cpu(data->bi.biLdcp1),
				1195	be64_to_cpu(data->bi.biLdcp2),
				1196	be64_to_cpu(data->bi.biFenceStatus));
				1197	break;
				1198	case OPAL_P7IOC_DIAG_TYPE_CI:
				1199	pr_info("P7IOC diag-data for CI Port %d\n\n",
				1200	data->ci.ciPort);
				1201	pnv_eeh_dump_hub_diag_common(data);
				1202	if (data->ci.ciPortStatus \|\| data->ci.ciPortLdcp)
				1203	pr_info(" CI: %016llx %016llx\n",
				1204	be64_to_cpu(data->ci.ciPortStatus),
				1205	be64_to_cpu(data->ci.ciPortLdcp));
				1206	break;
				1207	case OPAL_P7IOC_DIAG_TYPE_MISC:
				1208	pr_info("P7IOC diag-data for MISC\n\n");
				1209	pnv_eeh_dump_hub_diag_common(data);
				1210	break;
				1211	case OPAL_P7IOC_DIAG_TYPE_I2C:
				1212	pr_info("P7IOC diag-data for I2C\n\n");
				1213	pnv_eeh_dump_hub_diag_common(data);
				1214	break;
				1215	default:
				1216	pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
				1217	__func__, phb->hub_id, data->type);
				1218	}
				1219	}
				1220
				1221	static int pnv_eeh_get_pe(struct pci_controller *hose,
				1222	u16 pe_no, struct eeh_pe **pe)
				1223	{
				1224	struct pnv_phb *phb = hose->private_data;
				1225	struct pnv_ioda_pe *pnv_pe;
				1226	struct eeh_pe *dev_pe;
				1227	struct eeh_dev edev;
				1228
				1229	/*
				1230	* If PHB supports compound PE, to fetch
				1231	* the master PE because slave PE is invisible
				1232	* to EEH core.
				1233	*/
				1234	pnv_pe = &phb->ioda.pe_array[pe_no];
				1235	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
				1236	pnv_pe = pnv_pe->master;
				1237	WARN_ON(!pnv_pe \|\|
				1238	!(pnv_pe->flags & PNV_IODA_PE_MASTER));
				1239	pe_no = pnv_pe->pe_number;
				1240	}
				1241
				1242	/* Find the PE according to PE# */
				1243	memset(&edev, 0, sizeof(struct eeh_dev));
				1244	edev.phb = hose;
				1245	edev.pe_config_addr = pe_no;
				1246	dev_pe = eeh_pe_get(&edev);
				1247	if (!dev_pe)
				1248	return -EEXIST;
				1249
				1250	/* Freeze the (compound) PE */
				1251	*pe = dev_pe;
				1252	if (!(dev_pe->state & EEH_PE_ISOLATED))
				1253	phb->freeze_pe(phb, pe_no);
				1254
				1255	/*
				1256	* At this point, we're sure the (compound) PE should
				1257	* have been frozen. However, we still need poke until
				1258	* hitting the frozen PE on top level.
				1259	*/
				1260	dev_pe = dev_pe->parent;
				1261	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
				1262	int ret;
				1263	int active_flags = (EEH_STATE_MMIO_ACTIVE \|
				1264	EEH_STATE_DMA_ACTIVE);
				1265
				1266	ret = eeh_ops->get_state(dev_pe, NULL);
				1267	if (ret <= 0 \|\| (ret & active_flags) == active_flags) {
				1268	dev_pe = dev_pe->parent;
				1269	continue;
				1270	}
				1271
				1272	/* Frozen parent PE */
				1273	*pe = dev_pe;
				1274	if (!(dev_pe->state & EEH_PE_ISOLATED))
				1275	phb->freeze_pe(phb, dev_pe->addr);
				1276
				1277	/* Next one */
				1278	dev_pe = dev_pe->parent;
				1279	}
				1280
				1281	return 0;
				1282	}
				1283
Gavin Shan	131c123	2014-09-30 12:38:56 +1000	[diff] [blame]	1284	/**
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1285	* pnv_eeh_next_error - Retrieve next EEH error to handle
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1286	* @pe: Affected PE
				1287	*
Gavin Shan	2a485ad	2015-02-16 14:45:46 +1100	[diff] [blame]	1288	* The function is expected to be called by EEH core while it gets
				1289	* special EEH event (without binding PE). The function calls to
				1290	* OPAL APIs for next error to handle. The informational error is
				1291	* handled internally by platform. However, the dead IOC, dead PHB,
				1292	* fenced PHB and frozen PE should be handled by EEH core eventually.
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1293	*/
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1294	static int pnv_eeh_next_error(struct eeh_pe **pe)
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1295	{
				1296	struct pci_controller *hose;
Gavin Shan	2a485ad	2015-02-16 14:45:46 +1100	[diff] [blame]	1297	struct pnv_phb *phb;
				1298	struct eeh_pe phb_pe, parent_pe;
				1299	__be64 frozen_pe_no;
				1300	__be16 err_type, severity;
				1301	int active_flags = (EEH_STATE_MMIO_ACTIVE \| EEH_STATE_DMA_ACTIVE);
				1302	long rc;
				1303	int state, ret = EEH_NEXT_ERR_NONE;
				1304
				1305	/*
				1306	* While running here, it's safe to purge the event queue.
				1307	* And we should keep the cached OPAL notifier event sychronized
				1308	* between the kernel and firmware.
				1309	*/
				1310	eeh_remove_event(NULL, false);
				1311	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1312
				1313	list_for_each_entry(hose, &hose_list, list_node) {
Gavin Shan	2a485ad	2015-02-16 14:45:46 +1100	[diff] [blame]	1314	/*
				1315	* If the subordinate PCI buses of the PHB has been
				1316	* removed or is exactly under error recovery, we
				1317	* needn't take care of it any more.
				1318	*/
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1319	phb = hose->private_data;
Gavin Shan	2a485ad	2015-02-16 14:45:46 +1100	[diff] [blame]	1320	phb_pe = eeh_phb_pe_get(hose);
				1321	if (!phb_pe \|\| (phb_pe->state & EEH_PE_ISOLATED))
				1322	continue;
				1323
				1324	rc = opal_pci_next_error(phb->opal_id,
				1325	&frozen_pe_no, &err_type, &severity);
				1326	if (rc != OPAL_SUCCESS) {
				1327	pr_devel("%s: Invalid return value on "
				1328	"PHB#%x (0x%lx) from opal_pci_next_error",
				1329	__func__, hose->global_number, rc);
				1330	continue;
				1331	}
				1332
				1333	/* If the PHB doesn't have error, stop processing */
				1334	if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR \|\|
				1335	be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
				1336	pr_devel("%s: No error found on PHB#%x\n",
				1337	__func__, hose->global_number);
				1338	continue;
				1339	}
				1340
				1341	/*
				1342	* Processing the error. We're expecting the error with
				1343	* highest priority reported upon multiple errors on the
				1344	* specific PHB.
				1345	*/
				1346	pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
				1347	__func__, be16_to_cpu(err_type),
				1348	be16_to_cpu(severity), be64_to_cpu(frozen_pe_no),
				1349	hose->global_number);
				1350	switch (be16_to_cpu(err_type)) {
				1351	case OPAL_EEH_IOC_ERROR:
				1352	if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
				1353	pr_err("EEH: dead IOC detected\n");
				1354	ret = EEH_NEXT_ERR_DEAD_IOC;
				1355	} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
				1356	pr_info("EEH: IOC informative error "
				1357	"detected\n");
				1358	pnv_eeh_get_and_dump_hub_diag(hose);
				1359	ret = EEH_NEXT_ERR_NONE;
				1360	}
				1361
				1362	break;
				1363	case OPAL_EEH_PHB_ERROR:
				1364	if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
				1365	*pe = phb_pe;
				1366	pr_err("EEH: dead PHB#%x detected, "
				1367	"location: %s\n",
				1368	hose->global_number,
				1369	eeh_pe_loc_get(phb_pe));
				1370	ret = EEH_NEXT_ERR_DEAD_PHB;
				1371	} else if (be16_to_cpu(severity) ==
				1372	OPAL_EEH_SEV_PHB_FENCED) {
				1373	*pe = phb_pe;
				1374	pr_err("EEH: Fenced PHB#%x detected, "
				1375	"location: %s\n",
				1376	hose->global_number,
				1377	eeh_pe_loc_get(phb_pe));
				1378	ret = EEH_NEXT_ERR_FENCED_PHB;
				1379	} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
				1380	pr_info("EEH: PHB#%x informative error "
				1381	"detected, location: %s\n",
				1382	hose->global_number,
				1383	eeh_pe_loc_get(phb_pe));
				1384	pnv_eeh_get_phb_diag(phb_pe);
				1385	pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
				1386	ret = EEH_NEXT_ERR_NONE;
				1387	}
				1388
				1389	break;
				1390	case OPAL_EEH_PE_ERROR:
				1391	/*
				1392	* If we can't find the corresponding PE, we
				1393	* just try to unfreeze.
				1394	*/
				1395	if (pnv_eeh_get_pe(hose,
				1396	be64_to_cpu(frozen_pe_no), pe)) {
				1397	/* Try best to clear it */
				1398	pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
				1399	hose->global_number, frozen_pe_no);
				1400	pr_info("EEH: PHB location: %s\n",
				1401	eeh_pe_loc_get(phb_pe));
				1402	opal_pci_eeh_freeze_clear(phb->opal_id,
				1403	frozen_pe_no,
				1404	OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
				1405	ret = EEH_NEXT_ERR_NONE;
				1406	} else if ((*pe)->state & EEH_PE_ISOLATED \|\|
				1407	eeh_pe_passed(*pe)) {
				1408	ret = EEH_NEXT_ERR_NONE;
				1409	} else {
				1410	pr_err("EEH: Frozen PE#%x "
				1411	"on PHB#%x detected\n",
				1412	(*pe)->addr,
				1413	(*pe)->phb->global_number);
				1414	pr_err("EEH: PE location: %s, "
				1415	"PHB location: %s\n",
				1416	eeh_pe_loc_get(*pe),
				1417	eeh_pe_loc_get(phb_pe));
				1418	ret = EEH_NEXT_ERR_FROZEN_PE;
				1419	}
				1420
				1421	break;
				1422	default:
				1423	pr_warn("%s: Unexpected error type %d\n",
				1424	__func__, be16_to_cpu(err_type));
				1425	}
				1426
				1427	/*
				1428	* EEH core will try recover from fenced PHB or
				1429	* frozen PE. In the time for frozen PE, EEH core
				1430	* enable IO path for that before collecting logs,
				1431	* but it ruins the site. So we have to dump the
				1432	* log in advance here.
				1433	*/
				1434	if ((ret == EEH_NEXT_ERR_FROZEN_PE \|\|
				1435	ret == EEH_NEXT_ERR_FENCED_PHB) &&
				1436	!((*pe)->state & EEH_PE_ISOLATED)) {
				1437	eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
				1438	pnv_eeh_get_phb_diag(*pe);
				1439
				1440	if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
				1441	pnv_pci_dump_phb_diag_data((*pe)->phb,
				1442	(*pe)->data);
				1443	}
				1444
				1445	/*
				1446	* We probably have the frozen parent PE out there and
				1447	* we need have to handle frozen parent PE firstly.
				1448	*/
				1449	if (ret == EEH_NEXT_ERR_FROZEN_PE) {
				1450	parent_pe = (*pe)->parent;
				1451	while (parent_pe) {
				1452	/* Hit the ceiling ? */
				1453	if (parent_pe->type & EEH_PE_PHB)
				1454	break;
				1455
				1456	/* Frozen parent PE ? */
				1457	state = eeh_ops->get_state(parent_pe, NULL);
				1458	if (state > 0 &&
				1459	(state & active_flags) != active_flags)
				1460	*pe = parent_pe;
				1461
				1462	/* Next parent level */
				1463	parent_pe = parent_pe->parent;
				1464	}
				1465
				1466	/* We possibly migrate to another PE */
				1467	eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
				1468	}
				1469
				1470	/*
				1471	* If we have no errors on the specific PHB or only
				1472	* informative error there, we continue poking it.
				1473	* Otherwise, we need actions to be taken by upper
				1474	* layer.
				1475	*/
				1476	if (ret > EEH_NEXT_ERR_INF)
				1477	break;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1478	}
				1479
Gavin Shan	2a485ad	2015-02-16 14:45:46 +1100	[diff] [blame]	1480	return ret;
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1481	}
				1482
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	1483	static int pnv_eeh_restore_config(struct pci_dn *pdn)
Gavin Shan	9be3bec	2014-01-03 17:47:13 +0800	[diff] [blame]	1484	{
Gavin Shan	0bd7858	2015-03-17 16:15:07 +1100	[diff] [blame]	1485	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
Gavin Shan	9be3bec	2014-01-03 17:47:13 +0800	[diff] [blame]	1486	struct pnv_phb *phb;
				1487	s64 ret;
				1488
				1489	if (!edev)
				1490	return -EEXIST;
				1491
				1492	phb = edev->phb->private_data;
				1493	ret = opal_pci_reinit(phb->opal_id,
				1494	OPAL_REINIT_PCI_DEV, edev->config_addr);
				1495	if (ret) {
				1496	pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
				1497	__func__, edev->config_addr, ret);
				1498	return -EIO;
				1499	}
				1500
				1501	return 0;
				1502	}
				1503
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1504	static struct eeh_ops pnv_eeh_ops = {
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1505	.name = "powernv",
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1506	.init = pnv_eeh_init,
				1507	.post_init = pnv_eeh_post_init,
Gavin Shan	ff57b45	2015-03-17 16:15:06 +1100	[diff] [blame]	1508	.probe = pnv_eeh_probe,
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1509	.set_option = pnv_eeh_set_option,
				1510	.get_pe_addr = pnv_eeh_get_pe_addr,
				1511	.get_state = pnv_eeh_get_state,
				1512	.reset = pnv_eeh_reset,
				1513	.wait_state = pnv_eeh_wait_state,
				1514	.get_log = pnv_eeh_get_log,
				1515	.configure_bridge = pnv_eeh_configure_bridge,
				1516	.err_inject = pnv_eeh_err_inject,
				1517	.read_config = pnv_eeh_read_config,
				1518	.write_config = pnv_eeh_write_config,
				1519	.next_error = pnv_eeh_next_error,
				1520	.restore_config = pnv_eeh_restore_config
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1521	};
				1522
				1523	/**
				1524	* eeh_powernv_init - Register platform dependent EEH operations
				1525	*
				1526	* EEH initialization on powernv platform. This function should be
				1527	* called before any EEH related functions.
				1528	*/
				1529	static int __init eeh_powernv_init(void)
				1530	{
				1531	int ret = -EINVAL;
				1532
Gavin Shan	bb593c0	2014-07-17 14:41:43 +1000	[diff] [blame]	1533	eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE);
Gavin Shan	01f3bfb	2015-02-16 14:45:39 +1100	[diff] [blame]	1534	ret = eeh_ops_register(&pnv_eeh_ops);
Gavin Shan	29310e5	2013-06-20 13:21:13 +0800	[diff] [blame]	1535	if (!ret)
				1536	pr_info("EEH: PowerNV platform initialized\n");
				1537	else
				1538	pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);
				1539
				1540	return ret;
				1541	}
Michael Ellerman	b14726c	2014-07-15 22:22:24 +1000	[diff] [blame]	1542	machine_early_initcall(powernv, eeh_powernv_init);